In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt
%matplotlib inline
from classifiers import *
from sklearn.metrics import confusion_matrix

## Original dataset

In [2]:
train_data = pd.read_csv('data/train.csv', sep = '|')
test_data = pd.read_csv('data/test.csv', sep = '|')
print(f'Train set has {train_data.shape[0]} entries and {train_data.shape[1]} features')
print(f'Test set has {test_data.shape[0]} entries and {test_data.shape[1]} features')

Train set has 1879 entries and 10 features
Test set has 498121 entries and 9 features


In [3]:
## normalize w/ encode 
y = train_data['fraud']
X = train_data.drop(columns=['fraud']).astype(float)
X_all = X.append(test_data, sort=False)
X_all= pd.get_dummies(X_all, columns=['trustLevel'], prefix='trustLevel')
X_norm_encode = pd.DataFrame(MinMaxScaler().fit_transform(X_all), columns=X_all.columns, index=X_all.index)
print(X_norm_encode.shape)
X_train_norm_enc = X_norm_encode.iloc[:1879,:]
X_test_norm_enc = X_norm_encode.iloc[1879:,:]
# print(X_train_norm_enc)

(500000, 14)


  return self.partial_fit(X, y)


In [4]:
## normalized w/o encode 
y = train_data['fraud']
X = train_data.drop(columns=['fraud']).astype(float)
X_all = X.append(test_data, sort=False)
X_norm = pd.DataFrame(MinMaxScaler().fit_transform(X_all), columns=X_all.columns, index=X_all.index)
print(X_norm.shape)
X_train_norm = X_norm.iloc[:1879,:]
X_test_norm = X_norm.iloc[1879:,:]
# print(X_train_norm)

(500000, 9)


In [5]:
#basic case
evaluate_classification(train_data.drop(columns=['fraud']), y)

[-55 -55 -55 -55 -50 -50 -50 -50 -50 -50]
Linear SVM: test core = -520 
[-55 -55 -55 -55 -50 -50 -50 -50 -50 -50]
RBF SVM: test core = -520 




[-55 -55 -55 -55 -50 -50 -50 -50 -50 -50]




Logistic Regression: test core = -520 
[ -55  -55 -370  -55  -50  -50  -50  -50 -480 -775]
Neural Net: test core = -520 
[-55 -55 -55 -55 -50 -50 -50 -50 -50 -50]
Random Forest: test core = -500 
[ 20 -40 -15  15 -30 -10  10  15 -90  -5]
AdaBoost: test core = -130 
[ 10  10 -50  25 -20 -10  -5 -10 -40  10]
XGBoost: test core = -80 


In [6]:
# with normalized
evaluate_classification(X_train_norm, y)

[-55 -55 -55 -55 -50 -50 -50 -50 -50 -50]
Linear SVM: test core = -520 
[-55 -55 -55 -55 -50 -50 -50 -50 -50 -50]
RBF SVM: test core = -520 
[-55 -45 -55 -35 -50 -40 -40 -75 -50 -50]
Logistic Regression: test core = -495 
[-55 -45 -55 -45 -50 -50 -40 -50 -50 -50]
Neural Net: test core = -500 
[-55 -55 -55 -55 -50 -50 -40 -50 -50 -50]
Random Forest: test core = -510 
[ 20 -40 -15  15 -30 -10  10  15 -90  -5]
AdaBoost: test core = -130 
[ 10  10 -50  25 -20 -10  -5 -10 -40  10]
XGBoost: test core = -80 


In [9]:
# with normalize and encode
evaluate_classification(X_train_norm_enc, y)

[-55 -55 -55 -55 -50 -50 -50 -50 -50 -50]
Linear SVM: test core = -520 
[ -55  -55  -55  -80  -50  -50  -40  -75 -100  -50]
RBF SVM: test core = -610 
[ -55  -35  -60 -110  -40  -40  -55  -70  -90  -75]
Logistic Regression: test core = -630 
[-55 -45 -55 -45 -50 -50 -40 -75 -50 -50]
Neural Net: test core = -495 
[-55 -55 -55 -55 -50 -50 -50 -50 -50 -50]
Random Forest: test core = -520 
[ 35 -65 -15  25  -5 -35  10   5 -30  20]
AdaBoost: test core = -55 
[  0  10 -25   0 -20 -20 -15  15 -15  10]
XGBoost: test core = -60 


<h2> Define xgboost with cost sensitive </h2>

In [10]:
#!/usr/bin/python
import numpy as np
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

def logistic_obj(y_hat, dtrain, alpha=5, beta=25): # alpha for FN beta for FP
    y = dtrain.get_label()
    pred = 1. / (1. + np.exp(-y_hat))
    grad = pred * (beta + alpha*y - beta*y) - alpha*y  # alpha*(p-1)*y + beta*p*(1-y)
    hess = pred * (1 - pred) * (beta + alpha*y - beta*y)
    return grad, hess

def err_rate(pred, dtrain):
    y = dtrain.get_label()
    pred = 1. / (1. + np.exp(-pred))
    loss_fn = y*np.log(pred)
    loss_fp = (1.0 - y)*np.log(1.0 - pred)
    return 'error', np.sum(-(5*loss_fn+25*loss_fp))/len(y)



In [11]:
def cross_validate(X_train, X_test, y_train, y_test, depth, num_round):
    # load data
    d_train = xgb.DMatrix(X_train, label=y_train)
    d_val = xgb.DMatrix(X_test, label=y_test)
    # build model
    param = {'max_depth': depth, 'eta': 1, 'silent': 1, 'seed': 0}
#     watchlist = [(d_val, 'eval'), (d_train, 'train')]
#     model_trn = xgb.train(param, d_train, num_round, watchlist, obj=logistic_obj, feval=err_rate)
    model_trn = xgb.train(param, d_train, num_round, obj=logistic_obj, feval=err_rate)
    # prediction
    pred = model_trn.predict(d_val) 
    pred = 1. / (1. + np.exp(-pred))
    return pred

<h2> test with one train-test split</h2>

In [12]:
## normalized dataset
X_train, X_test, y_train, y_test = train_test_split(X_train_norm, y, test_size=0.3, random_state=42)
pred = cross_validate(X_train, X_test, y_train, y_test, 4, 90)
print(confusion_matrix(y_test, pred>0.5))
print(f'cost = {profit_scorer(y_test, pred>0.5)}')

[[538   2]
 [  5  19]]
cost = 20


  if getattr(data, 'base', None) is not None and \


In [13]:
## encode and normalized dataset
X_train, X_test, y_train, y_test = train_test_split(X_train_norm_enc, y, test_size=0.3, random_state=42)
pred = cross_validate(X_train, X_test, y_train, y_test, 2, 100)
print(confusion_matrix(y_test, pred>0.5))
print(f'cost = {profit_scorer(y_test, pred>0.5)}')

[[539   1]
 [  4  20]]
cost = 55


<h2> test with cv split</h2>

In [20]:
## normalized dataset
cv = StratifiedKFold(n_splits=10, random_state=42)
cost=0
for train_index, test_index in cv.split(X_train_norm, y):
    X_train, X_test = X_train_norm.iloc[train_index], X_train_norm.iloc[test_index]
    y_train, y_test = y[train_index], y[test_index]
    pred = cross_validate(X_train, X_test, y_train, y_test, 2, 130)
    print(f' confusion matrix: cost({profit_scorer(y_test, pred>0.5)})\n {confusion_matrix(y_test, pred>0.5)}')
    cost += profit_scorer(y_test, pred>0.5)

print(f'cost = {cost}')

  if getattr(data, 'base', None) is not None and \


 confusion matrix: cost(25)
 [[178   0]
 [  3   8]]
 confusion matrix: cost(35)
 [[178   0]
 [  2   9]]
 confusion matrix: cost(0)
 [[177   1]
 [  3   8]]
 confusion matrix: cost(25)
 [[178   0]
 [  3   8]]
 confusion matrix: cost(40)
 [[178   0]
 [  1   9]]
 confusion matrix: cost(-5)
 [[176   1]
 [  3   7]]
 confusion matrix: cost(20)
 [[177   0]
 [  3   7]]
 confusion matrix: cost(-5)
 [[176   1]
 [  3   7]]
 confusion matrix: cost(-15)
 [[176   1]
 [  4   6]]
 confusion matrix: cost(10)
 [[177   0]
 [  4   6]]
cost = 130


In [18]:
## encode and normalized dataset
cv = StratifiedKFold(n_splits=10, random_state=42)
cost=0
for train_index, test_index in cv.split(X_train_norm_enc, y):
    X_train, X_test = X_train_norm_enc.iloc[train_index], X_train_norm_enc.iloc[test_index]
    y_train, y_test = y[train_index], y[test_index]
    pred = cross_validate(X_train, X_test, y_train, y_test, 2, 60)
    print(f' confusion matrix: cost({profit_scorer(y_test, pred>0.5)})\n {confusion_matrix(y_test, pred>0.5)}')
    cost += profit_scorer(y_test, pred>0.5)

print(f'cost = {cost}')

  if getattr(data, 'base', None) is not None and \


 confusion matrix: cost(15)
 [[178   0]
 [  4   7]]
 confusion matrix: cost(35)
 [[178   0]
 [  2   9]]
 confusion matrix: cost(-25)
 [[176   2]
 [  3   8]]
 confusion matrix: cost(35)
 [[178   0]
 [  2   9]]
 confusion matrix: cost(15)
 [[177   1]
 [  1   9]]
 confusion matrix: cost(-5)
 [[176   1]
 [  3   7]]
 confusion matrix: cost(20)
 [[177   0]
 [  3   7]]
 confusion matrix: cost(5)
 [[176   1]
 [  2   8]]
 confusion matrix: cost(-15)
 [[176   1]
 [  4   6]]
 confusion matrix: cost(10)
 [[177   0]
 [  4   6]]
cost = 90


<h2> test parameters - depth/num_round</h2>

In [50]:
## feature select dataset
def parameter_tuning(train_data, number_depth, number_rounds):
    for i in range(number_depth):
        cv = StratifiedKFold(n_splits=10, random_state=42)
        cost=0
        for train_index, test_index in cv.split(train_data, y):
            X_train, X_test = X_train_norm[train_index], X_train_norm[test_index]
            y_train, y_test = y[train_index], y[test_index]
            pred = cross_validate(X_train, X_test, y_train, y_test, i, 130)
            print(f' confusion matrix: cost({profit_scorer(y_test, pred>0.5)})\n {confusion_matrix(y_test, pred>0.5)}')
            cost += profit_scorer(y_test, pred>0.5)
        print(f'depth {i} cost = {cost}')
    for i in range(number_rounds):
        cv = StratifiedKFold(n_splits=10, random_state=42)
        cost=0
        for train_index, test_index in cv.split(X_train_norm, y):
            X_train, X_test = X_train_norm[train_index], X_train_norm[test_index]
            y_train, y_test = y[train_index], y[test_index]
            pred = cross_validate(X_train, X_test, y_train, y_test, 2, i*10)
            print(f' confusion matrix: cost({profit_scorer(y_test, pred>0.5)})\n {confusion_matrix(y_test, pred>0.5)}')
            cost += profit_scorer(y_test, pred>0.5)
        print(f'depth {i} cost = {cost}')

In [51]:
parameter_tuning(X_train_norm_enc, 10,20)

  if getattr(data, 'base', None) is not None and \


 confusion matrix: cost(-55)
 [[178   0]
 [ 11   0]]
 confusion matrix: cost(-55)
 [[178   0]
 [ 11   0]]
 confusion matrix: cost(-55)
 [[178   0]
 [ 11   0]]
 confusion matrix: cost(-55)
 [[178   0]
 [ 11   0]]
 confusion matrix: cost(-50)
 [[178   0]
 [ 10   0]]
 confusion matrix: cost(-50)
 [[177   0]
 [ 10   0]]
 confusion matrix: cost(-50)
 [[177   0]
 [ 10   0]]
 confusion matrix: cost(-50)
 [[177   0]
 [ 10   0]]
 confusion matrix: cost(-50)
 [[177   0]
 [ 10   0]]
 confusion matrix: cost(-50)
 [[177   0]
 [ 10   0]]
depth 0 cost = -520


  if getattr(data, 'base', None) is not None and \


 confusion matrix: cost(5)
 [[178   0]
 [  5   6]]
 confusion matrix: cost(35)
 [[178   0]
 [  2   9]]
 confusion matrix: cost(0)
 [[177   1]
 [  3   8]]
 confusion matrix: cost(15)
 [[178   0]
 [  4   7]]
 confusion matrix: cost(10)
 [[178   0]
 [  4   6]]
 confusion matrix: cost(15)
 [[176   1]
 [  1   9]]
 confusion matrix: cost(-15)
 [[176   1]
 [  4   6]]
 confusion matrix: cost(5)
 [[176   1]
 [  2   8]]
 confusion matrix: cost(0)
 [[177   0]
 [  5   5]]
 confusion matrix: cost(-25)
 [[176   1]
 [  5   5]]
depth 1 cost = 45
 confusion matrix: cost(25)
 [[178   0]
 [  3   8]]


  if getattr(data, 'base', None) is not None and \


 confusion matrix: cost(35)
 [[178   0]
 [  2   9]]
 confusion matrix: cost(0)
 [[177   1]
 [  3   8]]
 confusion matrix: cost(25)
 [[178   0]
 [  3   8]]
 confusion matrix: cost(40)
 [[178   0]
 [  1   9]]
 confusion matrix: cost(-5)
 [[176   1]
 [  3   7]]
 confusion matrix: cost(20)
 [[177   0]
 [  3   7]]
 confusion matrix: cost(-5)
 [[176   1]
 [  3   7]]
 confusion matrix: cost(-15)
 [[176   1]
 [  4   6]]
 confusion matrix: cost(10)
 [[177   0]
 [  4   6]]
depth 2 cost = 130
 confusion matrix: cost(15)
 [[178   0]
 [  4   7]]


  if getattr(data, 'base', None) is not None and \


 confusion matrix: cost(-15)
 [[176   2]
 [  2   9]]
 confusion matrix: cost(-40)
 [[175   3]
 [  2   9]]
 confusion matrix: cost(15)
 [[178   0]
 [  4   7]]
 confusion matrix: cost(40)
 [[178   0]
 [  1   9]]
 confusion matrix: cost(-45)
 [[174   3]
 [  2   8]]
 confusion matrix: cost(10)
 [[177   0]
 [  4   6]]
 confusion matrix: cost(-15)
 [[176   1]
 [  4   6]]
 confusion matrix: cost(-50)
 [[175   2]
 [  5   5]]
 confusion matrix: cost(-40)
 [[175   2]
 [  4   6]]
depth 3 cost = -125
 confusion matrix: cost(0)
 [[177   1]
 [  3   8]]


  if getattr(data, 'base', None) is not None and \


 confusion matrix: cost(10)
 [[177   1]
 [  2   9]]
 confusion matrix: cost(-35)
 [[176   2]
 [  4   7]]
 confusion matrix: cost(5)
 [[178   0]
 [  5   6]]
 confusion matrix: cost(-20)
 [[176   2]
 [  2   8]]
 confusion matrix: cost(-30)
 [[175   2]
 [  3   7]]
 confusion matrix: cost(-30)
 [[175   2]
 [  3   7]]
 confusion matrix: cost(5)
 [[176   1]
 [  2   8]]
 confusion matrix: cost(-10)
 [[177   0]
 [  6   4]]
 confusion matrix: cost(0)
 [[177   0]
 [  5   5]]
depth 4 cost = -105
 confusion matrix: cost(15)
 [[178   0]
 [  4   7]]


  if getattr(data, 'base', None) is not None and \


 confusion matrix: cost(10)
 [[177   1]
 [  2   9]]
 confusion matrix: cost(-60)
 [[175   3]
 [  4   7]]
 confusion matrix: cost(25)
 [[178   0]
 [  3   8]]
 confusion matrix: cost(-20)
 [[176   2]
 [  2   8]]
 confusion matrix: cost(-5)
 [[176   1]
 [  3   7]]
 confusion matrix: cost(-50)
 [[175   2]
 [  5   5]]
 confusion matrix: cost(-5)
 [[176   1]
 [  3   7]]
 confusion matrix: cost(0)
 [[177   0]
 [  5   5]]
 confusion matrix: cost(-15)
 [[176   1]
 [  4   6]]
depth 5 cost = -105
 confusion matrix: cost(25)
 [[178   0]
 [  3   8]]


  if getattr(data, 'base', None) is not None and \


 confusion matrix: cost(10)
 [[177   1]
 [  2   9]]
 confusion matrix: cost(-65)
 [[174   4]
 [  2   9]]
 confusion matrix: cost(25)
 [[178   0]
 [  3   8]]
 confusion matrix: cost(25)
 [[177   1]
 [  0  10]]
 confusion matrix: cost(-55)
 [[174   3]
 [  3   7]]
 confusion matrix: cost(-25)
 [[176   1]
 [  5   5]]
 confusion matrix: cost(-20)
 [[175   2]
 [  2   8]]
 confusion matrix: cost(-35)
 [[176   1]
 [  6   4]]
 confusion matrix: cost(-50)
 [[175   2]
 [  5   5]]
depth 6 cost = -165
 confusion matrix: cost(25)
 [[178   0]
 [  3   8]]


  if getattr(data, 'base', None) is not None and \


 confusion matrix: cost(35)
 [[178   0]
 [  2   9]]
 confusion matrix: cost(-60)
 [[175   3]
 [  4   7]]
 confusion matrix: cost(5)
 [[178   0]
 [  5   6]]
 confusion matrix: cost(0)
 [[176   2]
 [  0  10]]
 confusion matrix: cost(-20)
 [[175   2]
 [  2   8]]
 confusion matrix: cost(20)
 [[177   0]
 [  3   7]]
 confusion matrix: cost(5)
 [[176   1]
 [  2   8]]
 confusion matrix: cost(-35)
 [[176   1]
 [  6   4]]
 confusion matrix: cost(-25)
 [[176   1]
 [  5   5]]
depth 7 cost = -50
 confusion matrix: cost(25)
 [[178   0]
 [  3   8]]


  if getattr(data, 'base', None) is not None and \


 confusion matrix: cost(35)
 [[178   0]
 [  2   9]]
 confusion matrix: cost(-60)
 [[175   3]
 [  4   7]]
 confusion matrix: cost(25)
 [[178   0]
 [  3   8]]
 confusion matrix: cost(50)
 [[178   0]
 [  0  10]]
 confusion matrix: cost(-10)
 [[175   2]
 [  1   9]]
 confusion matrix: cost(-25)
 [[176   1]
 [  5   5]]
 confusion matrix: cost(5)
 [[176   1]
 [  2   8]]
 confusion matrix: cost(-35)
 [[176   1]
 [  6   4]]
 confusion matrix: cost(-50)
 [[175   2]
 [  5   5]]
depth 8 cost = -40
 confusion matrix: cost(25)
 [[178   0]
 [  3   8]]


  if getattr(data, 'base', None) is not None and \


 confusion matrix: cost(10)
 [[177   1]
 [  2   9]]
 confusion matrix: cost(-55)
 [[174   4]
 [  1  10]]
 confusion matrix: cost(15)
 [[178   0]
 [  4   7]]
 confusion matrix: cost(5)
 [[177   1]
 [  2   8]]
 confusion matrix: cost(-20)
 [[175   2]
 [  2   8]]
 confusion matrix: cost(-25)
 [[176   1]
 [  5   5]]
 confusion matrix: cost(-5)
 [[176   1]
 [  3   7]]
 confusion matrix: cost(-25)
 [[176   1]
 [  5   5]]
 confusion matrix: cost(-25)
 [[176   1]
 [  5   5]]
depth 9 cost = -100
 confusion matrix: cost(-4395)
 [[  0 178]
 [  0  11]]
 confusion matrix: cost(-4395)
 [[  0 178]
 [  0  11]]
 confusion matrix: cost(-4395)
 [[  0 178]
 [  0  11]]
 confusion matrix: cost(-4395)
 [[  0 178]
 [  0  11]]
 confusion matrix: cost(-4400)
 [[  0 178]
 [  0  10]]
 confusion matrix: cost(-4375)
 [[  0 177]
 [  0  10]]
 confusion matrix: cost(-4375)
 [[  0 177]
 [  0  10]]
 confusion matrix: cost(-4375)
 [[  0 177]
 [  0  10]]
 confusion matrix: cost(-4375)
 [[  0 177]
 [  0  10]]
 confusion ma

  if getattr(data, 'base', None) is not None and \
  if getattr(data, 'base', None) is not None and \


 confusion matrix: cost(-20)
 [[177   0]
 [  7   3]]
depth 1 cost = -295
 confusion matrix: cost(15)
 [[178   0]
 [  4   7]]
 confusion matrix: cost(10)
 [[177   1]
 [  2   9]]
 confusion matrix: cost(-25)
 [[176   2]
 [  3   8]]
 confusion matrix: cost(-5)
 [[178   0]
 [  6   5]]
 confusion matrix: cost(-40)
 [[176   2]
 [  4   6]]
 confusion matrix: cost(-25)
 [[176   1]
 [  5   5]]
 confusion matrix: cost(-25)
 [[176   1]
 [  5   5]]


  if getattr(data, 'base', None) is not None and \


 confusion matrix: cost(5)
 [[176   1]
 [  2   8]]
 confusion matrix: cost(-25)
 [[176   1]
 [  5   5]]
 confusion matrix: cost(-10)
 [[177   0]
 [  6   4]]
depth 2 cost = -125
 confusion matrix: cost(15)
 [[178   0]
 [  4   7]]
 confusion matrix: cost(10)
 [[177   1]
 [  2   9]]
 confusion matrix: cost(-35)
 [[176   2]
 [  4   7]]
 confusion matrix: cost(15)
 [[178   0]
 [  4   7]]


  if getattr(data, 'base', None) is not None and \


 confusion matrix: cost(-5)
 [[177   1]
 [  3   7]]
 confusion matrix: cost(-40)
 [[175   2]
 [  4   6]]
 confusion matrix: cost(-5)
 [[176   1]
 [  3   7]]
 confusion matrix: cost(15)
 [[176   1]
 [  1   9]]
 confusion matrix: cost(-25)
 [[176   1]
 [  5   5]]
 confusion matrix: cost(-25)
 [[176   1]
 [  5   5]]
depth 3 cost = -80


  if getattr(data, 'base', None) is not None and \


 confusion matrix: cost(15)
 [[178   0]
 [  4   7]]
 confusion matrix: cost(10)
 [[177   1]
 [  2   9]]
 confusion matrix: cost(-10)
 [[177   1]
 [  4   7]]
 confusion matrix: cost(15)
 [[178   0]
 [  4   7]]
 confusion matrix: cost(20)
 [[178   0]
 [  3   7]]
 confusion matrix: cost(-15)
 [[176   1]
 [  4   6]]
 confusion matrix: cost(-15)
 [[176   1]
 [  4   6]]
 confusion matrix: cost(5)
 [[176   1]
 [  2   8]]
 confusion matrix: cost(-50)
 [[175   2]
 [  5   5]]
 confusion matrix: cost(10)
 [[177   0]
 [  4   6]]
depth 4 cost = -15


  if getattr(data, 'base', None) is not None and \


 confusion matrix: cost(5)
 [[178   0]
 [  5   6]]
 confusion matrix: cost(25)
 [[178   0]
 [  3   8]]
 confusion matrix: cost(-35)
 [[176   2]
 [  4   7]]
 confusion matrix: cost(25)
 [[178   0]
 [  3   8]]
 confusion matrix: cost(30)
 [[178   0]
 [  2   8]]
 confusion matrix: cost(-15)
 [[176   1]
 [  4   6]]
 confusion matrix: cost(20)
 [[177   0]
 [  3   7]]
 confusion matrix: cost(5)
 [[176   1]
 [  2   8]]
 confusion matrix: cost(-15)
 [[176   1]
 [  4   6]]
 confusion matrix: cost(10)
 [[177   0]
 [  4   6]]
depth 5 cost = 55
 confusion matrix: cost(25)
 [[178   0]
 [  3   8]]


  if getattr(data, 'base', None) is not None and \


 confusion matrix: cost(35)
 [[178   0]
 [  2   9]]
 confusion matrix: cost(0)
 [[177   1]
 [  3   8]]
 confusion matrix: cost(25)
 [[178   0]
 [  3   8]]
 confusion matrix: cost(30)
 [[178   0]
 [  2   8]]
 confusion matrix: cost(-15)
 [[176   1]
 [  4   6]]
 confusion matrix: cost(-5)
 [[176   1]
 [  3   7]]
 confusion matrix: cost(5)
 [[176   1]
 [  2   8]]
 confusion matrix: cost(-40)
 [[175   2]
 [  4   6]]
 confusion matrix: cost(10)
 [[177   0]
 [  4   6]]
depth 6 cost = 70
 confusion matrix: cost(25)
 [[178   0]
 [  3   8]]


  if getattr(data, 'base', None) is not None and \


 confusion matrix: cost(35)
 [[178   0]
 [  2   9]]
 confusion matrix: cost(0)
 [[177   1]
 [  3   8]]
 confusion matrix: cost(25)
 [[178   0]
 [  3   8]]
 confusion matrix: cost(20)
 [[178   0]
 [  3   7]]
 confusion matrix: cost(-15)
 [[176   1]
 [  4   6]]
 confusion matrix: cost(-5)
 [[176   1]
 [  3   7]]
 confusion matrix: cost(5)
 [[176   1]
 [  2   8]]
 confusion matrix: cost(-5)
 [[176   1]
 [  3   7]]
 confusion matrix: cost(10)
 [[177   0]
 [  4   6]]
depth 7 cost = 95


  if getattr(data, 'base', None) is not None and \


 confusion matrix: cost(25)
 [[178   0]
 [  3   8]]
 confusion matrix: cost(35)
 [[178   0]
 [  2   9]]
 confusion matrix: cost(0)
 [[177   1]
 [  3   8]]
 confusion matrix: cost(15)
 [[178   0]
 [  4   7]]
 confusion matrix: cost(30)
 [[178   0]
 [  2   8]]
 confusion matrix: cost(-15)
 [[176   1]
 [  4   6]]
 confusion matrix: cost(-5)
 [[176   1]
 [  3   7]]
 confusion matrix: cost(5)
 [[176   1]
 [  2   8]]
 confusion matrix: cost(-15)
 [[176   1]
 [  4   6]]
 confusion matrix: cost(10)
 [[177   0]
 [  4   6]]
depth 8 cost = 85
 confusion matrix: cost(35)
 [[178   0]
 [  2   9]]
 confusion matrix: cost(35)
 [[178   0]
 [  2   9]]


  if getattr(data, 'base', None) is not None and \


 confusion matrix: cost(0)
 [[177   1]
 [  3   8]]
 confusion matrix: cost(25)
 [[178   0]
 [  3   8]]
 confusion matrix: cost(5)
 [[177   1]
 [  2   8]]
 confusion matrix: cost(-30)
 [[175   2]
 [  3   7]]
 confusion matrix: cost(-5)
 [[176   1]
 [  3   7]]
 confusion matrix: cost(5)
 [[176   1]
 [  2   8]]
 confusion matrix: cost(-15)
 [[176   1]
 [  4   6]]
 confusion matrix: cost(10)
 [[177   0]
 [  4   6]]
depth 9 cost = 65
 confusion matrix: cost(25)
 [[178   0]
 [  3   8]]


  if getattr(data, 'base', None) is not None and \


 confusion matrix: cost(35)
 [[178   0]
 [  2   9]]
 confusion matrix: cost(0)
 [[177   1]
 [  3   8]]
 confusion matrix: cost(25)
 [[178   0]
 [  3   8]]
 confusion matrix: cost(30)
 [[178   0]
 [  2   8]]
 confusion matrix: cost(-30)
 [[175   2]
 [  3   7]]
 confusion matrix: cost(20)
 [[177   0]
 [  3   7]]
 confusion matrix: cost(5)
 [[176   1]
 [  2   8]]
 confusion matrix: cost(-25)
 [[176   1]
 [  5   5]]
 confusion matrix: cost(10)
 [[177   0]
 [  4   6]]
depth 10 cost = 95
 confusion matrix: cost(25)
 [[178   0]
 [  3   8]]


  if getattr(data, 'base', None) is not None and \


 confusion matrix: cost(35)
 [[178   0]
 [  2   9]]
 confusion matrix: cost(0)
 [[177   1]
 [  3   8]]
 confusion matrix: cost(25)
 [[178   0]
 [  3   8]]
 confusion matrix: cost(30)
 [[178   0]
 [  2   8]]
 confusion matrix: cost(-5)
 [[176   1]
 [  3   7]]
 confusion matrix: cost(-5)
 [[176   1]
 [  3   7]]
 confusion matrix: cost(5)
 [[176   1]
 [  2   8]]
 confusion matrix: cost(-25)
 [[176   1]
 [  5   5]]
 confusion matrix: cost(10)
 [[177   0]
 [  4   6]]
depth 11 cost = 95
 confusion matrix: cost(35)
 [[178   0]
 [  2   9]]


  if getattr(data, 'base', None) is not None and \


 confusion matrix: cost(35)
 [[178   0]
 [  2   9]]
 confusion matrix: cost(0)
 [[177   1]
 [  3   8]]
 confusion matrix: cost(25)
 [[178   0]
 [  3   8]]
 confusion matrix: cost(30)
 [[178   0]
 [  2   8]]
 confusion matrix: cost(-5)
 [[176   1]
 [  3   7]]
 confusion matrix: cost(-15)
 [[176   1]
 [  4   6]]
 confusion matrix: cost(5)
 [[176   1]
 [  2   8]]
 confusion matrix: cost(-35)
 [[176   1]
 [  6   4]]
 confusion matrix: cost(10)
 [[177   0]
 [  4   6]]
depth 12 cost = 85
 confusion matrix: cost(25)
 [[178   0]
 [  3   8]]


  if getattr(data, 'base', None) is not None and \


 confusion matrix: cost(35)
 [[178   0]
 [  2   9]]
 confusion matrix: cost(0)
 [[177   1]
 [  3   8]]
 confusion matrix: cost(25)
 [[178   0]
 [  3   8]]
 confusion matrix: cost(40)
 [[178   0]
 [  1   9]]
 confusion matrix: cost(-5)
 [[176   1]
 [  3   7]]
 confusion matrix: cost(20)
 [[177   0]
 [  3   7]]
 confusion matrix: cost(-5)
 [[176   1]
 [  3   7]]
 confusion matrix: cost(-15)
 [[176   1]
 [  4   6]]
 confusion matrix: cost(10)
 [[177   0]
 [  4   6]]
depth 13 cost = 130
 confusion matrix: cost(25)
 [[178   0]
 [  3   8]]


  if getattr(data, 'base', None) is not None and \


 confusion matrix: cost(35)
 [[178   0]
 [  2   9]]
 confusion matrix: cost(0)
 [[177   1]
 [  3   8]]
 confusion matrix: cost(25)
 [[178   0]
 [  3   8]]
 confusion matrix: cost(40)
 [[178   0]
 [  1   9]]
 confusion matrix: cost(-5)
 [[176   1]
 [  3   7]]
 confusion matrix: cost(20)
 [[177   0]
 [  3   7]]
 confusion matrix: cost(-5)
 [[176   1]
 [  3   7]]
 confusion matrix: cost(-35)
 [[176   1]
 [  6   4]]
 confusion matrix: cost(10)
 [[177   0]
 [  4   6]]
depth 14 cost = 110
 confusion matrix: cost(35)
 [[178   0]
 [  2   9]]


  if getattr(data, 'base', None) is not None and \


 confusion matrix: cost(35)
 [[178   0]
 [  2   9]]
 confusion matrix: cost(0)
 [[177   1]
 [  3   8]]
 confusion matrix: cost(25)
 [[178   0]
 [  3   8]]
 confusion matrix: cost(40)
 [[178   0]
 [  1   9]]
 confusion matrix: cost(-5)
 [[176   1]
 [  3   7]]
 confusion matrix: cost(10)
 [[177   0]
 [  4   6]]
 confusion matrix: cost(-5)
 [[176   1]
 [  3   7]]
 confusion matrix: cost(-35)
 [[176   1]
 [  6   4]]
 confusion matrix: cost(10)
 [[177   0]
 [  4   6]]
depth 15 cost = 110
 confusion matrix: cost(35)
 [[178   0]
 [  2   9]]


  if getattr(data, 'base', None) is not None and \


 confusion matrix: cost(35)
 [[178   0]
 [  2   9]]
 confusion matrix: cost(0)
 [[177   1]
 [  3   8]]
 confusion matrix: cost(25)
 [[178   0]
 [  3   8]]
 confusion matrix: cost(40)
 [[178   0]
 [  1   9]]
 confusion matrix: cost(-5)
 [[176   1]
 [  3   7]]
 confusion matrix: cost(20)
 [[177   0]
 [  3   7]]
 confusion matrix: cost(-5)
 [[176   1]
 [  3   7]]
 confusion matrix: cost(-35)
 [[176   1]
 [  6   4]]
 confusion matrix: cost(10)
 [[177   0]
 [  4   6]]
depth 16 cost = 120
 confusion matrix: cost(35)
 [[178   0]
 [  2   9]]


  if getattr(data, 'base', None) is not None and \


 confusion matrix: cost(35)
 [[178   0]
 [  2   9]]
 confusion matrix: cost(0)
 [[177   1]
 [  3   8]]
 confusion matrix: cost(25)
 [[178   0]
 [  3   8]]
 confusion matrix: cost(30)
 [[178   0]
 [  2   8]]
 confusion matrix: cost(-5)
 [[176   1]
 [  3   7]]
 confusion matrix: cost(20)
 [[177   0]
 [  3   7]]
 confusion matrix: cost(-5)
 [[176   1]
 [  3   7]]
 confusion matrix: cost(-35)
 [[176   1]
 [  6   4]]
 confusion matrix: cost(10)
 [[177   0]
 [  4   6]]
depth 17 cost = 110
 confusion matrix: cost(35)
 [[178   0]
 [  2   9]]


  if getattr(data, 'base', None) is not None and \


 confusion matrix: cost(35)
 [[178   0]
 [  2   9]]
 confusion matrix: cost(0)
 [[177   1]
 [  3   8]]
 confusion matrix: cost(25)
 [[178   0]
 [  3   8]]
 confusion matrix: cost(30)
 [[178   0]
 [  2   8]]
 confusion matrix: cost(-5)
 [[176   1]
 [  3   7]]
 confusion matrix: cost(20)
 [[177   0]
 [  3   7]]
 confusion matrix: cost(-5)
 [[176   1]
 [  3   7]]
 confusion matrix: cost(-25)
 [[176   1]
 [  5   5]]
 confusion matrix: cost(10)
 [[177   0]
 [  4   6]]
depth 18 cost = 120
 confusion matrix: cost(35)
 [[178   0]
 [  2   9]]


  if getattr(data, 'base', None) is not None and \


 confusion matrix: cost(35)
 [[178   0]
 [  2   9]]
 confusion matrix: cost(0)
 [[177   1]
 [  3   8]]
 confusion matrix: cost(25)
 [[178   0]
 [  3   8]]
 confusion matrix: cost(30)
 [[178   0]
 [  2   8]]
 confusion matrix: cost(-5)
 [[176   1]
 [  3   7]]
 confusion matrix: cost(10)
 [[177   0]
 [  4   6]]
 confusion matrix: cost(-5)
 [[176   1]
 [  3   7]]
 confusion matrix: cost(-35)
 [[176   1]
 [  6   4]]
 confusion matrix: cost(0)
 [[177   0]
 [  5   5]]
depth 19 cost = 90


In [43]:
parameter_tuning(X_train_select, 10,20)

  if getattr(data, 'base', None) is not None and \


depth 0 cost = -520


  if getattr(data, 'base', None) is not None and \


depth 1 cost = 45


  if getattr(data, 'base', None) is not None and \


depth 2 cost = 130


  if getattr(data, 'base', None) is not None and \


depth 3 cost = -125


  if getattr(data, 'base', None) is not None and \


depth 4 cost = -105


  if getattr(data, 'base', None) is not None and \


depth 5 cost = -105


  if getattr(data, 'base', None) is not None and \


depth 6 cost = -165


  if getattr(data, 'base', None) is not None and \


depth 7 cost = -50


  if getattr(data, 'base', None) is not None and \


depth 8 cost = -40


  if getattr(data, 'base', None) is not None and \


depth 9 cost = -100
depth 0 cost = -43855
depth 1 cost = -295


  if getattr(data, 'base', None) is not None and \
  if getattr(data, 'base', None) is not None and \
  if getattr(data, 'base', None) is not None and \


depth 2 cost = -125


  if getattr(data, 'base', None) is not None and \


depth 3 cost = -80


  if getattr(data, 'base', None) is not None and \


depth 4 cost = -15


  if getattr(data, 'base', None) is not None and \


depth 5 cost = 55


  if getattr(data, 'base', None) is not None and \


depth 6 cost = 70


  if getattr(data, 'base', None) is not None and \


depth 7 cost = 95


  if getattr(data, 'base', None) is not None and \


depth 8 cost = 85


  if getattr(data, 'base', None) is not None and \


depth 9 cost = 65


  if getattr(data, 'base', None) is not None and \


depth 10 cost = 95


  if getattr(data, 'base', None) is not None and \


depth 11 cost = 95


  if getattr(data, 'base', None) is not None and \


depth 12 cost = 85


  if getattr(data, 'base', None) is not None and \


depth 13 cost = 130


  if getattr(data, 'base', None) is not None and \


depth 14 cost = 110


  if getattr(data, 'base', None) is not None and \


depth 15 cost = 110


  if getattr(data, 'base', None) is not None and \


depth 16 cost = 120


  if getattr(data, 'base', None) is not None and \


depth 17 cost = 110


  if getattr(data, 'base', None) is not None and \


depth 18 cost = 120


  if getattr(data, 'base', None) is not None and \


depth 19 cost = 90
