# Testing Algorithms

In [181]:
import pandas as pd
import numpy as np
import math
import os
import datetime
import matplotlib.pyplot as plt
from imblearn.over_sampling import SMOTE 
import seaborn as sb
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.linear_model import SGDClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import train_test_split, StratifiedKFold, GridSearchCV, cross_val_score
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.utils import resample
from sklearn.preprocessing import StandardScaler
pd.set_option('display.max_columns', 200)

TEST = True

## Retrieve the datasets

In [182]:
test_data = pd.read_csv("./encoded_test.csv", na_values= ['?', 'NA', ''],sep= ',')
train_data = pd.read_csv("./encoded_train.csv",na_values= ['?', 'NA', ''],sep=  ',')


## Build Prediction
This function builds the csv for submiting in kaggle

In [198]:
def build_prediction(pred):
    if TEST:
        print(pred)
        pred = pd.DataFrame(pred, columns=['Predicted','col'])
        pred.drop('col', axis=1, inplace=True)
        loan_id = pd.DataFrame(test_data, columns=['loan_id'])

        results = pd.concat([loan_id, pred], axis=1)
        results = results.rename(
                columns={'loan_id': 'Id'})

        results.to_csv('results.csv', index = False)
       
        return results
    else:
        report = classification_report(y_test, pred, output_dict=True,zero_division= 0)

        print(f"Report:\n{classification_report(y_test, pred,zero_division= 0)}\n")

        sb.set(font_scale=1.0)
        
        ax = plt.subplot()
        cm = confusion_matrix(y_test, pred)

        sb.heatmap(cm, annot=True, ax=ax, fmt="g")

        ax.set_xlabel('Predicted');
        ax.set_ylabel('Observed');
        ax.set_title('Confusion Matrix');
        plt.show()


In [184]:
inputs = train_data.drop(columns=['loan_id', 'status'])
labels = train_data['status'].values
oversample = SMOTE()

if (TEST):
    
    X_train = train_data.drop(columns=['loan_id', 'status'])
    y_train = train_data['status'].values

    X_train, y_train = oversample.fit_resample(inputs, labels)

    X_test = test_data.drop(columns=['loan_id', 'status'])
    y_test = test_data['status'].values
else:
    inputs, labels = oversample.fit_resample(inputs, labels)
    (X_train, X_test, y_train, y_test) = train_test_split(inputs, labels, test_size=0.25, random_state=0)



## Decision Tree

In [185]:
# Create the classifier
decision_tree_classifier = DecisionTreeClassifier()

# Train the classifier on the training set
decision_tree_classifier.fit(X_train, y_train)

pred= decision_tree_classifier.predict_proba(X_test) if TEST else decision_tree_classifier.predict(X_test)
build_prediction(pred)


[[0. 1.]
 [0. 1.]
 [0. 1.]
 [1. 0.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [1. 0.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [1. 0.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [1. 0.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [1. 0.]
 [1. 0.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [1. 0.]
 [1. 0.]
 [0. 1.]
 [1. 0.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [1. 0.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [1. 0.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [1. 0.]
 [0. 1.]
 [1. 0.]
 

Unnamed: 0,Id,Predicted
0,5895,1.0
1,5172,1.0
2,6207,1.0
3,7122,0.0
4,7067,1.0
...,...,...
349,7294,1.0
350,6321,1.0
351,6469,1.0
352,5614,1.0


In [186]:
parameter_grid = {'criterion': ['gini', 'entropy'],
                  'splitter': ['best', 'random'],
                  'max_depth': range(10, 20),
                  'max_features': range(10,20)}

grid_search = GridSearchCV(DecisionTreeClassifier(),
                           param_grid=parameter_grid,
                           cv=10,
                           verbose=4,
                           n_jobs=-1)

grid_search.fit(X_train, y_train)
print('Best score: {}'.format(grid_search.best_score_))
print('Best parameters: {}'.format(grid_search.best_params_))
print('Best estimator: {}'.format(grid_search.best_estimator_))

Fitting 10 folds for each of 400 candidates, totalling 4000 fits
Best score: 0.8835526315789475
Best parameters: {'criterion': 'gini', 'max_depth': 19, 'max_features': 19, 'splitter': 'best'}
Best estimator: DecisionTreeClassifier(max_depth=19, max_features=19)


In [187]:
decision_tree_classifier = grid_search.best_estimator_

pred= decision_tree_classifier.predict_proba(X_test) if TEST else decision_tree_classifier.predict(X_test)
build_prediction(pred)

[[0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [1. 0.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [1. 0.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [1. 0.]
 [1. 0.]
 [0. 1.]
 [1. 0.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [1. 0.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [1. 0.]
 [0. 1.]
 [1. 0.]
 [0. 1.]
 [0. 1.]
 [1. 0.]
 [1. 0.]
 [0. 1.]
 [1. 0.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [1. 0.]
 [0. 1.]
 [0. 1.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [0. 1.]
 [0. 1.]
 [1. 0.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [1. 0.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [1. 0.]
 [0. 1.]
 [0. 1.]
 [1. 0.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [1. 0.]
 [0. 1.]
 [1. 0.]
 [1. 0.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [1. 0.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 

Unnamed: 0,Id,Predicted
0,5895,1.0
1,5172,1.0
2,6207,1.0
3,7122,1.0
4,7067,0.0
...,...,...
349,7294,1.0
350,6321,1.0
351,6469,1.0
352,5614,1.0


## SVC 

In [188]:

svc = SVC(probability=True)
svc.fit(X_train, y_train)

pred= svc.predict_proba(X_test) if TEST else svc.predict(X_test)
results=build_prediction(pred)

     

[[0.5 0.5]
 [0.5 0.5]
 [0.5 0.5]
 [0.5 0.5]
 [0.5 0.5]
 [0.5 0.5]
 [0.5 0.5]
 [0.5 0.5]
 [0.5 0.5]
 [0.5 0.5]
 [0.5 0.5]
 [0.5 0.5]
 [0.5 0.5]
 [0.5 0.5]
 [0.5 0.5]
 [0.5 0.5]
 [0.5 0.5]
 [0.5 0.5]
 [0.5 0.5]
 [0.5 0.5]
 [0.5 0.5]
 [0.5 0.5]
 [0.5 0.5]
 [0.5 0.5]
 [0.5 0.5]
 [0.5 0.5]
 [0.5 0.5]
 [0.5 0.5]
 [0.5 0.5]
 [0.5 0.5]
 [0.5 0.5]
 [0.5 0.5]
 [0.5 0.5]
 [0.5 0.5]
 [0.5 0.5]
 [0.5 0.5]
 [0.5 0.5]
 [0.5 0.5]
 [0.5 0.5]
 [0.5 0.5]
 [0.5 0.5]
 [0.5 0.5]
 [0.5 0.5]
 [0.5 0.5]
 [0.5 0.5]
 [0.5 0.5]
 [0.5 0.5]
 [0.5 0.5]
 [0.5 0.5]
 [0.5 0.5]
 [0.5 0.5]
 [0.5 0.5]
 [0.5 0.5]
 [0.5 0.5]
 [0.5 0.5]
 [0.5 0.5]
 [0.5 0.5]
 [0.5 0.5]
 [0.5 0.5]
 [0.5 0.5]
 [0.5 0.5]
 [0.5 0.5]
 [0.5 0.5]
 [0.5 0.5]
 [0.5 0.5]
 [0.5 0.5]
 [0.5 0.5]
 [0.5 0.5]
 [0.5 0.5]
 [0.5 0.5]
 [0.5 0.5]
 [0.5 0.5]
 [0.5 0.5]
 [0.5 0.5]
 [0.5 0.5]
 [0.5 0.5]
 [0.5 0.5]
 [0.5 0.5]
 [0.5 0.5]
 [0.5 0.5]
 [0.5 0.5]
 [0.5 0.5]
 [0.5 0.5]
 [0.5 0.5]
 [0.5 0.5]
 [0.5 0.5]
 [0.5 0.5]
 [0.5 0.5]
 [0.5 0.5]
 [0.5 0.5]
 [0.5 0.5]

In [189]:
scaler = StandardScaler()
scaler.fit(X_train)

X_train = scaler.transform(X_train)
X_test= scaler.transform(X_test)

ANNClassifier = MLPClassifier(random_state=1, max_iter=500)

# Train the classifier on the training set
ANNClassifier.fit(X_train, y_train)

pred= ANNClassifier.predict_proba(X_test) if TEST else ANNClassifier.predict(X_test)
build_prediction(pred)




[[1.35368954e-05 9.99986463e-01]
 [3.35795479e-04 9.99664205e-01]
 [6.83826146e-06 9.99993162e-01]
 [9.99994339e-01 5.66080173e-06]
 [6.74430765e-04 9.99325569e-01]
 [1.82783730e-06 9.99998172e-01]
 [5.47993164e-07 9.99999452e-01]
 [4.09821578e-08 9.99999959e-01]
 [2.13249451e-08 9.99999979e-01]
 [2.14251003e-06 9.99997857e-01]
 [2.40530919e-07 9.99999759e-01]
 [1.39181082e-02 9.86081892e-01]
 [1.23683239e-05 9.99987632e-01]
 [1.75309899e-04 9.99824690e-01]
 [1.56777636e-07 9.99999843e-01]
 [4.35272297e-01 5.64727703e-01]
 [3.97128369e-01 6.02871631e-01]
 [7.32324258e-03 9.92676757e-01]
 [4.77887017e-02 9.52211298e-01]
 [4.56214484e-05 9.99954379e-01]
 [1.63306074e-04 9.99836694e-01]
 [1.37906195e-05 9.99986209e-01]
 [9.99999999e-01 5.86674458e-10]
 [4.34365138e-05 9.99956563e-01]
 [1.83593905e-04 9.99816406e-01]
 [1.30465500e-04 9.99869535e-01]
 [3.93478062e-04 9.99606522e-01]
 [2.54690486e-04 9.99745310e-01]
 [3.72851267e-06 9.99996271e-01]
 [2.28411403e-04 9.99771589e-01]
 [3.344357

Unnamed: 0,Id,Predicted
0,5895,0.999986
1,5172,0.999664
2,6207,0.999993
3,7122,0.000006
4,7067,0.999326
...,...,...
349,7294,0.999999
350,6321,0.993074
351,6469,0.999991
352,5614,0.999997


In [190]:
scaler = StandardScaler()

# Fit only to the training data
scaler.fit(X_train)

# Now apply the transformations to the data:
X_train_nn = scaler.transform(X_train)
X_test_nn = scaler.transform(X_test)

# Create the classifier
ANNClassifier = MLPClassifier(random_state=1, max_iter=500)

# Train the classifier on the training set
ANNClassifier.fit(X_train_nn, y_train)

pred = ANNClassifier.predict(X_test_nn)

build_prediction(pred)

[ 1  1  1 -1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1 -1  1
  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1 -1  1
  1  1  1  1 -1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1
  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1 -1  1  1  1
  1  1  1  1  1  1 -1 -1  1  1  1  1 -1  1  1  1  1  1 -1  1  1 -1  1  1
  1  1 -1  1  1  1 -1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1
 -1  1 -1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1
  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1 -1  1  1  1  1  1
 -1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1
 -1 -1 -1  1  1  1  1  1  1  1  1 -1  1  1  1  1  1  1  1  1 -1 -1 -1  1
  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1
  1  1  1  1 -1 -1  1  1  1  1  1  1 -1  1  1  1  1  1  1  1  1 -1  1  1
  1  1  1  1  1  1  1 -1  1  1 -1  1  1  1  1  1  1  1 -1  1  1  1  1  1
  1  1  1  1  1  1  1  1  1 -1  1  1  1  1  1  1 -1

ValueError: Shape of passed values is (354, 1), indices imply (354, 2)

In [191]:
# parameter_grid = {'activation': ['tanh','identity','logistic','relu'],
#                   'solver': ['adam','lbfgs','sgd'],
#                   'hidden_layer_sizes': [3,5,8,13,21,34],
#                   'verbose': [True]}

# cross_validation = StratifiedKFold(n_splits=10, shuffle=True)

# grid_search = GridSearchCV(ANNClassifier,
#                            param_grid=parameter_grid,
#                            cv=cross_validation)

# grid_search.fit(X_train, y_train)

# print('Best score: {}'.format(grid_search.best_score_))
# print('Best parameters: {}'.format(grid_search.best_params_))
# print('Best estimator: {}'.format(grid_search.best_estimator_))

In [192]:
# ANNClassifier = grid_search.best_estimator_
# pred = ANNClassifier.predict(X_test)
# buid_prediction(pred)

## Random Forest

In [193]:
rf = RandomForestClassifier(400)

rf.fit(X_train, y_train)
pred= rf.predict_proba(X_test) if TEST else rf.predict(X_test)
build_prediction(pred)

[[0.06   0.94  ]
 [0.1575 0.8425]
 [0.08   0.92  ]
 [0.7025 0.2975]
 [0.175  0.825 ]
 [0.065  0.935 ]
 [0.055  0.945 ]
 [0.055  0.945 ]
 [0.16   0.84  ]
 [0.105  0.895 ]
 [0.1175 0.8825]
 [0.135  0.865 ]
 [0.1825 0.8175]
 [0.09   0.91  ]
 [0.0875 0.9125]
 [0.3475 0.6525]
 [0.3075 0.6925]
 [0.21   0.79  ]
 [0.42   0.58  ]
 [0.2925 0.7075]
 [0.0625 0.9375]
 [0.1075 0.8925]
 [0.76   0.24  ]
 [0.2025 0.7975]
 [0.315  0.685 ]
 [0.2225 0.7775]
 [0.21   0.79  ]
 [0.33   0.67  ]
 [0.1675 0.8325]
 [0.115  0.885 ]
 [0.2    0.8   ]
 [0.24   0.76  ]
 [0.1975 0.8025]
 [0.145  0.855 ]
 [0.06   0.94  ]
 [0.05   0.95  ]
 [0.0725 0.9275]
 [0.265  0.735 ]
 [0.0925 0.9075]
 [0.1175 0.8825]
 [0.0725 0.9275]
 [0.1425 0.8575]
 [0.0975 0.9025]
 [0.085  0.915 ]
 [0.185  0.815 ]
 [0.2675 0.7325]
 [0.72   0.28  ]
 [0.17   0.83  ]
 [0.1425 0.8575]
 [0.2425 0.7575]
 [0.225  0.775 ]
 [0.12   0.88  ]
 [0.3525 0.6475]
 [0.0425 0.9575]
 [0.08   0.92  ]
 [0.065  0.935 ]
 [0.1425 0.8575]
 [0.2975 0.7025]
 [0.175  0.825

Unnamed: 0,Id,Predicted
0,5895,0.9400
1,5172,0.8425
2,6207,0.9200
3,7122,0.2975
4,7067,0.8250
...,...,...
349,7294,0.9100
350,6321,0.8650
351,6469,0.8750
352,5614,0.9400


## Gaussian Naive Bayes

In [194]:
clf = GaussianNB()
clf.fit(X_train, y_train)
pred= clf.predict_proba(X_test) if TEST else clf.predict(X_test)
build_prediction(pred)

[[5.60790356e-06 9.99994392e-01]
 [7.02431398e-06 9.99992976e-01]
 [9.94071877e-06 9.99990059e-01]
 [1.00000000e+00 0.00000000e+00]
 [3.29248430e-07 9.99999671e-01]
 [5.40364723e-05 9.99945964e-01]
 [2.40373232e-06 9.99997596e-01]
 [2.35173716e-08 9.99999976e-01]
 [2.17274862e-07 9.99999783e-01]
 [6.94669809e-06 9.99993053e-01]
 [4.49272004e-06 9.99995507e-01]
 [3.47522258e-06 9.99996525e-01]
 [1.41595873e-07 9.99999858e-01]
 [1.12052098e-06 9.99998879e-01]
 [3.34418151e-07 9.99999666e-01]
 [1.42412797e-05 9.99985759e-01]
 [2.88268888e-06 9.99997117e-01]
 [2.23939973e-07 9.99999776e-01]
 [1.91873022e-06 9.99998081e-01]
 [1.04438873e-05 9.99989556e-01]
 [2.68799654e-06 9.99997312e-01]
 [1.56673844e-06 9.99998433e-01]
 [1.00000000e+00 0.00000000e+00]
 [1.32137442e-09 9.99999999e-01]
 [1.63132276e-09 9.99999998e-01]
 [1.79232057e-09 9.99999998e-01]
 [1.29389668e-09 9.99999999e-01]
 [1.51486120e-09 9.99999998e-01]
 [8.47695217e-07 9.99999152e-01]
 [2.44632498e-07 9.99999755e-01]
 [1.035854

Unnamed: 0,Id,Predicted
0,5895,0.999994
1,5172,0.999993
2,6207,0.999990
3,7122,0.000000
4,7067,1.000000
...,...,...
349,7294,1.000000
350,6321,1.000000
351,6469,1.000000
352,5614,1.000000


## Stochastic gradient descent

In [195]:
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline

clf.fit(X_train, y_train)
pred= clf.predict_proba(X_test) if TEST else clf.predict(X_test)
build_prediction(pred)

[[5.60790356e-06 9.99994392e-01]
 [7.02431398e-06 9.99992976e-01]
 [9.94071877e-06 9.99990059e-01]
 [1.00000000e+00 0.00000000e+00]
 [3.29248430e-07 9.99999671e-01]
 [5.40364723e-05 9.99945964e-01]
 [2.40373232e-06 9.99997596e-01]
 [2.35173716e-08 9.99999976e-01]
 [2.17274862e-07 9.99999783e-01]
 [6.94669809e-06 9.99993053e-01]
 [4.49272004e-06 9.99995507e-01]
 [3.47522258e-06 9.99996525e-01]
 [1.41595873e-07 9.99999858e-01]
 [1.12052098e-06 9.99998879e-01]
 [3.34418151e-07 9.99999666e-01]
 [1.42412797e-05 9.99985759e-01]
 [2.88268888e-06 9.99997117e-01]
 [2.23939973e-07 9.99999776e-01]
 [1.91873022e-06 9.99998081e-01]
 [1.04438873e-05 9.99989556e-01]
 [2.68799654e-06 9.99997312e-01]
 [1.56673844e-06 9.99998433e-01]
 [1.00000000e+00 0.00000000e+00]
 [1.32137442e-09 9.99999999e-01]
 [1.63132276e-09 9.99999998e-01]
 [1.79232057e-09 9.99999998e-01]
 [1.29389668e-09 9.99999999e-01]
 [1.51486120e-09 9.99999998e-01]
 [8.47695217e-07 9.99999152e-01]
 [2.44632498e-07 9.99999755e-01]
 [1.035854

Unnamed: 0,Id,Predicted
0,5895,0.999994
1,5172,0.999993
2,6207,0.999990
3,7122,0.000000
4,7067,1.000000
...,...,...
349,7294,1.000000
350,6321,1.000000
351,6469,1.000000
352,5614,1.000000


## Multi-layer Perceptron classifier

In [196]:
clf = MLPClassifier(random_state=1, max_iter=300).fit(X_train, y_train)
clf.fit(X_train, y_train)
pred= clf.predict_proba(X_test) if TEST else clf.predict(X_test)
build_prediction(pred)

[[6.28804187e-05 9.99937120e-01]
 [1.54062750e-03 9.98459372e-01]
 [5.17094994e-05 9.99948291e-01]
 [9.99988342e-01 1.16578943e-05]
 [2.97993472e-03 9.97020065e-01]
 [1.48279181e-05 9.99985172e-01]
 [4.98422122e-06 9.99995016e-01]
 [4.00417522e-07 9.99999600e-01]
 [4.02818607e-07 9.99999597e-01]
 [1.05647552e-05 9.99989435e-01]
 [1.24591025e-06 9.99998754e-01]
 [2.77271994e-02 9.72272801e-01]
 [4.69869806e-05 9.99953013e-01]
 [6.13768327e-04 9.99386232e-01]
 [1.27233817e-06 9.99998728e-01]
 [5.29132113e-01 4.70867887e-01]
 [4.82767502e-01 5.17232498e-01]
 [2.15004627e-02 9.78499537e-01]
 [1.00968220e-01 8.99031780e-01]
 [2.74367191e-04 9.99725633e-01]
 [6.35226949e-04 9.99364773e-01]
 [5.56724555e-05 9.99944328e-01]
 [9.99999998e-01 1.55146394e-09]
 [2.20728249e-04 9.99779272e-01]
 [7.52544942e-04 9.99247455e-01]
 [4.33666254e-04 9.99566334e-01]
 [6.41029152e-04 9.99358971e-01]
 [1.01031879e-03 9.98989681e-01]
 [1.60256815e-05 9.99983974e-01]
 [8.13595178e-04 9.99186405e-01]
 [1.298587

Unnamed: 0,Id,Predicted
0,5895,0.999937
1,5172,0.998459
2,6207,0.999948
3,7122,0.000012
4,7067,0.997020
...,...,...
349,7294,0.999995
350,6321,0.985392
351,6469,0.999956
352,5614,0.999987


## Gradient Boosting Classifier

In [197]:
from sklearn.ensemble import GradientBoostingClassifier
clf = GradientBoostingClassifier(n_estimators=100, learning_rate=1.0,max_depth=1, random_state=0).fit(X_train, y_train)
pred= clf.predict_proba(X_test) if TEST else clf.predict(X_test)
build_prediction(pred)

[[0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 

Unnamed: 0,Id,Predicted
0,5895,1.0
1,5172,1.0
2,6207,1.0
3,7122,1.0
4,7067,1.0
...,...,...
349,7294,1.0
350,6321,1.0
351,6469,1.0
352,5614,1.0


## SMOTE 