In [1]:
import warnings
warnings.filterwarnings("ignore")

In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import make_scorer, confusion_matrix, precision_score

In [58]:
def _confusion_cell_scorer(row, col):
    """Build a scorer that reports a single cell of the binary confusion matrix.

    Parameters
    ----------
    row : int
        Index of the true class (0 = negative, 1 = positive).
    col : int
        Index of the predicted class (0 = negative, 1 = positive).

    Returns
    -------
    callable
        A scorer produced by ``make_scorer`` that returns the raw count found
        at ``confusion_matrix(y, y_pred)[row][col]``.
    """
    def _cell_count(y, y_pred):
        # Pin the label set so the matrix is always 2x2. Without ``labels`` a
        # split whose truth and predictions contain only one class produces a
        # 1x1 matrix and indexing [1][...] raises IndexError.
        # NOTE(review): assumes targets are encoded as 0/1, as the
        # class-weight code in this notebook does (classes=[1,0]).
        return confusion_matrix(y, y_pred, labels=[0, 1])[row][col]

    return make_scorer(_cell_count)


# Raw confusion-matrix counts, exposed as scorers so GridSearchCV records them.
true_neg = _confusion_cell_scorer(0, 0)
false_neg = _confusion_cell_scorer(1, 0)
true_pos = _confusion_cell_scorer(1, 1)
false_pos = _confusion_cell_scorer(0, 1)
# zero_division=0 reports 0 when no positives are predicted.
precision = make_scorer(precision_score, zero_division=0)

# Metric name -> scorer; the keys become the ``*_test_<name>`` / ``*_train_<name>``
# columns of ``GridSearchCV.cv_results_``.
SCORING = {
    "roc_auc": "roc_auc",
    "accuracy": "accuracy",
    "recall": "recall",
    "precision": precision,
    "true_pos": true_pos,
    "true_neg": true_neg,
    "false_pos": false_pos,
    "false_neg": false_neg,
}

# Columns of ``cv_results_`` kept for reporting. For every metric the order is
# mean_test, std_test, mean_train, std_train.
SCORE_FEATURES = [
    f"{stat}_{split}_{metric}"
    for metric in (
        "roc_auc", "accuracy", "recall", "precision",
        "true_pos", "true_neg", "false_pos", "false_neg",
    )
    for split in ("test", "train")
    for stat in ("mean", "std")
]

def test_model(clf, X, y, param_grid, **kwargs):
    """Grid-search ``clf`` over ``param_grid`` and summarise the best candidate.

    Every metric in ``SCORING`` is recorded on both the train and test splits;
    the winning candidate is chosen (and refitted) on ``roc_auc``.

    Parameters
    ----------
    clf : estimator
        Unfitted scikit-learn estimator to tune.
    X, y :
        Training features and targets, forwarded to ``GridSearchCV.fit``.
    param_grid : dict or list of dict
        Hyper-parameter grid handed to ``GridSearchCV``.
    **kwargs :
        Extra fit parameters (e.g. ``sample_weight``) forwarded to ``fit``.

    Returns
    -------
    tuple
        ``(best_params, best_scores)`` where ``best_scores`` is the
        ``SCORE_FEATURES`` row of ``cv_results_`` for the best candidate.
    """
    grid_search = GridSearchCV(
        estimator=clf,
        param_grid=param_grid,
        scoring=SCORING,
        refit="roc_auc",
        return_train_score=True,
    )
    grid_search.fit(X, y, **kwargs)

    score_table = pd.DataFrame(grid_search.cv_results_)[SCORE_FEATURES]
    best_row = score_table.iloc[grid_search.best_index_]
    return grid_search.best_params_, best_row

In [4]:
from sklearn.neighbors import KNeighborsClassifier

def nns(X, y, balanced=False):
    """Tune a k-nearest-neighbours classifier via ``test_model``.

    Searches 1-9 neighbours, uniform/distance weighting and the
    euclidean/manhattan metrics. ``balanced`` is accepted so the signature
    matches the other model helpers but is not used here.

    Returns whatever ``test_model`` returns: ``(best_params, best_scores)``.
    """
    search_space = {
        'n_neighbors': np.arange(1, 10),
        'weights': ['uniform', 'distance'],
        'metric': ['euclidean', 'manhattan'],
    }
    knn = KNeighborsClassifier(n_jobs=-2)
    return test_model(knn, X, y, search_space)

In [52]:
from sklearn.svm import SVC

def svm(X, y, balanced=False):
    """Tune a support vector classifier via ``test_model``.

    Parameters
    ----------
    X, y :
        Training features and targets.
    balanced : bool, default False
        When True, every candidate is fitted with ``class_weight="balanced"``.

    Returns
    -------
    tuple
        ``(best_params, best_scores)`` as returned by ``test_model``.
    """
    clf = SVC()
    param_grid = {'kernel': ['linear', 'rbf'],
                  'C': np.logspace(2, 4, 2),  # reduced grid; wider option: np.logspace(2,5,6)
                  'gamma': np.logspace(-4, 0.5, 1)}  # reduced grid; wider option: np.logspace(-4,0.5,10)
    if balanced:
        # Must be a plain list of candidate values. A trailing comma here
        # would turn it into the tuple (["balanced"],), so each candidate
        # would receive the *list* ["balanced"] as its class_weight.
        param_grid["class_weight"] = ["balanced"]
    return test_model(clf, X, y, param_grid)

In [6]:
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF

def gp(X, y, balanced=False):
    """Evaluate a Gaussian-process classifier via ``test_model``.

    The grid holds a single candidate kernel (``1.0 * RBF(1.0)``), so the
    search only produces cross-validated scores for that one configuration.
    ``balanced`` is accepted for signature parity with the other helpers but
    is not used here.
    """
    rbf_kernel = 1.0 * RBF(1.0)
    classifier = GaussianProcessClassifier(random_state=0, n_jobs=-2)
    return test_model(classifier, X, y, {'kernel': [rbf_kernel]})

In [59]:
from sklearn.ensemble import RandomForestClassifier

def rfc(X, y, balanced=False):
    """Tune a random-forest classifier via ``test_model``.

    Parameters
    ----------
    X, y :
        Training features and targets.
    balanced : bool, default False
        When True, every candidate is fitted with ``class_weight="balanced"``.

    Returns
    -------
    tuple
        ``(best_params, best_scores)`` as returned by ``test_model``.
    """
    # Seeded like the other stochastic estimators in this notebook so that
    # repeated runs give the same scores.
    clf = RandomForestClassifier(n_estimators=100, random_state=0)
    param_grid = {'max_depth': [4, 6],
                  'min_samples_leaf': [3, 5, 9, 17],
                  'max_features': [0.3]}
    if balanced:
        # Plain list of candidate values; a trailing comma here would wrap it
        # in a tuple and pass the list ["balanced"] itself as class_weight.
        param_grid["class_weight"] = ["balanced"]
    return test_model(clf, X, y, param_grid)

In [63]:
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.utils.class_weight import compute_class_weight

def gbc(X, y, balanced=False):
    """Tune a gradient-boosting classifier via ``test_model``.

    Parameters
    ----------
    X, y :
        Training features and targets. With ``balanced=True`` ``y`` must
        support ``.apply`` (presumably a pandas Series of 0/1 labels).
    balanced : bool, default False
        When True, samples are weighted by the "balanced" class weights;
        otherwise every sample gets weight 1.

    Returns
    -------
    tuple
        ``(best_params, best_scores)`` as returned by ``test_model``.
    """
    if not balanced:
        y_weights = np.ones(y.shape)
    else:
        # classes=[1,0] makes the positive-class weight come back first.
        pos_weight, neg_weight = compute_class_weight(class_weight="balanced",
                                                      classes=[1,0],
                                                      y=y)
        y_weights = y.apply(lambda label: pos_weight if label else neg_weight)

    search_space = {
        'learning_rate': [0.1, 0.05, 0.02, 0.01],
        'max_depth': [3, 4, 6],
        'min_samples_leaf': [3, 5, 9, 17],
        'max_features': list(np.linspace(0.2, 0.4, 4)),
    }
    booster = GradientBoostingClassifier(n_estimators=100, random_state=0)
    return test_model(booster, X, y, search_space, sample_weight=y_weights)

In [64]:
gbc(X_train_ohe, y_train)

({'learning_rate': 0.1,
  'max_depth': 4,
  'max_features': 0.26666666666666666,
  'min_samples_leaf': 3},
 mean_test_roc_auc         0.603783
 std_test_roc_auc          0.093483
 mean_train_roc_auc        1.000000
 std_train_roc_auc         0.000000
 mean_test_accuracy        0.808696
 std_test_accuracy         0.024934
 mean_train_accuracy       0.994203
 std_train_accuracy        0.003695
 mean_test_recall          0.101515
 std_test_recall           0.081396
 mean_train_recall         0.966223
 std_train_recall          0.021336
 mean_test_precision       0.326667
 std_test_precision        0.213333
 mean_train_precision      1.000000
 std_train_precision       0.000000
 mean_test_true_pos        1.200000
 std_test_true_pos         0.979796
 mean_train_true_pos      45.600000
 std_train_true_pos        0.800000
 mean_test_true_neg       54.600000
 std_test_true_neg         1.019804
 mean_train_true_neg     228.800000
 std_train_true_neg        0.400000
 mean_test_false_pos       2.

In [65]:
gbc(X_train_ohe, y_train, balanced=True)

({'learning_rate': 0.05,
  'max_depth': 4,
  'max_features': 0.4,
  'min_samples_leaf': 17},
 mean_test_roc_auc         0.589000
 std_test_roc_auc          0.082245
 mean_train_roc_auc        0.987245
 std_train_roc_auc         0.003002
 mean_test_accuracy        0.724638
 std_test_accuracy         0.035500
 mean_train_accuracy       0.924638
 std_train_accuracy        0.007390
 mean_test_recall          0.322727
 std_test_recall           0.144584
 mean_train_recall         1.000000
 std_train_recall          0.000000
 mean_test_precision       0.247165
 std_test_precision        0.088026
 mean_train_precision      0.694706
 std_train_precision       0.021783
 mean_test_true_pos        3.800000
 std_test_true_pos         1.720465
 mean_train_true_pos      47.200000
 std_train_true_pos        0.400000
 mean_test_true_neg       46.200000
 std_test_true_neg         2.785678
 mean_train_true_neg     208.000000
 std_train_true_neg        1.897367
 mean_test_false_pos      11.000000
 std_te

In [66]:
from sklearn.ensemble import AdaBoostClassifier

def ab(X, y, balanced=False):
    """Tune an AdaBoost classifier via ``test_model``.

    Parameters
    ----------
    X, y :
        Training features and targets. With ``balanced=True`` ``y`` must
        support ``.apply`` (presumably a pandas Series of 0/1 labels).
    balanced : bool, default False
        When True, samples are weighted by the "balanced" class weights;
        otherwise every sample gets weight 1.

    Returns
    -------
    tuple
        ``(best_params, best_scores)`` as returned by ``test_model``.
    """
    if not balanced:
        y_weights = np.ones(y.shape)
    else:
        # classes=[1,0] makes the positive-class weight come back first.
        pos_weight, neg_weight = compute_class_weight(class_weight="balanced",
                                                      classes=[1,0],
                                                      y=y)
        y_weights = y.apply(lambda label: pos_weight if label else neg_weight)

    search_space = {
        'n_estimators': [100, 200],
        'learning_rate': [0.001, 0.01, 0.1, 0.2, 0.5],
    }
    booster = AdaBoostClassifier(random_state=0)
    return test_model(booster, X, y, search_space, sample_weight=y_weights)

In [67]:
ab(X_train_ohe, y_train)

({'learning_rate': 0.2, 'n_estimators': 200},
 mean_test_roc_auc         0.590280
 std_test_roc_auc          0.106138
 mean_train_roc_auc        0.947166
 std_train_roc_auc         0.013459
 mean_test_accuracy        0.814493
 std_test_accuracy         0.042402
 mean_train_accuracy       0.874638
 std_train_accuracy        0.013672
 mean_test_recall          0.101515
 std_test_recall           0.081396
 mean_train_recall         0.326064
 std_train_recall          0.072066
 mean_test_precision       0.500000
 std_test_precision        0.353553
 mean_train_precision      0.842628
 std_train_precision       0.076936
 mean_test_true_pos        1.200000
 std_test_true_pos         0.979796
 mean_train_true_pos      15.400000
 std_train_true_pos        3.440930
 mean_test_true_neg       55.000000
 std_test_true_neg         2.280351
 mean_train_true_neg     226.000000
 std_train_true_neg        1.264911
 mean_test_false_pos       2.200000
 std_test_false_pos        2.135416
 mean_train_false_

In [68]:
ab(X_train_ohe, y_train, balanced=True)

({'learning_rate': 0.1, 'n_estimators': 200},
 mean_test_roc_auc         0.587900
 std_test_roc_auc          0.092174
 mean_train_roc_auc        0.917463
 std_train_roc_auc         0.017252
 mean_test_accuracy        0.689855
 std_test_accuracy         0.045462
 mean_train_accuracy       0.802899
 std_train_accuracy        0.017240
 mean_test_recall          0.425758
 std_test_recall           0.187561
 mean_train_recall         0.868528
 std_train_recall          0.041540
 mean_test_precision       0.248419
 std_test_precision        0.085682
 mean_train_precision      0.460335
 std_train_precision       0.026577
 mean_test_true_pos        5.000000
 std_test_true_pos         2.190890
 mean_train_true_pos      41.000000
 std_train_true_pos        2.097618
 mean_test_true_neg       42.600000
 std_test_true_neg         3.072458
 mean_train_true_neg     180.600000
 std_train_true_neg        4.127953
 mean_test_false_pos      14.600000
 std_test_false_pos        3.006659
 mean_train_false_

In [71]:
from sklearn.naive_bayes import GaussianNB

def nb(X, y, balanced=False):
    """Tune a Gaussian naive-Bayes classifier via ``test_model``.

    Parameters
    ----------
    X, y :
        Training features and targets. With ``balanced=True`` ``y`` must
        support ``.apply`` (presumably a pandas Series of 0/1 labels).
    balanced : bool, default False
        When True, samples are weighted by the "balanced" class weights;
        otherwise every sample gets weight 1.

    Returns
    -------
    tuple
        ``(best_params, best_scores)`` as returned by ``test_model``.
    """
    if not balanced:
        y_weights = np.ones(y.shape)
    else:
        # classes=[1,0] makes the positive-class weight come back first.
        pos_weight, neg_weight = compute_class_weight(class_weight="balanced",
                                                      classes=[1,0],
                                                      y=y)
        y_weights = y.apply(lambda label: pos_weight if label else neg_weight)

    # Nine log-spaced smoothing values from 1e-11 to 1e-3.
    search_space = {'var_smoothing': np.logspace(-11, -3, 9, base=10)}
    classifier = GaussianNB()
    return test_model(classifier, X, y, search_space, sample_weight=y_weights)

In [72]:
nb(X_train_ohe, y_train)

({'var_smoothing': 0.001},
 mean_test_roc_auc         0.533499
 std_test_roc_auc          0.072551
 mean_train_roc_auc        0.718361
 std_train_roc_auc         0.016121
 mean_test_accuracy        0.797101
 std_test_accuracy         0.022452
 mean_train_accuracy       0.830435
 std_train_accuracy        0.009559
 mean_test_recall          0.083333
 std_test_recall           0.091287
 mean_train_recall         0.182358
 std_train_recall          0.044308
 mean_test_precision       0.219048
 std_test_precision        0.182201
 mean_train_precision      0.519149
 std_train_precision       0.077738
 mean_test_true_pos        1.000000
 std_test_true_pos         1.095445
 mean_train_true_pos       8.600000
 std_train_true_pos        2.059126
 mean_test_true_neg       54.000000
 std_test_true_neg         1.264911
 mean_train_true_neg     220.600000
 std_train_true_neg        2.653300
 mean_test_false_pos       3.200000
 std_test_false_pos        1.600000
 mean_train_false_pos      8.200000
 

In [73]:
nb(X_train_ohe, y_train, balanced=True)

({'var_smoothing': 0.001},
 mean_test_roc_auc         0.533499
 std_test_roc_auc          0.072551
 mean_train_roc_auc        0.718361
 std_train_roc_auc         0.016121
 mean_test_accuracy        0.684058
 std_test_accuracy         0.059685
 mean_train_accuracy       0.734058
 std_train_accuracy        0.050079
 mean_test_recall          0.337879
 std_test_recall           0.102158
 mean_train_recall         0.551152
 std_train_recall          0.088782
 mean_test_precision       0.230590
 std_test_precision        0.046357
 mean_train_precision      0.341162
 std_train_precision       0.031857
 mean_test_true_pos        4.000000
 std_test_true_pos         1.264911
 mean_train_true_pos      26.000000
 std_train_true_pos        4.098780
 mean_test_true_neg       43.200000
 std_test_true_neg         5.114685
 mean_train_true_neg     176.600000
 std_train_true_neg       17.816846
 mean_test_false_pos      14.000000
 std_test_false_pos        5.176872
 mean_train_false_pos     52.200000
 

In [11]:
from sklearn.linear_model import LogisticRegression

def lr(X, y, balanced=False):
    """Tune an L2-penalised logistic regression via ``test_model``.

    Parameters
    ----------
    X, y :
        Training features and targets.
    balanced : bool, default False
        When True, every candidate is fitted with ``class_weight="balanced"``.

    Returns
    -------
    tuple
        ``(best_params, best_scores)`` as returned by ``test_model``.
    """
    clf = LogisticRegression(random_state=0, max_iter=10000)
    param_grid = {'penalty': ['l2'],
                  'solver': ["liblinear"],
                  'C': np.logspace(-4, 4, 20)}
    if balanced:
        # Plain list of candidate values; a trailing comma here would wrap it
        # in a tuple and pass the list ["balanced"] itself as class_weight.
        param_grid["class_weight"] = ["balanced"]
    return test_model(clf, X, y, param_grid)

In [48]:
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis

def qda(X, y, balanced=False):
    """Evaluate quadratic discriminant analysis via ``test_model``.

    The grid contains only ``reg_param=0.0``, so the search just yields
    cross-validated scores for that single configuration.

    Parameters
    ----------
    X, y :
        Training features and targets.
    balanced : bool, default False
        Accepted so every model helper in this notebook can be called with
        the same arguments; it is not used here.

    Returns
    -------
    tuple
        ``(best_params, best_scores)`` as returned by ``test_model``.
    """
    clf = QuadraticDiscriminantAnalysis()
    param_grid = {'reg_param': [0.0]}
    return test_model(clf, X, y, param_grid)

In [50]:
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

def lda(X, y, balanced=False):
    """Tune linear discriminant analysis via ``test_model``.

    Parameters
    ----------
    X, y :
        Training features and targets.
    balanced : bool, default False
        Accepted so every model helper in this notebook can be called with
        the same arguments; it is not used here.

    Returns
    -------
    tuple
        ``(best_params, best_scores)`` as returned by ``test_model``.
    """
    clf = LinearDiscriminantAnalysis()
    # Shrinkage is only supported by the 'lsqr' and 'eigen' solvers. Crossing
    # it with 'svd' in one flat grid creates candidates whose fit always
    # fails, so 'svd' gets its own sub-grid with shrinkage disabled.
    param_grid = [
        {'solver': ["svd"],
         "shrinkage": [None]},
        {'solver': ["lsqr", "eigen"],
         "shrinkage": [None, "auto", 0.1, 0.3, 0.8, 1]},
    ]
    return test_model(clf, X, y, param_grid)

In [51]:
lda(X_train_ohe, y_train)

({'shrinkage': 0.3, 'solver': 'lsqr'},
 mean_test_roc_auc         0.547332
 std_test_roc_auc          0.080731
 mean_train_roc_auc        0.666632
 std_train_roc_auc         0.020374
 mean_test_accuracy        0.828986
 std_test_accuracy         0.005797
 mean_train_accuracy       0.834783
 std_train_accuracy        0.002899
 mean_test_recall          0.016667
 std_test_recall           0.033333
 mean_train_recall         0.046454
 std_train_recall          0.033649
 mean_test_precision       0.200000
 std_test_precision        0.400000
 mean_train_precision      0.693333
 std_train_precision       0.369023
 mean_test_true_pos        0.200000
 std_test_true_pos         0.400000
 mean_train_true_pos       2.200000
 std_train_true_pos        1.600000
 mean_test_true_neg       57.000000
 std_test_true_neg         0.000000
 mean_train_true_neg     228.200000
 std_train_true_neg        0.979796
 mean_test_false_pos       0.200000
 std_test_false_pos        0.400000
 mean_train_false_pos    

In [14]:
data_dir = "../data/train_test_data/"

# Training split: one-hot encoded features (raw and scaled variants) plus targets.
X_train_ohe = pd.read_pickle(data_dir + "X_train_ohe.pkl")
X_train_ohe_scaled = pd.read_pickle(data_dir + "X_train_ohe_scaled.pkl")
y_train = pd.read_pickle(data_dir + "y_train.pkl")

# Display name -> helper that grid-searches that model family.
techniques_dict = {
    'K Nearest Neighbours': nns,
    'Support Vector Machines': svm,
    'Gaussian Process': gp,
    'Random Forest Classifier': rfc,
    'Gradient Boosting Classifier': gbc,
    'Ada Boost classifier': ab,
    'Gaussian Naieve Bayes': nb,
    'Logistic Regression': lr,
    "Linear Discriminant Analysis": lda,
    'Quadratic Discriminant Analysis': qda,
}

cv_results_list = []
best_params_dict = {}
for model_type, cv_model_func in techniques_dict.items():
    print(f"fitting {model_type}......")
    # These helpers are fitted on the scaled features; every other model
    # gets the unscaled matrix.
    if cv_model_func in [nns, svm, gp, lr, lda, qda]:
        features = X_train_ohe_scaled
    else:
        features = X_train_ohe
    best_params, cv_results = cv_model_func(features, y_train)
    best_params_dict[model_type] = best_params
    cv_results_list.append(cv_results)
    print("done.")

# One row of best-candidate scores per model, labelled by display name.
cv_results_df = pd.DataFrame(cv_results_list, index=techniques_dict.keys())
cv_results_df

fitting K Nearest Neighbours......
done.
fitting Support Vector Machines......
done.
fitting Gaussian Process......
done.
fitting Random Forest Classifier......
done.
fitting Gradient Boosting Classifier......
done.
fitting Ada Boost classifier......
done.
fitting Gaussian Naieve Bayes......
done.
fitting Logistic Regression......
done.
fitting Quadratic Discriminant Analysis......
done.


Unnamed: 0,mean_test_accuracy,std_test_accuracy,mean_train_accuracy,std_train_accuracy,mean_test_recall,std_test_recall,mean_train_recall,std_train_recall,mean_test_precision,std_test_precision,...,mean_train_true_neg,std_train_true_neg,mean_test_false_pos,std_test_false_pos,mean_train_false_pos,std_train_false_pos,mean_test_false_neg,std_test_false_neg,mean_train_false_neg,std_train_false_neg
K Nearest Neighbours,0.834783,0.011594,1.0,0.0,0.034848,0.042748,1.0,0.0,0.4,0.489898,...,228.8,0.4,0.0,0.0,0.0,0.0,11.4,0.8,0.0,0.0
Support Vector Machines,0.828986,0.005797,0.828986,0.001449,0.0,0.0,0.0,0.0,0.0,0.0,...,228.8,0.4,0.0,0.0,0.0,0.0,11.8,0.4,47.2,0.4
Gaussian Process,0.828986,0.005797,0.828986,0.001449,0.0,0.0,0.0,0.0,0.0,0.0,...,228.8,0.4,0.0,0.0,0.0,0.0,11.8,0.4,47.2,0.4
Random Forest Classifier,0.828986,0.005797,0.832609,0.004225,0.0,0.0,0.021099,0.026775,0.0,0.0,...,228.8,0.4,0.0,0.0,0.0,0.0,11.8,0.4,46.2,1.16619
Gradient Boosting Classifier,0.831884,0.0071,0.863768,0.005423,0.033333,0.040825,0.203457,0.029255,0.4,0.489898,...,228.8,0.4,0.2,0.4,0.0,0.0,11.4,0.489898,37.6,1.496663
Ada Boost classifier,0.828986,0.005797,0.831884,0.002899,0.0,0.0,0.016933,0.015899,0.0,0.0,...,228.8,0.4,0.0,0.0,0.0,0.0,11.8,0.4,46.4,0.8
Gaussian Naieve Bayes,0.797101,0.022452,0.830435,0.009559,0.083333,0.091287,0.182358,0.044308,0.219048,0.182201,...,220.6,2.6533,3.2,1.6,8.2,2.481935,10.8,0.979796,38.6,2.244994
Logistic Regression,0.828986,0.005797,0.828986,0.001449,0.0,0.0,0.0,0.0,0.0,0.0,...,228.8,0.4,0.0,0.0,0.0,0.0,11.8,0.4,47.2,0.4
Quadratic Discriminant Analysis,0.817391,0.011594,0.841304,0.0142,0.016667,0.033333,0.071277,0.089741,0.066667,0.133333,...,228.8,0.4,1.0,0.894427,0.0,0.0,11.6,0.489898,43.8,3.919184


In [15]:
# Subset of score columns shown in the summary table, in display order.
display_features = [
    # accuracy
    "mean_train_accuracy",
    "mean_test_accuracy",
    # precision / recall on the training folds
    "mean_train_precision",
    "mean_train_recall",
    # precision / recall on the held-out folds
    "mean_test_precision",
    "mean_test_recall",
    # held-out confusion-matrix counts
    "mean_test_true_neg",
    "mean_test_false_neg",
    "mean_test_true_pos",
    "mean_test_false_pos",
]

cv_results_df[display_features]

Unnamed: 0,mean_train_accuracy,mean_test_accuracy,mean_train_precision,mean_train_recall,mean_test_precision,mean_test_recall,mean_test_true_neg,mean_test_false_neg,mean_test_true_pos,mean_test_false_pos
K Nearest Neighbours,1.0,0.834783,1.0,1.0,0.4,0.034848,57.2,11.4,0.4,0.0
Support Vector Machines,0.828986,0.828986,0.0,0.0,0.0,0.0,57.2,11.8,0.0,0.0
Gaussian Process,0.828986,0.828986,0.0,0.0,0.0,0.0,57.2,11.8,0.0,0.0
Random Forest Classifier,0.832609,0.828986,0.4,0.021099,0.0,0.0,57.2,11.8,0.0,0.0
Gradient Boosting Classifier,0.863768,0.831884,1.0,0.203457,0.4,0.033333,57.0,11.4,0.4,0.2
Ada Boost classifier,0.831884,0.828986,0.6,0.016933,0.0,0.0,57.2,11.8,0.0,0.0
Gaussian Naieve Bayes,0.830435,0.797101,0.519149,0.182358,0.219048,0.083333,54.0,10.8,1.0,3.2
Logistic Regression,0.828986,0.828986,0.0,0.0,0.0,0.0,57.2,11.8,0.0,0.0
Quadratic Discriminant Analysis,0.841304,0.817391,1.0,0.071277,0.066667,0.016667,56.2,11.6,0.2,1.0


In [18]:
data_dir = "../data/train_test_data/"

# "res" training split: one-hot encoded features (raw and scaled variants) plus targets.
X_train_res_ohe = pd.read_pickle(data_dir + "X_train_res_ohe.pkl")
X_train_res_ohe_scaled = pd.read_pickle(data_dir + "X_train_res_ohe_scaled.pkl")
y_train_res = pd.read_pickle(data_dir + "y_train_res.pkl")

# Display name -> helper that grid-searches that model family.
techniques_dict = {
    'K Nearest Neighbours': nns,
    'Support Vector Machines': svm,
    'Gaussian Process': gp,
    'Random Forest Classifier': rfc,
    'Gradient Boosting Classifier': gbc,
    'Ada Boost classifier': ab,
    'Gaussian Naieve Bayes': nb,
    'Logistic Regression': lr,
    'Quadratic Discriminant Analysis': qda,
}

res_cv_results_list = []
res_best_params_dict = {}
for model_type, cv_model_func in techniques_dict.items():
    print(f"fitting {model_type}......")
    # These helpers are fitted on the scaled features; every other model
    # gets the unscaled matrix.
    if cv_model_func in [nns, svm, gp, lr, lda, qda]:
        features = X_train_res_ohe_scaled
    else:
        features = X_train_res_ohe
    best_params, cv_results = cv_model_func(features, y_train_res)
    res_best_params_dict[model_type] = best_params
    res_cv_results_list.append(cv_results)
    print("done.")

# One row of best-candidate scores per model, labelled by display name.
res_cv_results_df = pd.DataFrame(res_cv_results_list, index=techniques_dict.keys())

fitting K Nearest Neighbours......
done.
fitting Support Vector Machines......
done.
fitting Gaussian Process......
done.
fitting Random Forest Classifier......
done.
fitting Gradient Boosting Classifier......
done.
fitting Ada Boost classifier......
done.
fitting Gaussian Naieve Bayes......
done.
fitting Logistic Regression......
done.
fitting Quadratic Discriminant Analysis......
done.


Unnamed: 0,mean_test_accuracy,std_test_accuracy,mean_train_accuracy,std_train_accuracy,mean_test_recall,std_test_recall,mean_train_recall,std_train_recall,mean_test_precision,std_test_precision,...,mean_train_true_neg,std_train_true_neg,mean_test_false_pos,std_test_false_pos,mean_train_false_pos,std_train_false_pos,mean_test_false_neg,std_test_false_neg,mean_train_false_neg,std_train_false_neg
K Nearest Neighbours,0.834783,0.011594,1.0,0.0,0.034848,0.042748,1.0,0.0,0.4,0.489898,...,228.8,0.4,0.0,0.0,0.0,0.0,11.4,0.8,0.0,0.0
Support Vector Machines,0.828986,0.005797,0.828986,0.001449,0.0,0.0,0.0,0.0,0.0,0.0,...,228.8,0.4,0.0,0.0,0.0,0.0,11.8,0.4,47.2,0.4
Gaussian Process,0.828986,0.005797,0.828986,0.001449,0.0,0.0,0.0,0.0,0.0,0.0,...,228.8,0.4,0.0,0.0,0.0,0.0,11.8,0.4,47.2,0.4
Random Forest Classifier,0.828986,0.005797,0.832609,0.004225,0.0,0.0,0.021099,0.026775,0.0,0.0,...,228.8,0.4,0.0,0.0,0.0,0.0,11.8,0.4,46.2,1.16619
Gradient Boosting Classifier,0.831884,0.0071,0.863768,0.005423,0.033333,0.040825,0.203457,0.029255,0.4,0.489898,...,228.8,0.4,0.2,0.4,0.0,0.0,11.4,0.489898,37.6,1.496663
Ada Boost classifier,0.828986,0.005797,0.831884,0.002899,0.0,0.0,0.016933,0.015899,0.0,0.0,...,228.8,0.4,0.0,0.0,0.0,0.0,11.8,0.4,46.4,0.8
Gaussian Naieve Bayes,0.797101,0.022452,0.830435,0.009559,0.083333,0.091287,0.182358,0.044308,0.219048,0.182201,...,220.6,2.6533,3.2,1.6,8.2,2.481935,10.8,0.979796,38.6,2.244994
Logistic Regression,0.828986,0.005797,0.828986,0.001449,0.0,0.0,0.0,0.0,0.0,0.0,...,228.8,0.4,0.0,0.0,0.0,0.0,11.8,0.4,47.2,0.4
Quadratic Discriminant Analysis,0.817391,0.011594,0.841304,0.0142,0.016667,0.033333,0.071277,0.089741,0.066667,0.133333,...,228.8,0.4,1.0,0.894427,0.0,0.0,11.6,0.489898,43.8,3.919184


In [21]:
# Show every column instead of eliding the middle ones with "...".
pd.set_option("display.max_columns", None)

# One row of best-candidate scores per model, labelled by display name.
res_cv_results_df = pd.DataFrame(res_cv_results_list, index=techniques_dict.keys())
res_cv_results_df

Unnamed: 0,mean_test_accuracy,std_test_accuracy,mean_train_accuracy,std_train_accuracy,mean_test_recall,std_test_recall,mean_train_recall,std_train_recall,mean_test_precision,std_test_precision,mean_train_precision,std_train_precision,mean_test_true_pos,std_test_true_pos,mean_train_true_pos,std_train_true_pos,mean_test_true_neg,std_test_true_neg,mean_train_true_neg,std_train_true_neg,mean_test_false_pos,std_test_false_pos,mean_train_false_pos,std_train_false_pos,mean_test_false_neg,std_test_false_neg,mean_train_false_neg,std_train_false_neg
K Nearest Neighbours,0.799161,0.091665,0.949752,0.022145,0.824924,0.211814,0.899456,0.044329,0.776295,0.046845,1.0,0.0,47.2,12.139193,205.8,10.244999,44.2,2.925748,228.8,0.4,13.0,2.683282,0.0,0.0,10.0,12.066483,23.0,10.139033
Support Vector Machines,0.748513,0.109712,0.847046,0.034454,0.80369,0.229235,0.89946,0.037613,0.705949,0.082772,0.81401,0.03011,46.0,13.190906,205.8,8.704022,39.6,1.019804,181.8,7.249828,17.6,0.8,47.0,7.615773,11.2,13.059862,23.0,8.602325
Gaussian Process,0.804546,0.129062,0.99126,0.003654,0.789837,0.308617,0.995629,0.003906,0.790737,0.063836,0.987002,0.003872,45.2,17.656727,227.8,0.979796,46.8,3.37046,225.8,0.748331,10.4,3.006659,3.0,0.894427,12.0,17.584084,1.0,0.894427
Random Forest Classifier,0.828955,0.126832,0.9091,0.014866,0.873866,0.218453,0.947548,0.026785,0.789074,0.10083,0.880361,0.018717,50.0,12.521981,216.8,6.177378,44.8,4.166533,199.2,5.6,12.4,4.317407,29.6,5.4626,7.2,12.448293,12.0,6.131884
Gradient Boosting Classifier,0.846499,0.130142,1.0,0.0,0.828252,0.318266,1.0,0.0,0.869949,0.043754,1.0,0.0,47.4,18.216476,228.8,0.4,49.4,4.841487,228.8,0.4,7.8,4.534314,0.0,0.0,9.8,18.137254,0.0,0.0
Ada Boost classifier,0.797529,0.114798,0.926595,0.024814,0.817846,0.26953,0.934444,0.030247,0.771731,0.063102,0.91997,0.022535,46.8,15.432433,213.8,6.910861,44.4,4.317407,210.2,5.035871,12.8,4.069398,18.6,5.314132,10.4,15.357083,15.0,6.928203
Gaussian Naieve Bayes,0.725782,0.125505,0.766185,0.031478,0.828252,0.23946,0.888968,0.036558,0.668393,0.104228,0.713578,0.024141,47.4,13.749182,203.4,8.475848,35.6,2.244994,147.2,6.4,21.6,2.332381,81.6,6.74092,9.8,13.644046,25.4,8.357033
Logistic Regression,0.750328,0.119565,0.840494,0.035037,0.810829,0.256861,0.883724,0.043071,0.700898,0.094759,0.813002,0.027682,46.4,14.718696,202.2,9.947864,39.4,2.416609,182.4,6.08605,17.8,2.227106,46.4,6.468385,10.8,14.634207,26.6,9.850888
Quadratic Discriminant Analysis,0.683738,0.111873,0.753954,0.028103,0.866788,0.205733,0.944055,0.033759,0.625408,0.086105,0.6848,0.024741,49.6,11.825396,216.0,7.745967,28.6,5.083306,129.0,11.798305,28.6,5.083306,99.8,11.956588,7.6,11.723481,12.8,7.730459


In [57]:

def functioney(a, b, **kwargs):
    """Print the two positional arguments and the collected keyword arguments.

    Scratch demo of how ``**kwargs`` gathers extra keyword arguments into a
    dict. The body is reconstructed from this cell's recorded output.
    """
    print(f"a is {a}")
    print(f"b is {b}")
    print(f"the rest is {kwargs}")


functioney("A", "B", d="C", e="D", f="£")

a is A
b is B
the rest is {'d': 'C', 'e': 'D', 'f': '£'}
