In [1]:
%matplotlib inline
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

## Reading in and preprocessing data

In [None]:
# Reading in data from the 50-50 readmit vs. nonreadmit dataset

df = pd.read_csv('dfd.csv')
df.head()

In [None]:
# Dropping certain columns

# Dropping ID numbers and dates
df = df.drop(columns=['subject_id', 'hadm_id', 'admittime', 'dischtime'])

# Dropping labevents and chartevents values of less importance based on feature selection (2/3 values for each measurement)
df = df.drop(columns=['rdw_min', 'rdw_max', 'hemoglobin_min', 'hemoglobin_max', 'creatinine_median', 'creatinine_min', 
                      'hematocrit_median', 'hematocrit_min', 'tempc_median', 'tempc_max', 'resprate_median', 
                      'resprate_min', 'wbc_median', 'wbc_max', 'inr_min', 'inr_median', 'ptt_median', 'ptt_max', 
                      'lactate_median', 'lactate_max', 'sysbp_median', 'sysbp_min', 'spo2_median', 'spo2_max', 
                      'bilirubin_median', 'bilirubin_max', 'platelet_median', 'platelet_max', 'heartrate_min',
                      'heartrate_median'])
df.head()

In [None]:
# Converting categorical features into dummy variables

df_converted = pd.get_dummies(df)
df_converted.head()

In [None]:
# Splitting dataframe into data (predictors) vs. label (attributed to be predicted)

label_df = df_converted.pop('followed_by_readmit')
data_df = df_converted
print('label_df:\n', label_df.head(), 2*'\n', 'data_df:\n', data_df.head())

In [6]:
# Converting dataframes to NumPy arrays

label = label_df.values
data = data_df.values

In [7]:
label

array([ True,  True,  True, ..., False, False, False])

In [8]:
data

array([[70, 8, 0, ..., 0, 0, 0],
       [42, 19, 0, ..., 0, 0, 0],
       [60, 8, 0, ..., 0, 1, 0],
       ...,
       [74, 19, 0, ..., 0, 0, 0],
       [67, 7, 0, ..., 0, 0, 0],
       [59, 9, 0, ..., 0, 0, 0]], dtype=object)

## Train/test split

In [9]:
# 80/20 train-test split

from sklearn.model_selection import train_test_split

train_data, test_data, train_label, test_label = train_test_split(data, label, train_size=0.8, test_size=0.2, random_state=10)

print('Training data:', train_data.shape, '\tTest data:', test_data.shape)
print('Training labels:', train_label.shape, '\tTest labels:', test_label.shape)

Training data: (4873, 110) 	Test data: (1219, 110)
Training labels: (4873,) 	Test labels: (1219,)


In [10]:
# Cross-validation on the training set (no need to do explicitly for Logistic Reg. since has a cv parameter, but just in case...)

#from sklearn.model_selection import KFold
#kf = KFold(n_splits=10, random_state=10)
#for train, test in kf.split(train_data, train_label):
    #print(test)

Note that cross-validation will be performed with the training data for each machine learning algorithm tested below.

## (Penalized) Logistic regression

From http://scikit-learn.org/stable/modules/linear_model.html#logistic-regression:
"LogisticRegressionCV implements Logistic Regression with builtin cross-validation to find out the optimal C parameter (similar to what GridSearchCV might do)."

In [11]:
from sklearn.linear_model import LogisticRegressionCV

First, let's try using the `liblinear` solver (recommended for small datasets) with 'l1' penalty:

In [12]:
# Instantiating logistic regression estimator object; cv=10 indicates 10-fold cross-validation

lrcv_l1 = LogisticRegressionCV(cv=10, penalty='l1', solver='liblinear', random_state=10)

In [13]:
%%time

lrcv_l1.fit(train_data, train_label)

Wall time: 5min 40s


LogisticRegressionCV(Cs=10, class_weight=None, cv=10, dual=False,
           fit_intercept=True, intercept_scaling=1.0, max_iter=100,
           multi_class='ovr', n_jobs=1, penalty='l1', random_state=10,
           refit=True, scoring=None, solver='liblinear', tol=0.0001,
           verbose=0)

In [14]:
lrcv_l1_pred = lrcv_l1.predict(test_data)
lrcv_l1_pp = lrcv_l1.predict_proba(test_data)

Running self.classes_ on the result from fitting the model to the data (see commented code below) tells us that the probability estimates for positive class ("True") are in column 1 rather than column 0 of the result from `predict_proba()` - this is what we'll need to use when calculating the ROC-AUC.

In [15]:
#lrcv_l1.fit(train_data_sc, train_label).classes_

In [16]:
from sklearn.metrics import roc_auc_score

lrcv_l1_probs = lrcv_l1.predict_proba(test_data)[:,1]
print('ROC-AUC score with l1 penalty:', roc_auc_score(test_label, lrcv_l1_probs))

ROC-AUC score with l1 penalty: 0.7012404754976451


What if we used 'l2' penalty instead?

In [17]:
%%time
# Repeating the code above for logistic regression but with l2 penalty

lrcv_l2 = LogisticRegressionCV(cv=10, penalty='l2', solver='liblinear', random_state=10)
lrcv_l2.fit(train_data, train_label)
lrcv_l2_pred = lrcv_l2.predict(test_data)
lrcv_l2_pp = lrcv_l2.predict_proba(test_data)
lrcv_l2_probs = lrcv_l2.predict_proba(test_data)[:,1]
print('ROC-AUC score with l2 penalty:', roc_auc_score(test_label, lrcv_l2_probs))

ROC-AUC score with l2 penalty: 0.6992008578787977
Wall time: 1min


Using the liblinear solver, l1 penalty has a very slightly better ROC-AUC score.

Now, let's get the confusion matrix and precision/recall/F1-score.

In [18]:
from sklearn.metrics import confusion_matrix

confusion_matrix(test_label, lrcv_l1_pred)

array([[385, 206],
       [237, 391]], dtype=int64)

In [19]:
from sklearn.metrics import classification_report

print(classification_report(test_label, lrcv_l1_pred))

             precision    recall  f1-score   support

      False       0.62      0.65      0.63       591
       True       0.65      0.62      0.64       628

avg / total       0.64      0.64      0.64      1219



## Stochastic Gradient Descent

We will test SGD optimization with logistic regression and modified Huber loss functions. From the documentation (http://scikit-learn.org/stable/modules/generated/sklearn.linear_model.SGDClassifier.html#sklearn.linear_model.SGDClassifier), "‘modified_huber’ is another smooth loss that brings tolerance to outliers as well as probability estimates". For each, we will test 'l2', 'l1', and 'elasticnet' penalties.

According to the documentation (http://scikit-learn.org/stable/modules/sgd.html#tips-on-practical-use), "finding a reasonable regularization term $\alpha$ is best done using GridSearchCV, usually in the range 10.0**-np.arange(1,7)."

In [20]:
# Importing SGDClassifier and GridSearchCV, creating alphas and penalties dictionaries for use with GridSearchCV

from sklearn.linear_model import SGDClassifier
from sklearn.model_selection import GridSearchCV

alphas = {'alpha': 10.0**-np.arange(1,7)}
penalties = {'l2': 0, 'l1': 1, 'elasticnet': 0.5}

### SGD with log loss

In [21]:
%%time
# Using GridSearchCV to find optimal value for alpha for each penalty type, using ROC-AUC score as with other models

gs_log_scores = {}
gs_log_param = {}

for k,v in penalties.items():
    print('\nPenalty:', k, '\tElastic Net mixing parameter:', v)
    gs_log = GridSearchCV(SGDClassifier(loss='log', penalty=k, l1_ratio=v, random_state=10), param_grid=alphas,
                          scoring='roc_auc', cv=10, verbose=3)
    gs_log_fit = gs_log.fit(train_data, train_label)
    score = gs_log.score(test_data, test_label)
    gs_log_scores[k] = score
    param = gs_log.best_params_
    gs_log_param[k] = param
    print('ROC-AUC score on test set for', k, 'penalty: %f' % score)
    print('Best alpha for', k, 'penalty: %s' % param)

print('\nFinal results:')
print(gs_log_param, gs_log_scores)


Penalty: l2 	Elastic Net mixing parameter: 0
Fitting 10 folds for each of 6 candidates, totalling 60 fits
[CV] alpha=0.1 .......................................................
[CV] ............... alpha=0.1, score=0.561143586642478, total=   0.0s
[CV] alpha=0.1 .......................................................
[CV] .............. alpha=0.1, score=0.5670395753544312, total=   0.0s
[CV] alpha=0.1 .......................................................


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.1s remaining:    0.0s


[CV] .............. alpha=0.1, score=0.6054390915809984, total=   0.0s
[CV] alpha=0.1 .......................................................
[CV] .............. alpha=0.1, score=0.5568097829738627, total=   0.0s
[CV] alpha=0.1 .......................................................
[CV] .............. alpha=0.1, score=0.6212625142780352, total=   0.0s
[CV] alpha=0.1 .......................................................




[CV] .............. alpha=0.1, score=0.6486085343228202, total=   0.0s
[CV] alpha=0.1 .......................................................
[CV] .............. alpha=0.1, score=0.6466857817507169, total=   0.0s
[CV] alpha=0.1 .......................................................
[CV] .............. alpha=0.1, score=0.6347782088041829, total=   0.0s
[CV] alpha=0.1 .......................................................




[CV] .............. alpha=0.1, score=0.6460326869336945, total=   0.0s
[CV] alpha=0.1 .......................................................
[CV] .............. alpha=0.1, score=0.5277330849352189, total=   0.0s
[CV] alpha=0.01 ......................................................
[CV] ............. alpha=0.01, score=0.5574984882080225, total=   0.0s
[CV] alpha=0.01 ......................................................
[CV] ............. alpha=0.01, score=0.5622522340925888, total=   0.0s
[CV] alpha=0.01 ......................................................
[CV] ............. alpha=0.01, score=0.6092353692131962, total=   0.0s
[CV] alpha=0.01 ......................................................




[CV] .............. alpha=0.01, score=0.568047436672714, total=   0.0s
[CV] alpha=0.01 ......................................................
[CV] ............. alpha=0.01, score=0.6150641671705973, total=   0.0s
[CV] alpha=0.01 ......................................................
[CV] ............. alpha=0.01, score=0.6640074211502783, total=   0.0s
[CV] alpha=0.01 ......................................................




[CV] .............. alpha=0.01, score=0.625906560971496, total=   0.0s
[CV] alpha=0.01 ......................................................
[CV] ............. alpha=0.01, score=0.6482543430595378, total=   0.0s
[CV] alpha=0.01 ......................................................
[CV] ............. alpha=0.01, score=0.6348208993140825, total=   0.0s
[CV] alpha=0.01 ......................................................




[CV] ............. alpha=0.01, score=0.5368617156406131, total=   0.0s
[CV] alpha=0.001 .....................................................
[CV] ............ alpha=0.001, score=0.5573641066989183, total=   0.0s
[CV] alpha=0.001 .....................................................
[CV] ............. alpha=0.001, score=0.565057448095142, total=   0.0s
[CV] alpha=0.001 .....................................................




[CV] ............ alpha=0.001, score=0.6121917624134919, total=   0.0s
[CV] alpha=0.001 .....................................................
[CV] ............ alpha=0.001, score=0.5701975408183835, total=   0.0s
[CV] alpha=0.001 .....................................................
[CV] ............ alpha=0.001, score=0.6236141906873613, total=   0.0s
[CV] alpha=0.001 .....................................................
[CV] ............ alpha=0.001, score=0.5899645808736718, total=   0.0s
[CV] alpha=0.001 .....................................................




[CV] ............ alpha=0.001, score=0.6031371226176421, total=   0.0s
[CV] alpha=0.001 .....................................................
[CV] ............ alpha=0.001, score=0.6094956991060887, total=   0.0s
[CV] alpha=0.001 .....................................................
[CV] ............ alpha=0.001, score=0.6074350071978999, total=   0.0s
[CV] alpha=0.001 .....................................................




[CV] ............ alpha=0.001, score=0.5202472690320942, total=   0.0s
[CV] alpha=0.0001 ....................................................
[CV] ............ alpha=0.0001, score=0.557296915944366, total=   0.0s
[CV] alpha=0.0001 ....................................................
[CV] ........... alpha=0.0001, score=0.5631593092790432, total=   0.0s
[CV] alpha=0.0001 ....................................................




[CV] ........... alpha=0.0001, score=0.6062957737015386, total=   0.0s
[CV] alpha=0.0001 ....................................................
[CV] ........... alpha=0.0001, score=0.5571289390579857, total=   0.0s
[CV] alpha=0.0001 ....................................................
[CV] ........... alpha=0.0001, score=0.6267217630853994, total=   0.0s
[CV] alpha=0.0001 ....................................................




[CV] ........... alpha=0.0001, score=0.6570079271377973, total=   0.0s
[CV] alpha=0.0001 ....................................................
[CV] ............ alpha=0.0001, score=0.636768426378816, total=   0.0s
[CV] alpha=0.0001 ....................................................
[CV] ........... alpha=0.0001, score=0.6459436667228875, total=   0.0s
[CV] alpha=0.0001 ....................................................




[CV] ........... alpha=0.0001, score=0.6429333559149801, total=   0.0s
[CV] alpha=0.0001 ....................................................
[CV] ........... alpha=0.0001, score=0.5527140316707596, total=   0.0s
[CV] alpha=1e-05 .....................................................
[CV] ............ alpha=1e-05, score=0.5584223610831149, total=   0.0s
[CV] alpha=1e-05 .....................................................




[CV] ............ alpha=1e-05, score=0.5657629510179399, total=   0.0s
[CV] alpha=1e-05 .....................................................
[CV] ............ alpha=1e-05, score=0.6074380165289257, total=   0.0s
[CV] alpha=1e-05 .....................................................
[CV] ............ alpha=1e-05, score=0.5443458980044346, total=   0.0s
[CV] alpha=1e-05 .....................................................




[CV] ............ alpha=1e-05, score=0.6213633004098637, total=   0.0s
[CV] alpha=1e-05 .....................................................
[CV] ............ alpha=1e-05, score=0.6289087535840783, total=   0.0s
[CV] alpha=1e-05 .....................................................
[CV] ............ alpha=1e-05, score=0.6407825940293472, total=   0.0s
[CV] alpha=1e-05 .....................................................




[CV] ............. alpha=1e-05, score=0.646483386743127, total=   0.0s
[CV] alpha=1e-05 .....................................................
[CV] ............ alpha=1e-05, score=0.6227792361757981, total=   0.0s
[CV] alpha=1e-05 .....................................................
[CV] ............ alpha=1e-05, score=0.5109492759759505, total=   0.0s
[CV] alpha=1e-06 .....................................................
[CV] ............ alpha=1e-06, score=0.5522408116643149, total=   0.0s
[CV] alpha=1e-06 .....................................................
[CV] ............ alpha=1e-06, score=0.5657629510179399, total=   0.0s
[CV] alpha=1e-06 .....................................................




[CV] ............ alpha=1e-06, score=0.6074380165289257, total=   0.0s
[CV] alpha=1e-06 .....................................................
[CV] ............ alpha=1e-06, score=0.5564234361351877, total=   0.0s
[CV] alpha=1e-06 .....................................................
[CV] ............ alpha=1e-06, score=0.6280991735537189, total=   0.0s
[CV] alpha=1e-06 .....................................................
[CV] ............ alpha=1e-06, score=0.6289087535840783, total=   0.0s
[CV] alpha=1e-06 .....................................................




[CV] ............ alpha=1e-06, score=0.6407825940293472, total=   0.0s
[CV] alpha=1e-06 .....................................................
[CV] ............. alpha=1e-06, score=0.607556080283353, total=   0.0s
[CV] alpha=1e-06 .....................................................
[CV] ............ alpha=1e-06, score=0.6217291896011516, total=   0.0s
[CV] alpha=1e-06 .....................................................


[Parallel(n_jobs=1)]: Done  60 out of  60 | elapsed:    4.4s finished


[CV] ............ alpha=1e-06, score=0.5109492759759505, total=   0.0s
ROC-AUC score on test set for l2 penalty: 0.616447
Best alpha for l2 penalty: {'alpha': 0.0001}

Penalty: l1 	Elastic Net mixing parameter: 1
Fitting 10 folds for each of 6 candidates, totalling 60 fits
[CV] alpha=0.1 .......................................................
[CV] .............. alpha=0.1, score=0.6536484579721831, total=   0.0s
[CV] alpha=0.1 .......................................................
[CV] .............. alpha=0.1, score=0.6131492306658604, total=   0.0s
[CV] alpha=0.1 .......................................................
[CV] .............. alpha=0.1, score=0.6388160989047906, total=   0.0s
[CV] alpha=0.1 .......................................................


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.1s remaining:    0.0s


[CV] ............... alpha=0.1, score=0.583971645501579, total=   0.0s
[CV] alpha=0.1 .......................................................
[CV] .............. alpha=0.1, score=0.6196247396358261, total=   0.0s
[CV] alpha=0.1 .......................................................
[CV] .............. alpha=0.1, score=0.5969472086355203, total=   0.0s
[CV] alpha=0.1 .......................................................




[CV] .............. alpha=0.1, score=0.6208466857817507, total=   0.0s
[CV] alpha=0.1 .......................................................
[CV] .............. alpha=0.1, score=0.5908753584078259, total=   0.0s
[CV] alpha=0.1 .......................................................
[CV] ................ alpha=0.1, score=0.57927004826827, total=   0.0s
[CV] alpha=0.1 .......................................................




[CV] .............. alpha=0.1, score=0.6341603861461598, total=   0.0s
[CV] alpha=0.01 ......................................................
[CV] .............. alpha=0.01, score=0.681515823422697, total=   0.0s
[CV] alpha=0.01 ......................................................
[CV] .............. alpha=0.01, score=0.633524826983807, total=   0.0s
[CV] alpha=0.01 ......................................................




[CV] ............. alpha=0.01, score=0.6708996842034536, total=   0.0s
[CV] alpha=0.01 ......................................................
[CV] ............. alpha=0.01, score=0.6152489417456157, total=   0.0s
[CV] alpha=0.01 ......................................................
[CV] ............. alpha=0.01, score=0.6359940872135994, total=   0.0s
[CV] alpha=0.01 ......................................................




[CV] .............. alpha=0.01, score=0.675274076572778, total=   0.0s
[CV] alpha=0.01 ......................................................
[CV] ............. alpha=0.01, score=0.6324506662168999, total=   0.0s
[CV] alpha=0.01 ......................................................
[CV] ............. alpha=0.01, score=0.6271715297689324, total=   0.0s
[CV] alpha=0.01 ......................................................




[CV] ............. alpha=0.01, score=0.6310949275975951, total=   0.0s
[CV] alpha=0.01 ......................................................
[CV] ............. alpha=0.01, score=0.6402404945380642, total=   0.0s
[CV] alpha=0.001 .....................................................
[CV] ............ alpha=0.001, score=0.6022475307397702, total=   0.0s
[CV] alpha=0.001 .....................................................




[CV] ............ alpha=0.001, score=0.6084962709131224, total=   0.0s
[CV] alpha=0.001 .....................................................
[CV] ............ alpha=0.001, score=0.6651548746892427, total=   0.0s
[CV] alpha=0.001 .....................................................
[CV] ............ alpha=0.001, score=0.6061613921924343, total=   0.0s
[CV] alpha=0.001 .....................................................




[CV] ............ alpha=0.001, score=0.6347510582543843, total=   0.0s
[CV] alpha=0.001 .....................................................
[CV] ............ alpha=0.001, score=0.6685107100691516, total=   0.0s
[CV] alpha=0.001 .....................................................
[CV] ............ alpha=0.001, score=0.6420475628267837, total=   0.0s
[CV] alpha=0.001 .....................................................




[CV] ............. alpha=0.001, score=0.654005734525215, total=   0.0s
[CV] alpha=0.001 .....................................................
[CV] ............ alpha=0.001, score=0.6495046151240578, total=   0.0s
[CV] alpha=0.001 .....................................................
[CV] ............ alpha=0.001, score=0.5761537810144806, total=   0.0s
[CV] alpha=0.0001 ....................................................




[CV] ........... alpha=0.0001, score=0.5905227440704159, total=   0.0s
[CV] alpha=0.0001 ....................................................
[CV] ........... alpha=0.0001, score=0.6042968487536116, total=   0.0s
[CV] alpha=0.0001 ....................................................
[CV] ........... alpha=0.0001, score=0.6285023180810321, total=   0.0s
[CV] alpha=0.0001 ....................................................




[CV] ............ alpha=0.0001, score=0.593277565007055, total=   0.0s
[CV] alpha=0.0001 ....................................................
[CV] ........... alpha=0.0001, score=0.6255963179466505, total=   0.0s
[CV] alpha=0.0001 ....................................................
[CV] ........... alpha=0.0001, score=0.6770281666385561, total=   0.0s
[CV] alpha=0.0001 ....................................................




[CV] ........... alpha=0.0001, score=0.6488615280823073, total=   0.0s
[CV] alpha=0.0001 ....................................................
[CV] ........... alpha=0.0001, score=0.6601956485073368, total=   0.0s
[CV] alpha=0.0001 ....................................................
[CV] ............ alpha=0.0001, score=0.659869590989923, total=   0.0s
[CV] alpha=0.0001 ....................................................




[CV] ........... alpha=0.0001, score=0.5508679820475908, total=   0.0s
[CV] alpha=1e-05 .....................................................
[CV] ............ alpha=1e-05, score=0.5903211718067594, total=   0.0s
[CV] alpha=1e-05 .....................................................
[CV] ............ alpha=1e-05, score=0.5945709870321844, total=   0.0s
[CV] alpha=1e-05 .....................................................




[CV] ............ alpha=1e-05, score=0.6309883759994626, total=   0.0s
[CV] alpha=1e-05 .....................................................
[CV] ............. alpha=1e-05, score=0.567375529127192, total=   0.0s
[CV] alpha=1e-05 .....................................................
[CV] ............ alpha=1e-05, score=0.6163911845730028, total=   0.0s
[CV] alpha=1e-05 .....................................................




[CV] ............ alpha=1e-05, score=0.6734187890032046, total=   0.0s
[CV] alpha=1e-05 .....................................................
[CV] ............ alpha=1e-05, score=0.6388092427053466, total=   0.0s
[CV] alpha=1e-05 .....................................................
[CV] ............ alpha=1e-05, score=0.6374430764041153, total=   0.0s
[CV] alpha=1e-05 .....................................................




[CV] ............. alpha=1e-05, score=0.631416716064019, total=   0.0s
[CV] alpha=1e-05 .....................................................
[CV] ............ alpha=1e-05, score=0.5679566432382082, total=   0.0s
[CV] alpha=1e-06 .....................................................
[CV] ............ alpha=1e-06, score=0.5596317946650541, total=   0.0s
[CV] alpha=1e-06 .....................................................




[CV] ............ alpha=1e-06, score=0.5752536450984345, total=   0.0s
[CV] alpha=1e-06 .....................................................
[CV] ............ alpha=1e-06, score=0.6152153463683397, total=   0.0s
[CV] alpha=1e-06 .....................................................
[CV] ............ alpha=1e-06, score=0.5562386615601693, total=   0.0s
[CV] alpha=1e-06 .....................................................




[CV] ............ alpha=1e-06, score=0.6191124101323658, total=   0.0s
[CV] alpha=1e-06 .....................................................
[CV] ............ alpha=1e-06, score=0.6123798279642435, total=   0.0s
[CV] alpha=1e-06 .....................................................
[CV] ............ alpha=1e-06, score=0.6374262101534828, total=   0.0s
[CV] alpha=1e-06 .....................................................




[CV] ............ alpha=1e-06, score=0.6477146230392984, total=   0.0s
[CV] alpha=1e-06 .....................................................
[CV] ............ alpha=1e-06, score=0.6359725632991786, total=   0.0s
[CV] alpha=1e-06 .....................................................
[CV] ............ alpha=1e-06, score=0.5502921500550428, total=   0.0s


[Parallel(n_jobs=1)]: Done  60 out of  60 | elapsed:    4.9s finished


ROC-AUC score on test set for l1 penalty: 0.638196
Best alpha for l1 penalty: {'alpha': 0.01}

Penalty: elasticnet 	Elastic Net mixing parameter: 0.5
Fitting 10 folds for each of 6 candidates, totalling 60 fits
[CV] alpha=0.1 .......................................................
[CV] ............... alpha=0.1, score=0.674477591883357, total=   0.0s
[CV] alpha=0.1 .......................................................
[CV] .............. alpha=0.1, score=0.6009205133373648, total=   0.0s
[CV] alpha=0.1 .......................................................


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.1s remaining:    0.0s


[CV] .............. alpha=0.1, score=0.6066149297856613, total=   0.0s
[CV] alpha=0.1 .......................................................
[CV] ............... alpha=0.1, score=0.603305785123967, total=   0.0s
[CV] alpha=0.1 .......................................................
[CV] .............. alpha=0.1, score=0.6123429416112344, total=   0.0s
[CV] alpha=0.1 .......................................................




[CV] .............. alpha=0.1, score=0.6560634171023783, total=   0.0s
[CV] alpha=0.1 .......................................................
[CV] .............. alpha=0.1, score=0.6518974531961546, total=   0.0s
[CV] alpha=0.1 .......................................................
[CV] .............. alpha=0.1, score=0.6373756114015855, total=   0.0s
[CV] alpha=0.1 .......................................................




[CV] .............. alpha=0.1, score=0.6249470742653909, total=   0.0s
[CV] alpha=0.1 .......................................................
[CV] .............. alpha=0.1, score=0.5944787873655686, total=   0.0s
[CV] alpha=0.01 ......................................................
[CV] ............. alpha=0.01, score=0.6269569307263321, total=   0.0s
[CV] alpha=0.01 ......................................................




[CV] ............. alpha=0.01, score=0.6410165961163743, total=   0.0s
[CV] alpha=0.01 ......................................................
[CV] ............. alpha=0.01, score=0.5917657730296311, total=   0.0s
[CV] alpha=0.01 ......................................................
[CV] ............. alpha=0.01, score=0.6203050460256668, total=   0.0s
[CV] alpha=0.01 ......................................................




[CV] ............. alpha=0.01, score=0.6536988510380972, total=   0.0s
[CV] alpha=0.01 ......................................................
[CV] ............. alpha=0.01, score=0.6832686793725755, total=   0.0s
[CV] alpha=0.01 ......................................................
[CV] ............. alpha=0.01, score=0.5995108787316579, total=   0.0s
[CV] alpha=0.01 ......................................................




[CV] ............. alpha=0.01, score=0.6527744982290437, total=   0.0s
[CV] alpha=0.01 ......................................................
[CV] ............. alpha=0.01, score=0.6305529680751969, total=   0.0s
[CV] alpha=0.01 ......................................................
[CV] ............... alpha=0.01, score=0.63111186383267, total=   0.0s
[CV] alpha=0.001 .....................................................




[CV] ............ alpha=0.001, score=0.5615635288584291, total=   0.0s
[CV] alpha=0.001 .....................................................
[CV] ............ alpha=0.001, score=0.5761607202848887, total=   0.0s
[CV] alpha=0.001 .....................................................
[CV] ............. alpha=0.001, score=0.619969092252906, total=   0.0s
[CV] alpha=0.001 .....................................................




[CV] ............ alpha=0.001, score=0.5735234831687159, total=   0.0s
[CV] alpha=0.001 .....................................................
[CV] ............ alpha=0.001, score=0.6134011959954311, total=   0.0s
[CV] alpha=0.001 .....................................................
[CV] ............ alpha=0.001, score=0.6615280823073031, total=   0.0s
[CV] alpha=0.001 .....................................................




[CV] ............ alpha=0.001, score=0.6179456906729633, total=   0.0s
[CV] alpha=0.001 .....................................................
[CV] ............ alpha=0.001, score=0.6619328723224828, total=   0.0s
[CV] alpha=0.001 .....................................................
[CV] ............ alpha=0.001, score=0.6327208061647895, total=   0.0s
[CV] alpha=0.001 .....................................................




[CV] ............ alpha=0.001, score=0.5304767550173597, total=   0.0s
[CV] alpha=0.0001 ....................................................
[CV] ........... alpha=0.0001, score=0.5705838876570584, total=   0.0s
[CV] alpha=0.0001 ....................................................
[CV] ........... alpha=0.0001, score=0.5761271249076126, total=   0.0s
[CV] alpha=0.0001 ....................................................




[CV] ........... alpha=0.0001, score=0.6232446415373245, total=   0.0s
[CV] alpha=0.0001 ....................................................
[CV] ............ alpha=0.0001, score=0.560370892965128, total=   0.0s
[CV] alpha=0.0001 ....................................................
[CV] ............ alpha=0.0001, score=0.622270375596318, total=   0.0s
[CV] alpha=0.0001 ....................................................




[CV] ........... alpha=0.0001, score=0.6293978748524204, total=   0.0s
[CV] alpha=0.0001 ....................................................
[CV] ........... alpha=0.0001, score=0.6393658289762186, total=   0.0s
[CV] alpha=0.0001 ....................................................
[CV] ........... alpha=0.0001, score=0.6486422668240851, total=   0.0s
[CV] alpha=0.0001 ....................................................




[CV] ........... alpha=0.0001, score=0.6360233720044033, total=   0.0s
[CV] alpha=0.0001 ....................................................
[CV] ........... alpha=0.0001, score=0.5224659158269117, total=   0.0s
[CV] alpha=1e-05 .....................................................
[CV] ............ alpha=1e-05, score=0.5563898407579118, total=   0.0s
[CV] alpha=1e-05 .....................................................




[CV] ............ alpha=1e-05, score=0.5810488476785595, total=   0.0s
[CV] alpha=1e-05 .....................................................
[CV] ............ alpha=1e-05, score=0.6154001209433582, total=   0.0s
[CV] alpha=1e-05 .....................................................
[CV] ............. alpha=1e-05, score=0.560320499899214, total=   0.0s
[CV] alpha=1e-05 .....................................................




[CV] ............ alpha=1e-05, score=0.6169119129207821, total=   0.0s
[CV] alpha=1e-05 .....................................................
[CV] ............ alpha=1e-05, score=0.6615280823073031, total=   0.0s
[CV] alpha=1e-05 .....................................................
[CV] ............ alpha=1e-05, score=0.6262270197335132, total=   0.0s
[CV] alpha=1e-05 .....................................................




[CV] ............ alpha=1e-05, score=0.6416259065609715, total=   0.0s
[CV] alpha=1e-05 .....................................................
[CV] ............ alpha=1e-05, score=0.6317893132356678, total=   0.0s
[CV] alpha=1e-05 .....................................................
[CV] ............ alpha=1e-05, score=0.5665848081971379, total=   0.0s
[CV] alpha=1e-06 .....................................................




[CV] ............ alpha=1e-06, score=0.5497715514345226, total=   0.0s
[CV] alpha=1e-06 .....................................................
[CV] ............ alpha=1e-06, score=0.5642175636632399, total=   0.0s
[CV] alpha=1e-06 .....................................................
[CV] ............ alpha=1e-06, score=0.6120741785930256, total=   0.0s
[CV] alpha=1e-06 .....................................................




[CV] ............ alpha=1e-06, score=0.5708526506752671, total=   0.0s
[CV] alpha=1e-06 .....................................................
[CV] ............ alpha=1e-06, score=0.6159040516024995, total=   0.0s
[CV] alpha=1e-06 .....................................................
[CV] ............ alpha=1e-06, score=0.6412379827964245, total=   0.0s
[CV] alpha=1e-06 .....................................................




[CV] ............ alpha=1e-06, score=0.6424354865913308, total=   0.0s
[CV] alpha=1e-06 .....................................................
[CV] ............ alpha=1e-06, score=0.6531624219935909, total=   0.0s
[CV] alpha=1e-06 .....................................................
[CV] ............ alpha=1e-06, score=0.6453213650605472, total=   0.0s
[CV] alpha=1e-06 .....................................................




[CV] ............ alpha=1e-06, score=0.5503768312304175, total=   0.0s
ROC-AUC score on test set for elasticnet penalty: 0.679306
Best alpha for elasticnet penalty: {'alpha': 0.01}

Final results:
{'l2': {'alpha': 0.0001}, 'l1': {'alpha': 0.01}, 'elasticnet': {'alpha': 0.01}} {'l2': 0.6164468082813325, 'l1': 0.6381955446344855, 'elasticnet': 0.679305829480423}
Wall time: 14.7 s


[Parallel(n_jobs=1)]: Done  60 out of  60 | elapsed:    4.9s finished


In [22]:
log_penalty = max(gs_log_scores, key=lambda key: gs_log_scores[key])
print('Best performance for SGD with log loss ->', log_penalty, '\nROC-AUC score:', gs_log_scores[log_penalty],
      '\nalpha:', gs_log_param[log_penalty])

Best performance for SGD with log loss -> elasticnet 
ROC-AUC score: 0.679305829480423 
alpha: {'alpha': 0.01}


Using the best performer to get predicted values and other evaluation metrics:

In [23]:
sgd_log_best = SGDClassifier(loss='log', penalty=log_penalty, alpha=gs_log_param[log_penalty].get('alpha'), 
                             l1_ratio=penalties[log_penalty], random_state=10)
sgd_log_best.fit(train_data, train_label)
sgd_log_pred = sgd_log_best.predict(test_data)



In [24]:
sgd_log_pred

array([ True,  True, False, ..., False, False, False])

In [25]:
confusion_matrix(test_label, sgd_log_pred)

array([[548,  43],
       [501, 127]], dtype=int64)

In [26]:
print(classification_report(test_label, sgd_log_pred))

             precision    recall  f1-score   support

      False       0.52      0.93      0.67       591
       True       0.75      0.20      0.32       628

avg / total       0.64      0.55      0.49      1219



### SGD with modified Huber loss

In [27]:
%%time
# Using GridSearchCV to find optimal value for alpha for each penalty type, using ROC-AUC score as with other models

gs_hub_scores = {}
gs_hub_param = {}

for k,v in penalties.items():
    print('\nPenalty:', k, '\tElastic Net mixing parameter:', v)
    gs_hub = GridSearchCV(SGDClassifier(loss='modified_huber', penalty=k, l1_ratio=v, random_state=10), param_grid=alphas,
                          scoring='roc_auc', cv=10, verbose=3)
    gs_hub_fit = gs_hub.fit(train_data, train_label)
    score_hub = gs_hub.score(test_data, test_label)
    gs_hub_scores[k] = score_hub
    param_hub = gs_hub.best_params_
    gs_hub_param[k] = param_hub
    print('ROC-AUC score on test set for', k, 'penalty: %f' % score_hub)
    print('Best alpha for', k, 'penalty: %s' % param_hub)

print('\nFinal results:')
print(gs_hub_param, gs_hub_scores)


Penalty: l2 	Elastic Net mixing parameter: 0
Fitting 10 folds for each of 6 candidates, totalling 60 fits
[CV] alpha=0.1 .......................................................


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s


[CV] .............. alpha=0.1, score=0.5516864879392596, total=   0.0s
[CV] alpha=0.1 .......................................................
[CV] .............. alpha=0.1, score=0.5659141302156823, total=   0.0s
[CV] alpha=0.1 .......................................................


[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.1s remaining:    0.0s


[CV] .............. alpha=0.1, score=0.6114022710475039, total=   0.0s
[CV] alpha=0.1 .......................................................




[CV] .............. alpha=0.1, score=0.5512833434119465, total=   0.0s
[CV] alpha=0.1 .......................................................
[CV] ............... alpha=0.1, score=0.614929785661493, total=   0.0s
[CV] alpha=0.1 .......................................................




[CV] .............. alpha=0.1, score=0.6475122280317085, total=   0.0s
[CV] alpha=0.1 .......................................................




[CV] .............. alpha=0.1, score=0.6510204081632653, total=   0.0s
[CV] alpha=0.1 .......................................................
[CV] ............... alpha=0.1, score=0.638893573958509, total=   0.0s
[CV] alpha=0.1 .......................................................




[CV] .............. alpha=0.1, score=0.6099754424591414, total=   0.0s
[CV] alpha=0.1 .......................................................




[CV] .............. alpha=0.1, score=0.5411465831145736, total=   0.0s
[CV] alpha=0.01 ......................................................
[CV] ............. alpha=0.01, score=0.5542229389236042, total=   0.0s
[CV] alpha=0.01 ......................................................
[CV] ............. alpha=0.01, score=0.5687193442182356, total=   0.0s
[CV] alpha=0.01 ......................................................




[CV] ............. alpha=0.01, score=0.6077067795471344, total=   0.0s
[CV] alpha=0.01 ......................................................
[CV] ............. alpha=0.01, score=0.5726835987368137, total=   0.0s
[CV] alpha=0.01 ......................................................
[CV] ............. alpha=0.01, score=0.6081603171403615, total=   0.0s
[CV] alpha=0.01 ......................................................




[CV] ............. alpha=0.01, score=0.6348962725586103, total=   0.0s
[CV] alpha=0.01 ......................................................




[CV] ............. alpha=0.01, score=0.6264294147411031, total=   0.0s
[CV] alpha=0.01 ......................................................
[CV] ............. alpha=0.01, score=0.6415753078090739, total=   0.0s
[CV] alpha=0.01 ......................................................
[CV] .............. alpha=0.01, score=0.649182826657634, total=   0.0s
[CV] alpha=0.01 ......................................................




[CV] ............. alpha=0.01, score=0.5466000508087052, total=   0.0s
[CV] alpha=0.001 .....................................................




[CV] ............ alpha=0.001, score=0.5530806960962171, total=   0.0s
[CV] alpha=0.001 .....................................................
[CV] ............ alpha=0.001, score=0.5680138412954377, total=   0.0s
[CV] alpha=0.001 .....................................................
[CV] ............ alpha=0.001, score=0.6074380165289257, total=   0.0s
[CV] alpha=0.001 .....................................................




[CV] ............ alpha=0.001, score=0.5709702344957334, total=   0.0s
[CV] alpha=0.001 .....................................................




[CV] .............. alpha=0.001, score=0.60989047907008, total=   0.0s
[CV] alpha=0.001 .....................................................
[CV] ............ alpha=0.001, score=0.6253668409512565, total=   0.0s
[CV] alpha=0.001 .....................................................
[CV] ............ alpha=0.001, score=0.6590993422162253, total=   0.0s
[CV] alpha=0.001 .....................................................




[CV] ............ alpha=0.001, score=0.5868443245066621, total=   0.0s
[CV] alpha=0.001 .....................................................




[CV] ............ alpha=0.001, score=0.6391735117283428, total=   0.0s
[CV] alpha=0.001 .....................................................
[CV] ............ alpha=0.001, score=0.5109492759759505, total=   0.0s
[CV] alpha=0.0001 ....................................................




[CV] ........... alpha=0.0001, score=0.5512833434119464, total=   0.0s
[CV] alpha=0.0001 ....................................................




[CV] ........... alpha=0.0001, score=0.5620674595175703, total=   0.0s
[CV] alpha=0.0001 ....................................................
[CV] ........... alpha=0.0001, score=0.6140227104750386, total=   0.0s
[CV] alpha=0.0001 ....................................................
[CV] ........... alpha=0.0001, score=0.5724148357186052, total=   0.0s
[CV] alpha=0.0001 ....................................................




[CV] ........... alpha=0.0001, score=0.6227575085668212, total=   0.0s
[CV] alpha=0.0001 ....................................................
[CV] ........... alpha=0.0001, score=0.6289087535840783, total=   0.0s
[CV] alpha=0.0001 ....................................................




[CV] ........... alpha=0.0001, score=0.6407825940293472, total=   0.0s
[CV] alpha=0.0001 ....................................................
[CV] ........... alpha=0.0001, score=0.6351829988193625, total=   0.0s
[CV] alpha=0.0001 ....................................................
[CV] ........... alpha=0.0001, score=0.6130747734778559, total=   0.0s
[CV] alpha=0.0001 ....................................................




[CV] ........... alpha=0.0001, score=0.5109492759759505, total=   0.0s
[CV] alpha=1e-05 .....................................................




[CV] ............ alpha=1e-05, score=0.5584223610831149, total=   0.0s
[CV] alpha=1e-05 .....................................................
[CV] ............ alpha=1e-05, score=0.5657629510179399, total=   0.0s
[CV] alpha=1e-05 .....................................................




[CV] ............ alpha=1e-05, score=0.6074380165289257, total=   0.0s
[CV] alpha=1e-05 .....................................................




[CV] ............ alpha=1e-05, score=0.5443458980044346, total=   0.0s
[CV] alpha=1e-05 .....................................................
[CV] ............ alpha=1e-05, score=0.6213633004098637, total=   0.0s
[CV] alpha=1e-05 .....................................................
[CV] ............ alpha=1e-05, score=0.6289087535840783, total=   0.0s
[CV] alpha=1e-05 .....................................................








[CV] ............ alpha=1e-05, score=0.6407825940293472, total=   0.0s
[CV] alpha=1e-05 .....................................................
[CV] ............ alpha=1e-05, score=0.6515263956822398, total=   0.0s
[CV] alpha=1e-05 .....................................................
[CV] ............ alpha=1e-05, score=0.6407993902955373, total=   0.0s
[CV] alpha=1e-05 .....................................................
[CV] ............ alpha=1e-05, score=0.5109492759759505, total=   0.0s




[CV] alpha=1e-06 .....................................................




[CV] ............. alpha=1e-06, score=0.551434522609689, total=   0.0s
[CV] alpha=1e-06 .....................................................
[CV] ............ alpha=1e-06, score=0.5657629510179399, total=   0.0s
[CV] alpha=1e-06 .....................................................
[CV] ............ alpha=1e-06, score=0.6074380165289257, total=   0.0s
[CV] alpha=1e-06 .....................................................




[CV] ............ alpha=1e-06, score=0.5564234361351877, total=   0.0s
[CV] alpha=1e-06 .....................................................
[CV] ............ alpha=1e-06, score=0.6280991735537189, total=   0.0s
[CV] alpha=1e-06 .....................................................




[CV] ............ alpha=1e-06, score=0.6289087535840783, total=   0.0s
[CV] alpha=1e-06 .....................................................
[CV] ............ alpha=1e-06, score=0.6407825940293472, total=   0.0s
[CV] alpha=1e-06 .....................................................




[CV] ............. alpha=1e-06, score=0.607556080283353, total=   0.0s
[CV] alpha=1e-06 .....................................................
[CV] ............ alpha=1e-06, score=0.6217291896011516, total=   0.0s
[CV] alpha=1e-06 .....................................................
[CV] ............ alpha=1e-06, score=0.5109492759759505, total=   0.0s
ROC-AUC score on test set for l2 penalty: 0.633324
Best alpha for l2 penalty: {'alpha': 0.01}

Penalty: l1 	Elastic Net mixing parameter: 1
Fitting 10 folds for each of 6 candidates, totalling 60 fits
[CV] alpha=0.1 .......................................................
[CV] .............. alpha=0.1, score=0.6664482967143721, total=   0.0s


[Parallel(n_jobs=1)]: Done  60 out of  60 | elapsed:    4.3s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s


[CV] alpha=0.1 .......................................................
[CV] .............. alpha=0.1, score=0.6360444802795135, total=   0.0s
[CV] alpha=0.1 .......................................................
[CV] .............. alpha=0.1, score=0.6584022038567494, total=   0.0s
[CV] alpha=0.1 .......................................................
[CV] .............. alpha=0.1, score=0.6104783981724116, total=   0.0s
[CV] alpha=0.1 .......................................................


[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.1s remaining:    0.0s


[CV] .............. alpha=0.1, score=0.6423268158301418, total=   0.0s
[CV] alpha=0.1 .......................................................
[CV] ............... alpha=0.1, score=0.654562320796087, total=   0.0s
[CV] alpha=0.1 .......................................................
[CV] .............. alpha=0.1, score=0.5945690672963401, total=   0.0s
[CV] alpha=0.1 .......................................................




[CV] .............. alpha=0.1, score=0.6181649519311858, total=   0.0s
[CV] alpha=0.1 .......................................................
[CV] .............. alpha=0.1, score=0.6187653484630367, total=   0.0s
[CV] alpha=0.1 .......................................................
[CV] .............. alpha=0.1, score=0.6238970276907443, total=   0.0s
[CV] alpha=0.01 ......................................................




[CV] .............. alpha=0.01, score=0.678794597863334, total=   0.0s
[CV] alpha=0.01 ......................................................
[CV] .............. alpha=0.01, score=0.632348988779144, total=   0.0s
[CV] alpha=0.01 ......................................................
[CV] ............. alpha=0.01, score=0.6759221931062285, total=   0.0s
[CV] alpha=0.01 ......................................................




[CV] ............. alpha=0.01, score=0.6064805482765572, total=   0.0s
[CV] alpha=0.01 ......................................................
[CV] .............. alpha=0.01, score=0.633877578445206, total=   0.0s
[CV] alpha=0.01 ......................................................
[CV] ............. alpha=0.01, score=0.6744644965424185, total=   0.0s
[CV] alpha=0.01 ......................................................




[CV] ............. alpha=0.01, score=0.6281497723056165, total=   0.0s
[CV] alpha=0.01 ......................................................
[CV] ............. alpha=0.01, score=0.6335132400067465, total=   0.0s
[CV] alpha=0.01 ......................................................
[CV] ............. alpha=0.01, score=0.6489457193665847, total=   0.0s
[CV] alpha=0.01 ......................................................




[CV] ............. alpha=0.01, score=0.6057413836904056, total=   0.0s
[CV] alpha=0.001 .....................................................
[CV] ............ alpha=0.001, score=0.6005341664986897, total=   0.0s
[CV] alpha=0.001 .....................................................
[CV] ............ alpha=0.001, score=0.6233286299805147, total=   0.0s
[CV] alpha=0.001 .....................................................




[CV] ............ alpha=0.001, score=0.6415205267755155, total=   0.0s
[CV] alpha=0.001 .....................................................
[CV] ............ alpha=0.001, score=0.5779748706577975, total=   0.0s
[CV] alpha=0.001 .....................................................
[CV] ............ alpha=0.001, score=0.6319290465631928, total=   0.0s
[CV] alpha=0.001 .....................................................




[CV] ............ alpha=0.001, score=0.6868780570079271, total=   0.0s
[CV] alpha=0.001 .....................................................
[CV] ............ alpha=0.001, score=0.6406476640242874, total=   0.0s
[CV] alpha=0.001 .....................................................
[CV] ............ alpha=0.001, score=0.6383201214370046, total=   0.0s
[CV] alpha=0.001 .....................................................




[CV] ............ alpha=0.001, score=0.6482174612583622, total=   0.0s
[CV] alpha=0.001 .....................................................
[CV] ............ alpha=0.001, score=0.6087899060038954, total=   0.0s
[CV] alpha=0.0001 ....................................................
[CV] ........... alpha=0.0001, score=0.5953772760868106, total=   0.0s
[CV] alpha=0.0001 ....................................................




[CV] ........... alpha=0.0001, score=0.5982496808439158, total=   0.0s
[CV] alpha=0.0001 ....................................................
[CV] ........... alpha=0.0001, score=0.6291238325606398, total=   0.0s
[CV] alpha=0.0001 ....................................................
[CV] ........... alpha=0.0001, score=0.5797050325875159, total=   0.0s
[CV] alpha=0.0001 ....................................................




[CV] ........... alpha=0.0001, score=0.6333568500974265, total=   0.0s
[CV] alpha=0.0001 ....................................................
[CV] ........... alpha=0.0001, score=0.6646483386743126, total=   0.0s
[CV] alpha=0.0001 ....................................................
[CV] ............ alpha=0.0001, score=0.623815145893068, total=   0.0s
[CV] alpha=0.0001 ....................................................




[CV] ........... alpha=0.0001, score=0.6590318772136954, total=   0.0s
[CV] alpha=0.0001 ....................................................
[CV] ........... alpha=0.0001, score=0.6536370564823439, total=   0.0s
[CV] alpha=0.0001 ....................................................
[CV] ........... alpha=0.0001, score=0.5798458802608181, total=   0.0s
[CV] alpha=1e-05 .....................................................




[CV] ............ alpha=1e-05, score=0.5793186857488409, total=   0.0s
[CV] alpha=1e-05 .....................................................
[CV] ............ alpha=1e-05, score=0.5907747093999867, total=   0.0s
[CV] alpha=1e-05 .....................................................
[CV] ............. alpha=1e-05, score=0.620338641402943, total=   0.0s
[CV] alpha=1e-05 .....................................................




[CV] ............ alpha=1e-05, score=0.5799569979170867, total=   0.0s
[CV] alpha=1e-05 .....................................................
[CV] ............ alpha=1e-05, score=0.6276624336491299, total=   0.0s
[CV] alpha=1e-05 .....................................................
[CV] ............ alpha=1e-05, score=0.6610895597908586, total=   0.0s
[CV] alpha=1e-05 .....................................................




[CV] ............ alpha=1e-05, score=0.6388092427053466, total=   0.0s
[CV] alpha=1e-05 .....................................................
[CV] ............ alpha=1e-05, score=0.6226176420981616, total=   0.0s
[CV] alpha=1e-05 .....................................................
[CV] ............ alpha=1e-05, score=0.6553476162249132, total=   0.0s
[CV] alpha=1e-05 .....................................................




[CV] ............ alpha=1e-05, score=0.5408417308832246, total=   0.0s
[CV] alpha=1e-06 .....................................................
[CV] ............ alpha=1e-06, score=0.5596317946650541, total=   0.0s
[CV] alpha=1e-06 .....................................................
[CV] ............ alpha=1e-06, score=0.5752536450984345, total=   0.0s
[CV] alpha=1e-06 .....................................................




[CV] ............ alpha=1e-06, score=0.6152321440569778, total=   0.0s
[CV] alpha=1e-06 .....................................................
[CV] ............ alpha=1e-06, score=0.5562386615601693, total=   0.0s
[CV] alpha=1e-06 .....................................................
[CV] ............ alpha=1e-06, score=0.6191124101323658, total=   0.0s
[CV] alpha=1e-06 .....................................................




[CV] ............. alpha=1e-06, score=0.612362961713611, total=   0.0s
[CV] alpha=1e-06 .....................................................
[CV] ............. alpha=1e-06, score=0.637392477652218, total=   0.0s
[CV] alpha=1e-06 .....................................................
[CV] ............ alpha=1e-06, score=0.6543093270365999, total=   0.0s
[CV] alpha=1e-06 .....................................................




[CV] ............ alpha=1e-06, score=0.6359894995342535, total=   0.0s
[CV] alpha=1e-06 .....................................................
[CV] ............ alpha=1e-06, score=0.5502921500550428, total=   0.0s
ROC-AUC score on test set for l1 penalty: 0.645947
Best alpha for l1 penalty: {'alpha': 0.01}

Penalty: elasticnet 	Elastic Net mixing parameter: 0.5
Fitting 10 folds for each of 6 candidates, totalling 60 fits
[CV] alpha=0.1 .......................................................


[Parallel(n_jobs=1)]: Done  60 out of  60 | elapsed:    4.7s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s


[CV] .............. alpha=0.1, score=0.6682456493986428, total=   0.0s
[CV] alpha=0.1 .......................................................
[CV] .............. alpha=0.1, score=0.6112846872270375, total=   0.0s
[CV] alpha=0.1 .......................................................
[CV] ............... alpha=0.1, score=0.624218907478331, total=   0.0s
[CV] alpha=0.1 .......................................................


[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.1s remaining:    0.0s


[CV] .............. alpha=0.1, score=0.6047839817241147, total=   0.0s
[CV] alpha=0.1 .......................................................
[CV] .............. alpha=0.1, score=0.6320634280722972, total=   0.0s
[CV] alpha=0.1 .......................................................
[CV] .............. alpha=0.1, score=0.6755102040816328, total=   0.0s
[CV] alpha=0.1 .......................................................




[CV] .............. alpha=0.1, score=0.6067633665036263, total=   0.0s
[CV] alpha=0.1 .......................................................
[CV] .............. alpha=0.1, score=0.6527407657277787, total=   0.0s
[CV] alpha=0.1 .......................................................
[CV] .............. alpha=0.1, score=0.6299601998475737, total=   0.0s
[CV] alpha=0.1 .......................................................




[CV] .............. alpha=0.1, score=0.6350918790752815, total=   0.0s
[CV] alpha=0.01 ......................................................
[CV] ............. alpha=0.01, score=0.5546932742054693, total=   0.0s
[CV] alpha=0.01 ......................................................
[CV] ............. alpha=0.01, score=0.5674931129476585, total=   0.0s
[CV] alpha=0.01 ......................................................




[CV] .............. alpha=0.01, score=0.607975542565343, total=   0.0s
[CV] alpha=0.01 ......................................................
[CV] ............. alpha=0.01, score=0.5659813209702345, total=   0.0s
[CV] alpha=0.01 ......................................................
[CV] ............. alpha=0.01, score=0.6027178660216354, total=   0.0s
[CV] alpha=0.01 ......................................................




[CV] ............. alpha=0.01, score=0.6858492157193457, total=   0.0s
[CV] alpha=0.01 ......................................................
[CV] ............. alpha=0.01, score=0.6518974531961546, total=   0.0s
[CV] alpha=0.01 ......................................................
[CV] ............. alpha=0.01, score=0.6520829819531119, total=   0.0s
[CV] alpha=0.01 ......................................................




[CV] ............. alpha=0.01, score=0.6435938690829028, total=   0.0s
[CV] alpha=0.01 ......................................................
[CV] .............. alpha=0.01, score=0.548734016428148, total=   0.0s
[CV] alpha=0.001 .....................................................
[CV] ............ alpha=0.001, score=0.5694416448296715, total=   0.0s
[CV] alpha=0.001 .....................................................




[CV] ............ alpha=0.001, score=0.5798058187193442, total=   0.0s
[CV] alpha=0.001 .....................................................
[CV] ............ alpha=0.001, score=0.6200866760733722, total=   0.0s
[CV] alpha=0.001 .....................................................
[CV] ............ alpha=0.001, score=0.5553147886850769, total=   0.0s
[CV] alpha=0.001 .....................................................




[CV] ............ alpha=0.001, score=0.6165759591480212, total=   0.0s
[CV] alpha=0.001 .....................................................
[CV] ............ alpha=0.001, score=0.6614943498060382, total=   0.0s
[CV] alpha=0.001 .....................................................
[CV] ............ alpha=0.001, score=0.6414235115533817, total=   0.0s
[CV] alpha=0.001 .....................................................




[CV] ............. alpha=0.001, score=0.659993253499747, total=   0.0s
[CV] alpha=0.001 .....................................................
[CV] ............ alpha=0.001, score=0.6474383944449149, total=   0.0s
[CV] alpha=0.001 .....................................................
[CV] ............ alpha=0.001, score=0.5411635193496486, total=   0.0s
[CV] alpha=0.0001 ....................................................




[CV] ........... alpha=0.0001, score=0.5639152052677551, total=   0.0s
[CV] alpha=0.0001 ....................................................
[CV] ........... alpha=0.0001, score=0.5837364778606464, total=   0.0s
[CV] alpha=0.0001 ....................................................
[CV] ........... alpha=0.0001, score=0.6248068265806624, total=   0.0s
[CV] alpha=0.0001 ....................................................




[CV] ........... alpha=0.0001, score=0.5664852516293758, total=   0.0s
[CV] alpha=0.0001 ....................................................
[CV] ........... alpha=0.0001, score=0.6285695088355843, total=   0.0s
[CV] alpha=0.0001 ....................................................
[CV] ........... alpha=0.0001, score=0.6352335975712599, total=   0.0s
[CV] alpha=0.0001 ....................................................




[CV] ........... alpha=0.0001, score=0.6258222297183336, total=   0.0s
[CV] alpha=0.0001 ....................................................
[CV] ........... alpha=0.0001, score=0.6601787822567043, total=   0.0s
[CV] alpha=0.0001 ....................................................
[CV] ........... alpha=0.0001, score=0.6256075874333136, total=   0.0s
[CV] alpha=0.0001 ....................................................




[CV] ........... alpha=0.0001, score=0.5439071894317893, total=   0.0s
[CV] alpha=1e-05 .....................................................
[CV] ............. alpha=1e-05, score=0.567375529127192, total=   0.0s
[CV] alpha=1e-05 .....................................................
[CV] ............ alpha=1e-05, score=0.5724316334072431, total=   0.0s
[CV] alpha=1e-05 .....................................................




[CV] ............ alpha=1e-05, score=0.6112342941611234, total=   0.0s
[CV] alpha=1e-05 .....................................................
[CV] ............ alpha=1e-05, score=0.5562890546260835, total=   0.0s
[CV] alpha=1e-05 .....................................................
[CV] ............ alpha=1e-05, score=0.6074884095948397, total=   0.0s
[CV] alpha=1e-05 .....................................................




[CV] ............ alpha=1e-05, score=0.6417945690672964, total=   0.0s
[CV] alpha=1e-05 .....................................................
[CV] ............ alpha=1e-05, score=0.6294484736043178, total=   0.0s
[CV] alpha=1e-05 .....................................................
[CV] ............ alpha=1e-05, score=0.6443245066621689, total=   0.0s
[CV] alpha=1e-05 .....................................................




[CV] ............ alpha=1e-05, score=0.6510966212211026, total=   0.0s
[CV] alpha=1e-05 .....................................................
[CV] ............ alpha=1e-05, score=0.5666017444322128, total=   0.0s
[CV] alpha=1e-06 .....................................................
[CV] ............ alpha=1e-06, score=0.5590438755627226, total=   0.0s
[CV] alpha=1e-06 .....................................................




[CV] ............ alpha=1e-06, score=0.5642175636632399, total=   0.0s
[CV] alpha=1e-06 .....................................................
[CV] ............ alpha=1e-06, score=0.6120741785930256, total=   0.0s
[CV] alpha=1e-06 .....................................................
[CV] ............ alpha=1e-06, score=0.5546764765168313, total=   0.0s
[CV] alpha=1e-06 .....................................................




[CV] ............ alpha=1e-06, score=0.6159040516024995, total=   0.0s
[CV] alpha=1e-06 .....................................................
[CV] ............ alpha=1e-06, score=0.6412211165457918, total=   0.0s
[CV] alpha=1e-06 .....................................................
[CV] ............ alpha=1e-06, score=0.6524540394670264, total=   0.0s
[CV] alpha=1e-06 .....................................................




[CV] ............ alpha=1e-06, score=0.6531961544948558, total=   0.0s
[CV] alpha=1e-06 .....................................................
[CV] ............ alpha=1e-06, score=0.6453213650605472, total=   0.0s
[CV] alpha=1e-06 .....................................................
[CV] ............ alpha=1e-06, score=0.5503937674654924, total=   0.0s
ROC-AUC score on test set for elasticnet penalty: 0.679640
Best alpha for elasticnet penalty: {'alpha': 0.1}

Final results:
{'l2': {'alpha': 0.01}, 'l1': {'alpha': 0.01}, 'elasticnet': {'alpha': 0.1}} {'l2': 0.6333241725672778, 'l1': 0.6459471693232888, 'elasticnet': 0.6796399280071562}
Wall time: 14.3 s


[Parallel(n_jobs=1)]: Done  60 out of  60 | elapsed:    4.8s finished


In [28]:
hub_penalty = max(gs_hub_scores, key=lambda key: gs_hub_scores[key])
print('Best performance for SGD with modified Huber loss ->', hub_penalty, '\nROC-AUC score:', gs_hub_scores[hub_penalty],
      '\nalpha:', gs_hub_param[hub_penalty])

Best performance for SGD with modified Huber loss -> elasticnet 
ROC-AUC score: 0.6796399280071562 
alpha: {'alpha': 0.1}


Using the best performer to get predicted values and other evaluation metrics:

In [29]:
sgd_hub_best = SGDClassifier(loss='modified_huber', penalty=hub_penalty, alpha=gs_hub_param[hub_penalty].get('alpha'),
                             l1_ratio=penalties[hub_penalty], random_state=10)
sgd_hub_best.fit(train_data, train_label)
sgd_hub_pred = sgd_hub_best.predict(test_data)



In [30]:
sgd_hub_pred

array([ True,  True, False, ..., False, False, False])

In [31]:
confusion_matrix(test_label, sgd_hub_pred)

array([[508,  83],
       [430, 198]], dtype=int64)

In [32]:
print(classification_report(test_label, sgd_hub_pred))

             precision    recall  f1-score   support

      False       0.54      0.86      0.66       591
       True       0.70      0.32      0.44       628

avg / total       0.63      0.58      0.55      1219

