In [1]:
%matplotlib inline
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

## Reading in and preprocessing data

In [None]:
# Reading in data from the 50-50 readmit vs. nonreadmit dataset

df = pd.read_csv('dfd.csv')
df.head()

In [None]:
# Dropping certain columns

# Dropping ID numbers and dates
df = df.drop(columns=['subject_id', 'hadm_id', 'admittime', 'dischtime'])

# Dropping labevents and chartevents values of less importance based on feature selection (2/3 values for each measurement)
df = df.drop(columns=['rdw_min', 'rdw_max', 'hemoglobin_min', 'hemoglobin_max', 'creatinine_median', 'creatinine_min', 
                      'hematocrit_median', 'hematocrit_min', 'tempc_median', 'tempc_max', 'resprate_median', 
                      'resprate_min', 'wbc_median', 'wbc_max', 'inr_min', 'inr_median', 'ptt_median', 'ptt_max', 
                      'lactate_median', 'lactate_max', 'sysbp_median', 'sysbp_min', 'spo2_median', 'spo2_max', 
                      'bilirubin_median', 'bilirubin_max', 'platelet_median', 'platelet_max', 'heartrate_min',
                      'heartrate_median'])
df.head()

In [None]:
# Converting categorical features into dummy variables

df_converted = pd.get_dummies(df)
df_converted.head()

In [None]:
# Splitting dataframe into data (predictors) vs. label (attributed to be predicted)

label_df = df_converted.pop('followed_by_readmit')
data_df = df_converted
print('label_df:\n', label_df.head(), 2*'\n', 'data_df:\n', data_df.head())

In [6]:
# Converting dataframes to NumPy arrays

label = label_df.values
data = data_df.values

In [7]:
label

array([ True,  True,  True, ..., False, False, False])

In [8]:
data

array([[70, 8, 0, ..., 0, 0, 0],
       [42, 19, 0, ..., 0, 0, 0],
       [60, 8, 0, ..., 0, 1, 0],
       ...,
       [74, 19, 0, ..., 0, 0, 0],
       [67, 7, 0, ..., 0, 0, 0],
       [59, 9, 0, ..., 0, 0, 0]], dtype=object)

## Train/test split

In [9]:
# 80/20 train-test split

from sklearn.model_selection import train_test_split

train_data, test_data, train_label, test_label = train_test_split(data, label, train_size=0.8, test_size=0.2, random_state=10)

print('Training data:', train_data.shape, '\tTest data:', test_data.shape)
print('Training labels:', train_label.shape, '\tTest labels:', test_label.shape)

Training data: (4873, 110) 	Test data: (1219, 110)
Training labels: (4873,) 	Test labels: (1219,)


In [10]:
# Cross-validation on the training set (no need to do explicitly for Logistic Reg. since has a cv parameter, but just in case...)

#from sklearn.model_selection import KFold
#kf = KFold(n_splits=10, random_state=10)
#for train, test in kf.split(train_data, train_label):
    #print(test)

Note that cross-validation will be performed with the training data for each machine learning algorithm tested below.

## Feature scaling

Since penalized logistic regression and stochastic gradient descent are both sensitive to scaling (especially the latter), we'll first scale each attribute in `train_data` to [0,1] and then apply the transformation to `test_data`. The 'DataConversionWarning' that appears lets us know that for scaling, True was converted to 1, and False to 0.

In [11]:
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler()
scaler.fit(train_data)
train_data_sc = scaler.transform(train_data)
test_data_sc = scaler.transform(test_data)



## (Penalized) Logistic regression

From http://scikit-learn.org/stable/modules/linear_model.html#logistic-regression:
"LogisticRegressionCV implements Logistic Regression with builtin cross-validation to find out the optimal C parameter (similar to what GridSearchCV might do)."

In [12]:
from sklearn.linear_model import LogisticRegressionCV

First, let's try using the `liblinear` solver (recommended for small datasets) with 'l1' penalty:

In [13]:
# Instantiating logistic regression estimator object; cv=10 indicates 10-fold cross-validation

lrcv_l1 = LogisticRegressionCV(cv=10, penalty='l1', solver='liblinear', random_state=10)

In [14]:
%%time

lrcv_l1.fit(train_data_sc, train_label)

Wall time: 3min 50s


LogisticRegressionCV(Cs=10, class_weight=None, cv=10, dual=False,
           fit_intercept=True, intercept_scaling=1.0, max_iter=100,
           multi_class='ovr', n_jobs=1, penalty='l1', random_state=10,
           refit=True, scoring=None, solver='liblinear', tol=0.0001,
           verbose=0)

In [15]:
lrcv_l1_pred = lrcv_l1.predict(test_data_sc)
lrcv_l1_pp = lrcv_l1.predict_proba(test_data_sc)

Running self.classes_ on the result from fitting the model to the data (see commented code below) tells us that the probability estimates for positive class ("True") are in column 1 rather than column 0 of the result from `predict_proba()` - this is what we'll need to use when calculating the ROC-AUC.

In [16]:
#lrcv_l1.fit(train_data_sc, train_label).classes_

In [17]:
from sklearn.metrics import roc_auc_score

lrcv_l1_probs = lrcv_l1.predict_proba(test_data_sc)[:,1]
print('ROC-AUC score with l1 penalty:', roc_auc_score(test_label, lrcv_l1_probs))

ROC-AUC score with l1 penalty: 0.7062815911711771


What if we used 'l2' penalty instead?

In [18]:
%%time
# Repeating the code above for logistic regression but with l2 penalty

lrcv_l2 = LogisticRegressionCV(cv=10, penalty='l2', solver='liblinear', random_state=10)
lrcv_l2.fit(train_data_sc, train_label)
lrcv_l2_pred = lrcv_l2.predict(test_data_sc)
lrcv_l2_pp = lrcv_l2.predict_proba(test_data_sc)
lrcv_l2_probs = lrcv_l2.predict_proba(test_data_sc)[:,1]
print('ROC-AUC score with l2 penalty:', roc_auc_score(test_label, lrcv_l2_probs))

ROC-AUC score with l2 penalty: 0.6945019237608718
Wall time: 16 s


Using the liblinear solver, l1 penalty has a very slightly better ROC-AUC score.

Now, let's get the confusion matrix and precision/recall/F1-score.

In [19]:
from sklearn.metrics import confusion_matrix

confusion_matrix(test_label, lrcv_l1_pred)

array([[392, 199],
       [238, 390]], dtype=int64)

In [20]:
from sklearn.metrics import classification_report

print(classification_report(test_label, lrcv_l1_pred))

             precision    recall  f1-score   support

      False       0.62      0.66      0.64       591
       True       0.66      0.62      0.64       628

avg / total       0.64      0.64      0.64      1219



## Stochastic Gradient Descent

We will test SGD optimization with logistic regression and modified Huber loss functions. From the documentation (http://scikit-learn.org/stable/modules/generated/sklearn.linear_model.SGDClassifier.html#sklearn.linear_model.SGDClassifier), "‘modified_huber’ is another smooth loss that brings tolerance to outliers as well as probability estimates". For each, we will test 'l2', 'l1', and 'elasticnet' penalties.

According to the documentation (http://scikit-learn.org/stable/modules/sgd.html#tips-on-practical-use), "finding a reasonable regularization term $\alpha$ is best done using GridSearchCV, usually in the range 10.0**-np.arange(1,7)."

In [21]:
# Importing SGDClassifier and GridSearchCV, creating alphas and penalties dictionaries for use with GridSearchCV

from sklearn.linear_model import SGDClassifier
from sklearn.model_selection import GridSearchCV

alphas = {'alpha': 10.0**-np.arange(1,7)}
penalties = {'l2': 0, 'l1': 1, 'elasticnet': 0.5}

### SGD with log loss

In [22]:
%%time
# Using GridSearchCV to find optimal value for alpha for each penalty type, using ROC-AUC score as with other models

gs_log_scores = {}
gs_log_param = {}

for k,v in penalties.items():
    print('\nPenalty:', k, '\tElastic Net mixing parameter:', v)
    gs_log = GridSearchCV(SGDClassifier(loss='log', penalty=k, l1_ratio=v, random_state=10), param_grid=alphas,
                          scoring='roc_auc', cv=10, verbose=3)
    gs_log_fit = gs_log.fit(train_data_sc, train_label)
    score = gs_log.score(test_data_sc, test_label)
    gs_log_scores[k] = score
    param = gs_log.best_params_
    gs_log_param[k] = param
    print('ROC-AUC score on test set for', k, 'penalty: %f' % score)
    print('Best alpha for', k, 'penalty: %s' % param)

print('\nFinal results:')
print(gs_log_param, gs_log_scores)


Penalty: l2 	Elastic Net mixing parameter: 0
Fitting 10 folds for each of 6 candidates, totalling 60 fits
[CV] alpha=0.1 .......................................................
[CV] .............. alpha=0.1, score=0.6495666196331386, total=   0.0s
[CV] alpha=0.1 .......................................................
[CV] ............... alpha=0.1, score=0.621884028757643, total=   0.0s
[CV] alpha=0.1 .......................................................
[CV] .............. alpha=0.1, score=0.6812134650272124, total=   0.0s
[CV] alpha=0.1 .......................................................
[CV] .............. alpha=0.1, score=0.6297453470402472, total=   0.0s
[CV] alpha=0.1 .......................................................
[CV] .............. alpha=0.1, score=0.6330544917019418, total=   0.0s
[CV] alpha=0.1 .......................................................
[CV] .............. alpha=0.1, score=0.6571765896441221, total=   0.0s
[CV] alpha=0.1 ..........................

[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.0s remaining:    0.0s


[CV] .............. alpha=0.1, score=0.6339180300219261, total=   0.0s
[CV] alpha=0.1 .......................................................
[CV] ............... alpha=0.1, score=0.639551357733176, total=   0.0s
[CV] alpha=0.1 .......................................................
[CV] .............. alpha=0.1, score=0.6449318316538233, total=   0.0s
[CV] alpha=0.1 .......................................................
[CV] .............. alpha=0.1, score=0.6177153018883902, total=   0.0s
[CV] alpha=0.01 ......................................................
[CV] ............. alpha=0.01, score=0.6643653833232547, total=   0.0s
[CV] alpha=0.01 ......................................................
[CV] ............. alpha=0.01, score=0.6359772895249614, total=   0.0s
[CV] alpha=0.01 ......................................................




[CV] .............. alpha=0.01, score=0.701992205872472, total=   0.0s
[CV] alpha=0.01 ......................................................
[CV] .............. alpha=0.01, score=0.641251763757307, total=   0.0s
[CV] alpha=0.01 ......................................................
[CV] ............. alpha=0.01, score=0.6502385271786604, total=   0.0s
[CV] alpha=0.01 ......................................................
[CV] ............. alpha=0.01, score=0.6805194805194805, total=   0.0s
[CV] alpha=0.01 ......................................................
[CV] ............. alpha=0.01, score=0.6557260920897284, total=   0.0s
[CV] alpha=0.01 ......................................................
[CV] ............. alpha=0.01, score=0.6549502445606342, total=   0.0s
[CV] alpha=0.01 ......................................................




[CV] ............. alpha=0.01, score=0.6664069777288508, total=   0.0s
[CV] alpha=0.01 ......................................................
[CV] ............. alpha=0.01, score=0.6419171818104835, total=   0.0s
[CV] alpha=0.001 .....................................................
[CV] ............ alpha=0.001, score=0.6471477524692603, total=   0.0s
[CV] alpha=0.001 .....................................................
[CV] ............ alpha=0.001, score=0.6193139823960223, total=   0.0s
[CV] alpha=0.001 .....................................................
[CV] ............ alpha=0.001, score=0.7044278707249882, total=   0.0s
[CV] alpha=0.001 .....................................................
[CV] ............ alpha=0.001, score=0.6213800980985016, total=   0.0s
[CV] alpha=0.001 .....................................................
[CV] ............. alpha=0.001, score=0.652909359672109, total=   0.0s
[CV] alpha=0.001 .....................................................
[CV] .



[CV] ............ alpha=0.001, score=0.6512902681733851, total=   0.0s
[CV] alpha=0.001 .....................................................
[CV] ............ alpha=0.001, score=0.6494855793557093, total=   0.0s
[CV] alpha=0.001 .....................................................
[CV] ............ alpha=0.001, score=0.6510966212211026, total=   0.0s
[CV] alpha=0.001 .....................................................
[CV] ............. alpha=0.001, score=0.647150478448641, total=   0.0s
[CV] alpha=0.0001 ....................................................
[CV] ........... alpha=0.0001, score=0.5935463280252637, total=   0.0s
[CV] alpha=0.0001 ....................................................
[CV] ........... alpha=0.0001, score=0.5639655983336693, total=   0.0s
[CV] alpha=0.0001 ....................................................
[CV] ........... alpha=0.0001, score=0.6336760061815494, total=   0.0s
[CV] alpha=0.0001 ....................................................
[CV] .



[CV] ........... alpha=0.0001, score=0.6154337163206343, total=   0.0s
[CV] alpha=0.0001 ....................................................
[CV] ........... alpha=0.0001, score=0.5737392477652218, total=   0.0s
[CV] alpha=0.0001 ....................................................
[CV] ........... alpha=0.0001, score=0.6233428908753585, total=   0.0s
[CV] alpha=0.0001 ....................................................
[CV] ........... alpha=0.0001, score=0.6227188396019565, total=   0.0s
[CV] alpha=0.0001 ....................................................
[CV] ........... alpha=0.0001, score=0.6043864848844102, total=   0.0s
[CV] alpha=0.0001 ....................................................
[CV] ........... alpha=0.0001, score=0.5782200016936235, total=   0.0s
[CV] alpha=1e-05 .....................................................
[CV] ............ alpha=1e-05, score=0.5922529060001345, total=   0.0s
[CV] alpha=1e-05 .....................................................
[CV] .



[CV] ............ alpha=1e-05, score=0.5844083854061681, total=   0.0s
[CV] alpha=1e-05 .....................................................
[CV] ............. alpha=1e-05, score=0.604112074178593, total=   0.0s
[CV] alpha=1e-05 .....................................................
[CV] ............ alpha=1e-05, score=0.5687299713273739, total=   0.0s
[CV] alpha=1e-05 .....................................................
[CV] ............ alpha=1e-05, score=0.6239500758981279, total=   0.0s
[CV] alpha=1e-05 .....................................................
[CV] ............ alpha=1e-05, score=0.6233091583740934, total=   0.0s
[CV] alpha=1e-05 .....................................................
[CV] ............ alpha=1e-05, score=0.5630620713015496, total=   0.0s
[CV] alpha=1e-05 .....................................................
[CV] ............ alpha=1e-05, score=0.6015750698619697, total=   0.0s
[CV] alpha=1e-06 .....................................................
[CV] .



[CV] ............ alpha=1e-06, score=0.5747833098165692, total=   0.0s
[CV] alpha=1e-06 .....................................................
[CV] ............ alpha=1e-06, score=0.6117550225089028, total=   0.0s
[CV] alpha=1e-06 .....................................................
[CV] ............ alpha=1e-06, score=0.5754384196734529, total=   0.0s
[CV] alpha=1e-06 .....................................................
[CV] ............ alpha=1e-06, score=0.6253443526170799, total=   0.0s
[CV] alpha=1e-06 .....................................................
[CV] ............ alpha=1e-06, score=0.5728116039804352, total=   0.0s
[CV] alpha=1e-06 .....................................................
[CV] ............. alpha=1e-06, score=0.627576319784112, total=   0.0s
[CV] alpha=1e-06 .....................................................
[CV] ............ alpha=1e-06, score=0.6097655591162086, total=   0.0s
[CV] alpha=1e-06 .....................................................
[CV] .

[Parallel(n_jobs=1)]: Done  60 out of  60 | elapsed:    1.6s finished


ROC-AUC score on test set for l2 penalty: 0.689003
Best alpha for l2 penalty: {'alpha': 0.01}

Penalty: l1 	Elastic Net mixing parameter: 1
Fitting 10 folds for each of 6 candidates, totalling 60 fits
[CV] alpha=0.1 .......................................................
[CV] ............................. alpha=0.1, score=0.5, total=   0.0s
[CV] alpha=0.1 .......................................................
[CV] ............................. alpha=0.1, score=0.5, total=   0.0s
[CV] alpha=0.1 .......................................................
[CV] ............................. alpha=0.1, score=0.5, total=   0.0s
[CV] alpha=0.1 .......................................................
[CV] ............................. alpha=0.1, score=0.5, total=   0.0s
[CV] alpha=0.1 .......................................................
[CV] ............................. alpha=0.1, score=0.5, total=   0.0s
[CV] alpha=0.1 .......................................................


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.0s remaining:    0.0s


[CV] ............................. alpha=0.1, score=0.5, total=   0.0s
[CV] alpha=0.1 .......................................................
[CV] ............................. alpha=0.1, score=0.5, total=   0.0s
[CV] alpha=0.1 .......................................................
[CV] ............................. alpha=0.1, score=0.5, total=   0.0s
[CV] alpha=0.1 .......................................................
[CV] ............................. alpha=0.1, score=0.5, total=   0.0s
[CV] alpha=0.1 .......................................................
[CV] ............................. alpha=0.1, score=0.5, total=   0.0s
[CV] alpha=0.01 ......................................................
[CV] ............. alpha=0.01, score=0.6227827050997783, total=   0.0s
[CV] alpha=0.01 ......................................................
[CV] .............. alpha=0.01, score=0.613401195995431, total=   0.0s




[CV] alpha=0.01 ......................................................
[CV] ............. alpha=0.01, score=0.6664986897802863, total=   0.0s
[CV] alpha=0.01 ......................................................
[CV] ............. alpha=0.01, score=0.6042800510649735, total=   0.0s
[CV] alpha=0.01 ......................................................
[CV] ............. alpha=0.01, score=0.6199606934085871, total=   0.0s
[CV] alpha=0.01 ......................................................
[CV] ............. alpha=0.01, score=0.6407066959015011, total=   0.0s
[CV] alpha=0.01 ......................................................




[CV] ............. alpha=0.01, score=0.6194467869792545, total=   0.0s
[CV] alpha=0.01 ......................................................
[CV] ............. alpha=0.01, score=0.6180974869286557, total=   0.0s
[CV] alpha=0.01 ......................................................
[CV] ............. alpha=0.01, score=0.6135913286476417, total=   0.0s
[CV] alpha=0.01 ......................................................
[CV] ............. alpha=0.01, score=0.6163434668473199, total=   0.0s
[CV] alpha=0.001 .....................................................
[CV] ............ alpha=0.001, score=0.6777531411677753, total=   0.0s
[CV] alpha=0.001 .....................................................
[CV] ............ alpha=0.001, score=0.6264362023785527, total=   0.0s
[CV] alpha=0.001 .....................................................




[CV] ............. alpha=0.001, score=0.686941476852785, total=   0.0s
[CV] alpha=0.001 .....................................................
[CV] ............ alpha=0.001, score=0.6295941678425049, total=   0.0s
[CV] alpha=0.001 .....................................................
[CV] ............ alpha=0.001, score=0.6546059262245515, total=   0.0s
[CV] alpha=0.001 .....................................................
[CV] ............ alpha=0.001, score=0.6722381514589308, total=   0.0s
[CV] alpha=0.001 .....................................................
[CV] ............ alpha=0.001, score=0.6549671108112667, total=   0.0s
[CV] alpha=0.001 .....................................................
[CV] ............ alpha=0.001, score=0.6652217911958171, total=   0.0s
[CV] alpha=0.001 .....................................................




[CV] ............ alpha=0.001, score=0.6795325599119315, total=   0.0s
[CV] alpha=0.001 .....................................................
[CV] ............ alpha=0.001, score=0.6475908205605894, total=   0.0s
[CV] alpha=0.0001 ....................................................
[CV] ........... alpha=0.0001, score=0.6615601693207015, total=   0.0s
[CV] alpha=0.0001 ....................................................
[CV] ........... alpha=0.0001, score=0.6000974265941006, total=   0.0s
[CV] alpha=0.0001 ....................................................
[CV] ........... alpha=0.0001, score=0.6373546999932809, total=   0.0s
[CV] alpha=0.0001 ....................................................
[CV] ........... alpha=0.0001, score=0.5786131828260431, total=   0.0s
[CV] alpha=0.0001 ....................................................




[CV] ........... alpha=0.0001, score=0.6451992205872472, total=   0.0s
[CV] alpha=0.0001 ....................................................
[CV] ........... alpha=0.0001, score=0.6598077247427898, total=   0.0s
[CV] alpha=0.0001 ....................................................
[CV] ........... alpha=0.0001, score=0.6365491651205938, total=   0.0s
[CV] alpha=0.0001 ....................................................
[CV] ........... alpha=0.0001, score=0.6651880586945522, total=   0.0s
[CV] alpha=0.0001 ....................................................
[CV] ........... alpha=0.0001, score=0.6325175713438902, total=   0.0s
[CV] alpha=0.0001 ....................................................
[CV] ........... alpha=0.0001, score=0.5936150393767465, total=   0.0s
[CV] alpha=1e-05 .....................................................




[CV] ............ alpha=1e-05, score=0.5828797957401062, total=   0.0s
[CV] alpha=1e-05 .....................................................
[CV] ............ alpha=1e-05, score=0.5817375529127192, total=   0.0s
[CV] alpha=1e-05 .....................................................
[CV] ............ alpha=1e-05, score=0.6360780756567896, total=   0.0s
[CV] alpha=1e-05 .....................................................
[CV] ............ alpha=1e-05, score=0.5808976684808171, total=   0.0s
[CV] alpha=1e-05 .....................................................
[CV] ............ alpha=1e-05, score=0.6119229993952833, total=   0.0s
[CV] alpha=1e-05 .....................................................




[CV] ............ alpha=1e-05, score=0.5718502276943835, total=   0.0s
[CV] alpha=1e-05 .....................................................
[CV] ............ alpha=1e-05, score=0.6224152470905717, total=   0.0s
[CV] alpha=1e-05 .....................................................
[CV] ............ alpha=1e-05, score=0.6245235284196323, total=   0.0s
[CV] alpha=1e-05 .....................................................
[CV] ............. alpha=1e-05, score=0.592920653738674, total=   0.0s
[CV] alpha=1e-05 .....................................................
[CV] ............ alpha=1e-05, score=0.5983910576678805, total=   0.0s
[CV] alpha=1e-06 .....................................................
[CV] ............ alpha=1e-06, score=0.6339951622656722, total=   0.0s
[CV] alpha=1e-06 .....................................................




[CV] ............ alpha=1e-06, score=0.5652926157360747, total=   0.0s
[CV] alpha=1e-06 .....................................................
[CV] ............ alpha=1e-06, score=0.6430995095074917, total=   0.0s
[CV] alpha=1e-06 .....................................................
[CV] ............ alpha=1e-06, score=0.5548444534032118, total=   0.0s
[CV] alpha=1e-06 .....................................................
[CV] ............ alpha=1e-06, score=0.6192467916414701, total=   0.0s
[CV] alpha=1e-06 .....................................................
[CV] ............ alpha=1e-06, score=0.5507168156518806, total=   0.0s
[CV] alpha=1e-06 .....................................................
[CV] ............ alpha=1e-06, score=0.6209816157868105, total=   0.0s
[CV] alpha=1e-06 .....................................................




[CV] ............ alpha=1e-06, score=0.6339348962725586, total=   0.0s
[CV] alpha=1e-06 .....................................................
[CV] ............ alpha=1e-06, score=0.5803031586078415, total=   0.0s
[CV] alpha=1e-06 .....................................................
[CV] ............ alpha=1e-06, score=0.5905665170632568, total=   0.0s
ROC-AUC score on test set for l1 penalty: 0.694507
Best alpha for l1 penalty: {'alpha': 0.001}

Penalty: elasticnet 	Elastic Net mixing parameter: 0.5
Fitting 10 folds for each of 6 candidates, totalling 60 fits
[CV] alpha=0.1 .......................................................
[CV] ............................. alpha=0.1, score=0.5, total=   0.0s
[CV] alpha=0.1 .......................................................
[CV] ............................. alpha=0.1, score=0.5, total=   0.0s
[CV] alpha=0.1 .......................................................


[Parallel(n_jobs=1)]: Done  60 out of  60 | elapsed:    2.2s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.0s remaining:    0.0s


[CV] ............................. alpha=0.1, score=0.5, total=   0.0s
[CV] alpha=0.1 .......................................................
[CV] ............................. alpha=0.1, score=0.5, total=   0.0s
[CV] alpha=0.1 .......................................................
[CV] ............................. alpha=0.1, score=0.5, total=   0.0s
[CV] alpha=0.1 .......................................................
[CV] ............................. alpha=0.1, score=0.5, total=   0.0s
[CV] alpha=0.1 .......................................................
[CV] ............................. alpha=0.1, score=0.5, total=   0.0s
[CV] alpha=0.1 .......................................................
[CV] ............................. alpha=0.1, score=0.5, total=   0.0s
[CV] alpha=0.1 .......................................................




[CV] ............................. alpha=0.1, score=0.5, total=   0.0s
[CV] alpha=0.1 .......................................................
[CV] ............................. alpha=0.1, score=0.5, total=   0.0s
[CV] alpha=0.01 ......................................................
[CV] ............. alpha=0.01, score=0.6496338103876906, total=   0.0s
[CV] alpha=0.01 ......................................................
[CV] .............. alpha=0.01, score=0.631996237317745, total=   0.0s
[CV] alpha=0.01 ......................................................
[CV] ............. alpha=0.01, score=0.6795168984747699, total=   0.0s
[CV] alpha=0.01 ......................................................
[CV] ............. alpha=0.01, score=0.6280235839548478, total=   0.0s
[CV] alpha=0.01 ......................................................
[CV] ............. alpha=0.01, score=0.6400339313310489, total=   0.0s
[CV] alpha=0.01 ......................................................




[CV] ............. alpha=0.01, score=0.6602631135098669, total=   0.0s
[CV] alpha=0.01 ......................................................
[CV] ............. alpha=0.01, score=0.6315230224321133, total=   0.0s
[CV] alpha=0.01 ......................................................
[CV] ............. alpha=0.01, score=0.6397706189913983, total=   0.0s
[CV] alpha=0.01 ......................................................
[CV] ............. alpha=0.01, score=0.6398001524261157, total=   0.0s
[CV] alpha=0.01 ......................................................
[CV] ............. alpha=0.01, score=0.6289948344483023, total=   0.0s
[CV] alpha=0.001 .....................................................




[CV] ............ alpha=0.001, score=0.6687999731236982, total=   0.0s
[CV] alpha=0.001 .....................................................
[CV] ............ alpha=0.001, score=0.6194483639051267, total=   0.0s
[CV] alpha=0.001 .....................................................
[CV] ............ alpha=0.001, score=0.6931230262715851, total=   0.0s
[CV] alpha=0.001 .....................................................
[CV] ............ alpha=0.001, score=0.6309547806221864, total=   0.0s
[CV] alpha=0.001 .....................................................
[CV] ............ alpha=0.001, score=0.6584190015453872, total=   0.0s
[CV] alpha=0.001 .....................................................




[CV] ............ alpha=0.001, score=0.6812278630460449, total=   0.0s
[CV] alpha=0.001 .....................................................
[CV] ............ alpha=0.001, score=0.6553213020745489, total=   0.0s
[CV] alpha=0.001 .....................................................
[CV] ............ alpha=0.001, score=0.6596053297351998, total=   0.0s
[CV] alpha=0.001 .....................................................
[CV] ............ alpha=0.001, score=0.6756033533745448, total=   0.0s
[CV] alpha=0.001 .....................................................
[CV] ............ alpha=0.001, score=0.6448979591836734, total=   0.0s
[CV] alpha=0.0001 ....................................................




[CV] ........... alpha=0.0001, score=0.6337263992474635, total=   0.0s
[CV] alpha=0.0001 ....................................................
[CV] ........... alpha=0.0001, score=0.5853154605926224, total=   0.0s
[CV] alpha=0.0001 ....................................................
[CV] ........... alpha=0.0001, score=0.6505576832627831, total=   0.0s
[CV] alpha=0.0001 ....................................................
[CV] ........... alpha=0.0001, score=0.5701135523751931, total=   0.0s
[CV] alpha=0.0001 ....................................................
[CV] ........... alpha=0.0001, score=0.6353221796680777, total=   0.0s
[CV] alpha=0.0001 ....................................................
[CV] ........... alpha=0.0001, score=0.6337999662674987, total=   0.0s
[CV] alpha=0.0001 ....................................................




[CV] ........... alpha=0.0001, score=0.6328385899814472, total=   0.0s
[CV] alpha=0.0001 ....................................................
[CV] ........... alpha=0.0001, score=0.6477483555405633, total=   0.0s
[CV] alpha=0.0001 ....................................................
[CV] ........... alpha=0.0001, score=0.5889067660259124, total=   0.0s
[CV] alpha=0.0001 ....................................................
[CV] ........... alpha=0.0001, score=0.5904479634177322, total=   0.0s
[CV] alpha=1e-05 .....................................................
[CV] ............. alpha=1e-05, score=0.619565947725593, total=   0.0s
[CV] alpha=1e-05 .....................................................
[CV] ............ alpha=1e-05, score=0.5575992743398508, total=   0.0s
[CV] alpha=1e-05 .....................................................




[CV] ............ alpha=1e-05, score=0.6445609084190015, total=   0.0s
[CV] alpha=1e-05 .....................................................
[CV] ............. alpha=1e-05, score=0.570835852986629, total=   0.0s
[CV] alpha=1e-05 .....................................................
[CV] ............ alpha=1e-05, score=0.6258650809648592, total=   0.0s
[CV] alpha=1e-05 .....................................................
[CV] ............ alpha=1e-05, score=0.5913813459268005, total=   0.0s
[CV] alpha=1e-05 .....................................................
[CV] ............ alpha=1e-05, score=0.6227863046044864, total=   0.0s
[CV] alpha=1e-05 .....................................................
[CV] ............ alpha=1e-05, score=0.6188733344577502, total=   0.0s
[CV] alpha=1e-05 .....................................................




[CV] ............ alpha=1e-05, score=0.5862816495892963, total=   0.0s
[CV] alpha=1e-05 .....................................................
[CV] ............ alpha=1e-05, score=0.6002371072910492, total=   0.0s
[CV] alpha=1e-06 .....................................................
[CV] ............ alpha=1e-06, score=0.5923368944433247, total=   0.0s
[CV] alpha=1e-06 .....................................................
[CV] ............ alpha=1e-06, score=0.5942350332594235, total=   0.0s
[CV] alpha=1e-06 .....................................................
[CV] ............ alpha=1e-06, score=0.6510784116105625, total=   0.0s
[CV] alpha=1e-06 .....................................................
[CV] ............ alpha=1e-06, score=0.5856010212994692, total=   0.0s
[CV] alpha=1e-06 .....................................................




[CV] ............ alpha=1e-06, score=0.6099744675132702, total=   0.0s
[CV] alpha=1e-06 .....................................................
[CV] ............ alpha=1e-06, score=0.5601956485073367, total=   0.0s
[CV] alpha=1e-06 .....................................................
[CV] ............ alpha=1e-06, score=0.6244897959183674, total=   0.0s
[CV] alpha=1e-06 .....................................................
[CV] ............ alpha=1e-06, score=0.6385899814471243, total=   0.0s
[CV] alpha=1e-06 .....................................................
[CV] ............ alpha=1e-06, score=0.5748666271487848, total=   0.0s
[CV] alpha=1e-06 .....................................................
[CV] ............ alpha=1e-06, score=0.5761368447794055, total=   0.0s
ROC-AUC score on test set for elasticnet penalty: 0.693575
Best alpha for elasticnet penalty: {'alpha': 0.001}

Final results:
{'l2': {'alpha': 0.01}, 'l1': {'alpha': 0.001}, 'elasticnet': {'alpha': 0.001}} {'l2': 0.689

[Parallel(n_jobs=1)]: Done  60 out of  60 | elapsed:    2.2s finished


In [23]:
log_penalty = max(gs_log_scores, key=lambda key: gs_log_scores[key])
print('Best performance for SGD with log loss ->', log_penalty, '\nROC-AUC score:', gs_log_scores[log_penalty],
      '\nalpha:', gs_log_param[log_penalty])

Best performance for SGD with log loss -> l1 
ROC-AUC score: 0.6945073124467869 
alpha: {'alpha': 0.001}


Using the best performer to get predicted values and other evaluation metrics:

In [24]:
sgd_log_best = SGDClassifier(loss='log', penalty=log_penalty, alpha=gs_log_param[log_penalty].get('alpha'), 
                             l1_ratio=penalties[log_penalty], random_state=10)
sgd_log_best.fit(train_data_sc, train_label)
sgd_log_pred = sgd_log_best.predict(test_data_sc)



In [25]:
sgd_log_pred

array([ True,  True,  True, ..., False, False, False])

In [26]:
confusion_matrix(test_label, sgd_log_pred)

array([[469, 122],
       [359, 269]], dtype=int64)

In [27]:
print(classification_report(test_label, sgd_log_pred))

             precision    recall  f1-score   support

      False       0.57      0.79      0.66       591
       True       0.69      0.43      0.53       628

avg / total       0.63      0.61      0.59      1219



### SGD with modified Huber loss

In [28]:
%%time
# Using GridSearchCV to find optimal value for alpha for each penalty type, using ROC-AUC score as with other models

gs_hub_scores = {}
gs_hub_param = {}

for k,v in penalties.items():
    print('\nPenalty:', k, '\tElastic Net mixing parameter:', v)
    gs_hub = GridSearchCV(SGDClassifier(loss='modified_huber', penalty=k, l1_ratio=v, random_state=10), param_grid=alphas,
                          scoring='roc_auc', cv=10, verbose=3)
    gs_hub_fit = gs_hub.fit(train_data_sc, train_label)
    score_hub = gs_hub.score(test_data_sc, test_label)
    gs_hub_scores[k] = score_hub
    param_hub = gs_hub.best_params_
    gs_hub_param[k] = param_hub
    print('ROC-AUC score on test set for', k, 'penalty: %f' % score_hub)
    print('Best alpha for', k, 'penalty: %s' % param_hub)

print('\nFinal results:')
print(gs_hub_param, gs_hub_scores)


Penalty: l2 	Elastic Net mixing parameter: 0
Fitting 10 folds for each of 6 candidates, totalling 60 fits
[CV] alpha=0.1 .......................................................
[CV] .............. alpha=0.1, score=0.6634751058254384, total=   0.0s
[CV] alpha=0.1 .......................................................


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.0s remaining:    0.0s


[CV] .............. alpha=0.1, score=0.6367835785795875, total=   0.0s
[CV] alpha=0.1 .......................................................
[CV] .............. alpha=0.1, score=0.7008499630450851, total=   0.0s
[CV] alpha=0.1 .......................................................
[CV] .............. alpha=0.1, score=0.6419068736141906, total=   0.0s
[CV] alpha=0.1 .......................................................
[CV] .............. alpha=0.1, score=0.6487603305785125, total=   0.0s
[CV] alpha=0.1 .......................................................
[CV] .............. alpha=0.1, score=0.6792039129701468, total=   0.0s
[CV] alpha=0.1 .......................................................
[CV] .............. alpha=0.1, score=0.6561477483555405, total=   0.0s
[CV] alpha=0.1 .......................................................




[CV] ............... alpha=0.1, score=0.652858829482206, total=   0.0s
[CV] alpha=0.1 .......................................................
[CV] .............. alpha=0.1, score=0.6653061224489797, total=   0.0s
[CV] alpha=0.1 .......................................................




[CV] .............. alpha=0.1, score=0.6388855957320688, total=   0.0s
[CV] alpha=0.01 ......................................................
[CV] ............. alpha=0.01, score=0.6467278102533092, total=   0.0s
[CV] alpha=0.01 ......................................................
[CV] ............. alpha=0.01, score=0.6200530806960962, total=   0.0s
[CV] alpha=0.01 ......................................................
[CV] ............. alpha=0.01, score=0.7033528186521534, total=   0.0s
[CV] alpha=0.01 ......................................................
[CV] ............. alpha=0.01, score=0.6210945373916549, total=   0.0s
[CV] alpha=0.01 ......................................................
[CV] ............. alpha=0.01, score=0.6510112208560104, total=   0.0s
[CV] alpha=0.01 ......................................................




[CV] ............. alpha=0.01, score=0.6630460448642267, total=   0.0s
[CV] alpha=0.01 ......................................................
[CV] ............. alpha=0.01, score=0.6520323832012143, total=   0.0s
[CV] alpha=0.01 ......................................................




[CV] .............. alpha=0.01, score=0.650177095631641, total=   0.0s
[CV] alpha=0.01 ......................................................
[CV] ............. alpha=0.01, score=0.6505885341688542, total=   0.0s
[CV] alpha=0.01 ......................................................
[CV] ............. alpha=0.01, score=0.6471166059784911, total=   0.0s
[CV] alpha=0.001 .....................................................
[CV] ............ alpha=0.001, score=0.5919001545387355, total=   0.0s
[CV] alpha=0.001 .....................................................
[CV] ............ alpha=0.001, score=0.5652422226701606, total=   0.0s
[CV] alpha=0.001 .....................................................
[CV] ............ alpha=0.001, score=0.6304340522744071, total=   0.0s
[CV] alpha=0.001 .....................................................




[CV] ............ alpha=0.001, score=0.5721628703890346, total=   0.0s
[CV] alpha=0.001 .....................................................
[CV] ............. alpha=0.001, score=0.610679970436068, total=   0.0s
[CV] alpha=0.001 .....................................................
[CV] ............ alpha=0.001, score=0.5685107100691517, total=   0.0s
[CV] alpha=0.001 .....................................................




[CV] ............ alpha=0.001, score=0.6239838083993928, total=   0.0s
[CV] alpha=0.001 .....................................................
[CV] ............ alpha=0.001, score=0.6197166469893742, total=   0.0s
[CV] alpha=0.001 .....................................................
[CV] ............ alpha=0.001, score=0.5997967651791006, total=   0.0s
[CV] alpha=0.001 .....................................................
[CV] ............ alpha=0.001, score=0.5736810906935388, total=   0.0s
[CV] alpha=0.0001 ....................................................
[CV] ........... alpha=0.0001, score=0.5842572062084258, total=   0.0s
[CV] alpha=0.0001 ....................................................




[CV] ........... alpha=0.0001, score=0.5519720486461064, total=   0.0s
[CV] alpha=0.0001 ....................................................
[CV] ........... alpha=0.0001, score=0.6236645837532755, total=   0.0s
[CV] alpha=0.0001 ....................................................
[CV] ........... alpha=0.0001, score=0.5663844654975476, total=   0.0s
[CV] alpha=0.0001 ....................................................
[CV] ........... alpha=0.0001, score=0.6160216354229657, total=   0.0s
[CV] alpha=0.0001 ....................................................




[CV] ........... alpha=0.0001, score=0.5909934221622534, total=   0.0s
[CV] alpha=0.0001 ....................................................
[CV] ........... alpha=0.0001, score=0.6098161578681058, total=   0.0s
[CV] alpha=0.0001 ....................................................
[CV] ........... alpha=0.0001, score=0.5780401416765053, total=   0.0s
[CV] alpha=0.0001 ....................................................
[CV] ............ alpha=0.0001, score=0.584859005843001, total=   0.0s
[CV] alpha=0.0001 ....................................................
[CV] ........... alpha=0.0001, score=0.5595562706410365, total=   0.0s
[CV] alpha=1e-05 .....................................................
[CV] ............ alpha=1e-05, score=0.5947893569844789, total=   0.0s
[CV] alpha=1e-05 .....................................................




[CV] ............ alpha=1e-05, score=0.5590438755627225, total=   0.0s
[CV] alpha=1e-05 .....................................................
[CV] ............ alpha=1e-05, score=0.5899012295908083, total=   0.0s
[CV] alpha=1e-05 .....................................................
[CV] ............ alpha=1e-05, score=0.5852314721494322, total=   0.0s
[CV] alpha=1e-05 .....................................................
[CV] ............. alpha=1e-05, score=0.608361889404018, total=   0.0s
[CV] alpha=1e-05 .....................................................




[CV] ............ alpha=1e-05, score=0.5846517119244392, total=   0.0s
[CV] alpha=1e-05 .....................................................
[CV] ............ alpha=1e-05, score=0.6069657615112161, total=   0.0s
[CV] alpha=1e-05 .....................................................
[CV] ............ alpha=1e-05, score=0.6234946871310507, total=   0.0s
[CV] alpha=1e-05 .....................................................
[CV] ............ alpha=1e-05, score=0.5651621644508427, total=   0.0s
[CV] alpha=1e-05 .....................................................
[CV] ............ alpha=1e-05, score=0.5550173596409518, total=   0.0s




[CV] alpha=1e-06 .....................................................
[CV] ............ alpha=1e-06, score=0.5818047436672714, total=   0.0s
[CV] alpha=1e-06 .....................................................
[CV] ............ alpha=1e-06, score=0.5651750319156085, total=   0.0s
[CV] alpha=1e-06 .....................................................
[CV] ............ alpha=1e-06, score=0.6541859840086004, total=   0.0s
[CV] alpha=1e-06 .....................................................
[CV] ............ alpha=1e-06, score=0.5867768595041323, total=   0.0s
[CV] alpha=1e-06 .....................................................




[CV] ............. alpha=1e-06, score=0.596771484243768, total=   0.0s
[CV] alpha=1e-06 .....................................................
[CV] ............ alpha=1e-06, score=0.5962556923595884, total=   0.0s
[CV] alpha=1e-06 .....................................................
[CV] ............ alpha=1e-06, score=0.6129026817338505, total=   0.0s
[CV] alpha=1e-06 .....................................................
[CV] ............ alpha=1e-06, score=0.6246921909259572, total=   0.0s
[CV] alpha=1e-06 .....................................................




[CV] ............ alpha=1e-06, score=0.5642137352866458, total=   0.0s
[CV] alpha=1e-06 .....................................................
[CV] ............ alpha=1e-06, score=0.5826403590481836, total=   0.0s
ROC-AUC score on test set for l2 penalty: 0.687489
Best alpha for l2 penalty: {'alpha': 0.1}

Penalty: l1 	Elastic Net mixing parameter: 1
Fitting 10 folds for each of 6 candidates, totalling 60 fits
[CV] alpha=0.1 .......................................................
[CV] .............. alpha=0.1, score=0.5705838876570584, total=   0.0s
[CV] alpha=0.1 .......................................................


[Parallel(n_jobs=1)]: Done  60 out of  60 | elapsed:    1.4s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s


[CV] .............. alpha=0.1, score=0.5582879795740106, total=   0.0s
[CV] alpha=0.1 .......................................................
[CV] .............. alpha=0.1, score=0.5622522340925888, total=   0.0s
[CV] alpha=0.1 .......................................................
[CV] .............. alpha=0.1, score=0.5418934354632803, total=   0.0s
[CV] alpha=0.1 .......................................................


[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.0s remaining:    0.0s


[CV] .............. alpha=0.1, score=0.5521568232211247, total=   0.0s
[CV] alpha=0.1 .......................................................
[CV] .............. alpha=0.1, score=0.5667060212514757, total=   0.0s
[CV] alpha=0.1 .......................................................
[CV] .............. alpha=0.1, score=0.5562995446112329, total=   0.0s
[CV] alpha=0.1 .......................................................




[CV] .............. alpha=0.1, score=0.5607859672794737, total=   0.0s
[CV] alpha=0.1 .......................................................
[CV] ............... alpha=0.1, score=0.519883139977983, total=   0.0s
[CV] alpha=0.1 .......................................................
[CV] ............................. alpha=0.1, score=0.5, total=   0.0s
[CV] alpha=0.01 ......................................................




[CV] ............. alpha=0.01, score=0.6698246321306189, total=   0.0s
[CV] alpha=0.01 ......................................................
[CV] ............. alpha=0.01, score=0.6343311160384331, total=   0.0s
[CV] alpha=0.01 ......................................................
[CV] ............. alpha=0.01, score=0.6747715514345225, total=   0.0s
[CV] alpha=0.01 ......................................................




[CV] ............. alpha=0.01, score=0.6268897399717799, total=   0.0s
[CV] alpha=0.01 ......................................................
[CV] ............. alpha=0.01, score=0.6440233823825842, total=   0.0s
[CV] alpha=0.01 ......................................................
[CV] ............. alpha=0.01, score=0.6596390622364648, total=   0.0s
[CV] alpha=0.01 ......................................................




[CV] ............. alpha=0.01, score=0.6460111317254174, total=   0.0s
[CV] alpha=0.01 ......................................................
[CV] ............. alpha=0.01, score=0.6505987518974532, total=   0.0s
[CV] alpha=0.01 ......................................................
[CV] .............. alpha=0.01, score=0.651282919806927, total=   0.0s
[CV] alpha=0.01 ......................................................




[CV] ............. alpha=0.01, score=0.6437124227284275, total=   0.0s
[CV] alpha=0.001 .....................................................
[CV] ............ alpha=0.001, score=0.6917120204259893, total=   0.0s
[CV] alpha=0.001 .....................................................
[CV] ............ alpha=0.001, score=0.6179701673049789, total=   0.0s
[CV] alpha=0.001 .....................................................




[CV] ............ alpha=0.001, score=0.6303332661425788, total=   0.0s
[CV] alpha=0.001 .....................................................
[CV] ............ alpha=0.001, score=0.5926392528388094, total=   0.0s
[CV] alpha=0.001 .....................................................
[CV] ............ alpha=0.001, score=0.6632063428072297, total=   0.0s
[CV] alpha=0.001 .....................................................




[CV] ............ alpha=0.001, score=0.6515769944341374, total=   0.0s
[CV] alpha=0.001 .....................................................
[CV] ............ alpha=0.001, score=0.6375948726598077, total=   0.0s
[CV] alpha=0.001 .....................................................
[CV] ............ alpha=0.001, score=0.6609546297857987, total=   0.0s
[CV] alpha=0.001 .....................................................




[CV] ............ alpha=0.001, score=0.6414768396985351, total=   0.0s
[CV] alpha=0.001 .....................................................
[CV] ............. alpha=0.001, score=0.589922940130409, total=   0.0s
[CV] alpha=0.0001 ....................................................
[CV] ........... alpha=0.0001, score=0.5639823960223074, total=   0.0s
[CV] alpha=0.0001 ....................................................




[CV] ........... alpha=0.0001, score=0.5698279916683465, total=   0.0s
[CV] alpha=0.0001 ....................................................
[CV] ........... alpha=0.0001, score=0.6108983403883625, total=   0.0s
[CV] alpha=0.0001 ....................................................
[CV] ........... alpha=0.0001, score=0.5701807431297454, total=   0.0s
[CV] alpha=0.0001 ....................................................




[CV] ........... alpha=0.0001, score=0.6364644224954648, total=   0.0s
[CV] alpha=0.0001 ....................................................
[CV] ........... alpha=0.0001, score=0.5920728622027324, total=   0.0s
[CV] alpha=0.0001 ....................................................
[CV] ........... alpha=0.0001, score=0.6114353179288244, total=   0.0s
[CV] alpha=0.0001 ....................................................




[CV] ........... alpha=0.0001, score=0.6224995783437342, total=   0.0s
[CV] alpha=0.0001 ....................................................
[CV] ............ alpha=0.0001, score=0.589939876365484, total=   0.0s
[CV] alpha=0.0001 ....................................................
[CV] ........... alpha=0.0001, score=0.5806757557794902, total=   0.0s
[CV] alpha=1e-05 .....................................................




[CV] ............ alpha=1e-05, score=0.5930759927433985, total=   0.0s
[CV] alpha=1e-05 .....................................................
[CV] ............ alpha=1e-05, score=0.5679130551636095, total=   0.0s
[CV] alpha=1e-05 .....................................................
[CV] ............ alpha=1e-05, score=0.6214136934757777, total=   0.0s
[CV] alpha=1e-05 .....................................................
[CV] ............ alpha=1e-05, score=0.5740946045824095, total=   0.0s
[CV] alpha=1e-05 .....................................................




[CV] ............. alpha=1e-05, score=0.611099912652019, total=   0.0s
[CV] alpha=1e-05 .....................................................
[CV] ............ alpha=1e-05, score=0.5664867599932536, total=   0.0s
[CV] alpha=1e-05 .....................................................
[CV] ............ alpha=1e-05, score=0.6551863720694889, total=   0.0s
[CV] alpha=1e-05 .....................................................




[CV] ............ alpha=1e-05, score=0.6218923933209648, total=   0.0s
[CV] alpha=1e-05 .....................................................




[CV] ............ alpha=1e-05, score=0.5757473113726819, total=   0.0s
[CV] alpha=1e-05 .....................................................
[CV] ............ alpha=1e-05, score=0.5695147768651029, total=   0.0s
[CV] alpha=1e-06 .....................................................
[CV] ............ alpha=1e-06, score=0.5608916213129074, total=   0.0s
[CV] alpha=1e-06 .....................................................
[CV] ............ alpha=1e-06, score=0.5592958408922932, total=   0.0s
[CV] alpha=1e-06 .....................................................
[CV] ............ alpha=1e-06, score=0.6397903648457972, total=   0.0s
[CV] alpha=1e-06 .....................................................
[CV] ............ alpha=1e-06, score=0.5708526506752671, total=   0.0s
[CV] alpha=1e-06 .....................................................




[CV] ............ alpha=1e-06, score=0.6128636699590136, total=   0.0s
[CV] alpha=1e-06 .....................................................
[CV] ............ alpha=1e-06, score=0.5894754596053298, total=   0.0s
[CV] alpha=1e-06 .....................................................
[CV] ............ alpha=1e-06, score=0.6002192612582223, total=   0.0s
[CV] alpha=1e-06 .....................................................
[CV] ............ alpha=1e-06, score=0.6290436835891381, total=   0.0s
[CV] alpha=1e-06 .....................................................
[CV] ............ alpha=1e-06, score=0.5967821153357609, total=   0.0s
[CV] alpha=1e-06 .....................................................
[CV] ............ alpha=1e-06, score=0.5995596578880515, total=   0.0s


[Parallel(n_jobs=1)]: Done  60 out of  60 | elapsed:    2.0s finished


ROC-AUC score on test set for l1 penalty: 0.688275
Best alpha for l1 penalty: {'alpha': 0.01}

Penalty: elasticnet 	Elastic Net mixing parameter: 0.5
Fitting 10 folds for each of 6 candidates, totalling 60 fits
[CV] alpha=0.1 .......................................................
[CV] .............. alpha=0.1, score=0.6210021501041456, total=   0.0s
[CV] alpha=0.1 .......................................................
[CV] .............. alpha=0.1, score=0.6140059127864006, total=   0.0s
[CV] alpha=0.1 .......................................................
[CV] .............. alpha=0.1, score=0.6344235033259424, total=   0.0s
[CV] alpha=0.1 .......................................................
[CV] .............. alpha=0.1, score=0.6022811261170463, total=   0.0s
[CV] alpha=0.1 .......................................................
[CV] .............. alpha=0.1, score=0.6219428206678761, total=   0.0s
[CV] alpha=0.1 .......................................................


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.0s remaining:    0.0s


[CV] .............. alpha=0.1, score=0.6375189745319616, total=   0.0s
[CV] alpha=0.1 .......................................................
[CV] ............... alpha=0.1, score=0.613332771124979, total=   0.0s
[CV] alpha=0.1 .......................................................
[CV] .............. alpha=0.1, score=0.6101281835048068, total=   0.0s
[CV] alpha=0.1 .......................................................
[CV] .............. alpha=0.1, score=0.5903294097722077, total=   0.0s
[CV] alpha=0.1 .......................................................
[CV] .............. alpha=0.1, score=0.5756118214920822, total=   0.0s
[CV] alpha=0.01 ......................................................
[CV] ............. alpha=0.01, score=0.6709332795807297, total=   0.0s
[CV] alpha=0.01 ......................................................
[CV] ............. alpha=0.01, score=0.6415541221527918, total=   0.0s
[CV] alpha=0.01 ......................................................




[CV] ............. alpha=0.01, score=0.6842874420479741, total=   0.0s
[CV] alpha=0.01 ......................................................
[CV] ............. alpha=0.01, score=0.6310219713767385, total=   0.0s
[CV] alpha=0.01 ......................................................
[CV] ............. alpha=0.01, score=0.6483403883625614, total=   0.0s
[CV] alpha=0.01 ......................................................
[CV] ............. alpha=0.01, score=0.6743970315398887, total=   0.0s
[CV] alpha=0.01 ......................................................




[CV] ............. alpha=0.01, score=0.6477652217911958, total=   0.0s
[CV] alpha=0.01 ......................................................
[CV] ............... alpha=0.01, score=0.65125653567212, total=   0.0s
[CV] alpha=0.01 ......................................................
[CV] ............. alpha=0.01, score=0.6704716741468372, total=   0.0s
[CV] alpha=0.01 ......................................................
[CV] ............. alpha=0.01, score=0.6344313659073588, total=   0.0s
[CV] alpha=0.001 .....................................................
[CV] ............. alpha=0.001, score=0.666750655109857, total=   0.0s
[CV] alpha=0.001 .....................................................




[CV] ............ alpha=0.001, score=0.6015588255056104, total=   0.0s
[CV] alpha=0.001 .....................................................
[CV] ............ alpha=0.001, score=0.6422428273869516, total=   0.0s
[CV] alpha=0.001 .....................................................
[CV] ............ alpha=0.001, score=0.5544413088758986, total=   0.0s
[CV] alpha=0.001 .....................................................
[CV] ............. alpha=0.001, score=0.652506215144796, total=   0.0s
[CV] alpha=0.001 .....................................................
[CV] ............ alpha=0.001, score=0.5983302411873841, total=   0.0s
[CV] alpha=0.001 .....................................................
[CV] ............. alpha=0.001, score=0.623258559622196, total=   0.0s
[CV] alpha=0.001 .....................................................




[CV] ............ alpha=0.001, score=0.6247090571765896, total=   0.0s
[CV] alpha=0.001 .....................................................
[CV] ............ alpha=0.001, score=0.6336184266237614, total=   0.0s
[CV] alpha=0.001 .....................................................
[CV] ............ alpha=0.001, score=0.5640105004657464, total=   0.0s
[CV] alpha=0.0001 ....................................................
[CV] ........... alpha=0.0001, score=0.5814351945172345, total=   0.0s
[CV] alpha=0.0001 ....................................................
[CV] ........... alpha=0.0001, score=0.5505610428005107, total=   0.0s
[CV] alpha=0.0001 ....................................................
[CV] ........... alpha=0.0001, score=0.6234462138009811, total=   0.0s
[CV] alpha=0.0001 ....................................................




[CV] ........... alpha=0.0001, score=0.5666700262043942, total=   0.0s
[CV] alpha=0.0001 ....................................................
[CV] ........... alpha=0.0001, score=0.6413189545118593, total=   0.0s
[CV] alpha=0.0001 ....................................................
[CV] ............ alpha=0.0001, score=0.601602293810086, total=   0.0s
[CV] alpha=0.0001 ....................................................
[CV] ........... alpha=0.0001, score=0.6328217237308147, total=   0.0s
[CV] alpha=0.0001 ....................................................
[CV] ........... alpha=0.0001, score=0.5965930173722382, total=   0.0s
[CV] alpha=0.0001 ....................................................
[CV] ........... alpha=0.0001, score=0.5870099076975188, total=   0.0s
[CV] alpha=0.0001 ....................................................




[CV] ........... alpha=0.0001, score=0.5844355999661275, total=   0.0s
[CV] alpha=1e-05 .....................................................
[CV] ............ alpha=1e-05, score=0.5972418195256334, total=   0.0s
[CV] alpha=1e-05 .....................................................
[CV] ............ alpha=1e-05, score=0.5975777732983941, total=   0.0s
[CV] alpha=1e-05 .....................................................
[CV] ............ alpha=1e-05, score=0.6413021568232211, total=   0.0s
[CV] alpha=1e-05 .....................................................
[CV] ............ alpha=1e-05, score=0.5504434589800443, total=   0.0s
[CV] alpha=1e-05 .....................................................
[CV] ............ alpha=1e-05, score=0.6072700396425452, total=   0.0s
[CV] alpha=1e-05 .....................................................
[CV] ............ alpha=1e-05, score=0.5477146230392984, total=   0.0s




[CV] alpha=1e-05 .....................................................
[CV] ............ alpha=1e-05, score=0.6041659639062237, total=   0.0s
[CV] alpha=1e-05 .....................................................
[CV] ............. alpha=1e-05, score=0.604351492663181, total=   0.0s
[CV] alpha=1e-05 .....................................................
[CV] ............ alpha=1e-05, score=0.5844355999661275, total=   0.0s
[CV] alpha=1e-05 .....................................................
[CV] ............ alpha=1e-05, score=0.5625878567194513, total=   0.0s
[CV] alpha=1e-06 .....................................................
[CV] ............ alpha=1e-06, score=0.6179701673049789, total=   0.0s
[CV] alpha=1e-06 .....................................................




[CV] ............ alpha=1e-06, score=0.5757407780689376, total=   0.0s
[CV] alpha=1e-06 .....................................................
[CV] ............ alpha=1e-06, score=0.6443257407780689, total=   0.0s
[CV] alpha=1e-06 .....................................................
[CV] ............ alpha=1e-06, score=0.5602701068332998, total=   0.0s
[CV] alpha=1e-06 .....................................................
[CV] ............. alpha=1e-06, score=0.594419807834442, total=   0.0s
[CV] alpha=1e-06 .....................................................
[CV] ............ alpha=1e-06, score=0.5770113003879238, total=   0.0s
[CV] alpha=1e-06 .....................................................
[CV] ............ alpha=1e-06, score=0.6217574633159049, total=   0.0s
[CV] alpha=1e-06 .....................................................
[CV] ............ alpha=1e-06, score=0.6054815314555575, total=   0.0s
[CV] alpha=1e-06 .....................................................




[CV] ............ alpha=1e-06, score=0.5778474045219748, total=   0.0s
[CV] alpha=1e-06 .....................................................
[CV] ............ alpha=1e-06, score=0.5817766110593615, total=   0.0s
ROC-AUC score on test set for elasticnet penalty: 0.686783
Best alpha for elasticnet penalty: {'alpha': 0.01}

Final results:
{'l2': {'alpha': 0.1}, 'l1': {'alpha': 0.01}, 'elasticnet': {'alpha': 0.01}} {'l2': 0.6874885490424305, 'l1': 0.6882752971860282, 'elasticnet': 0.6867826311875586}
Wall time: 5.92 s


[Parallel(n_jobs=1)]: Done  60 out of  60 | elapsed:    2.1s finished


In [29]:
hub_penalty = max(gs_hub_scores, key=lambda key: gs_hub_scores[key])
print('Best performance for SGD with modified Huber loss ->', hub_penalty, '\nROC-AUC score:', gs_hub_scores[hub_penalty],
      '\nalpha:', gs_hub_param[hub_penalty])

Best performance for SGD with modified Huber loss -> l1 
ROC-AUC score: 0.6882752971860282 
alpha: {'alpha': 0.01}


Using the best performer to get predicted values and other evaluation metrics:

In [30]:
sgd_hub_best = SGDClassifier(loss='modified_huber', penalty=hub_penalty, alpha=gs_hub_param[hub_penalty].get('alpha'),
                             l1_ratio=penalties[hub_penalty], random_state=10)
sgd_hub_best.fit(train_data_sc, train_label)
sgd_hub_pred = sgd_hub_best.predict(test_data_sc)



In [31]:
sgd_hub_pred

array([ True,  True,  True, ..., False, False, False])

In [32]:
confusion_matrix(test_label, sgd_hub_pred)

array([[459, 132],
       [338, 290]], dtype=int64)

In [33]:
print(classification_report(test_label, sgd_hub_pred))

             precision    recall  f1-score   support

      False       0.58      0.78      0.66       591
       True       0.69      0.46      0.55       628

avg / total       0.63      0.61      0.61      1219

