In [46]:
import pandas as pd
import numpy as np

from sklearn.preprocessing import RobustScaler
from sklearn.model_selection import GridSearchCV, RepeatedStratifiedKFold
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression

In [47]:
# Read Training and Test Data into dataframes
training_data = pd.read_csv('training_set.csv',index_col=0)
test_data = pd.read_csv('test_set.csv',index_col=0)

In [48]:
best_features = pd.read_csv('best_features_list.csv', header=None).iloc[:,0].tolist()

In [49]:
best_features

['X1',
 'X2',
 'X3',
 'X5',
 'X6',
 'X7',
 'X8',
 'X9',
 'X10',
 'X12',
 'X13',
 'X14',
 'X15',
 'X16',
 'X17',
 'X18',
 'X19',
 'X20',
 'X21',
 'X22',
 'X23',
 'X24',
 'X25',
 'X26',
 'X27',
 'X28',
 'X29',
 'X30',
 'X33',
 'X35',
 'X36',
 'X37',
 'X39',
 'X41',
 'X42',
 'X43',
 'X44',
 'X45',
 'X46',
 'X48',
 'X49',
 'X50',
 'X52',
 'X53',
 'X54',
 'X55',
 'X56',
 'X57',
 'Y']

In [61]:
%%time
X = training_data.loc[:, best_features].drop(['Y'],axis=1)
y = training_data['Y']
X = RobustScaler().fit_transform(X)
    
param_grid = [
    {'penalty': ['l1', 'l2'], 
     'C': np.logspace(-2, 2, 10), 
     'solver': ['liblinear'],
     'class_weight' : ['balanced', None]
    }
]

lr = LogisticRegression()

#n_splits is set to 5 to divide the data in 4:1 ratio

cv = RepeatedStratifiedKFold(
    n_splits=5, random_state=42
)

search = GridSearchCV(
    estimator=lr, param_grid=param_grid,
    scoring='roc_auc', cv=cv, verbose=3
)

search.fit(X, y)

Fitting 50 folds for each of 40 candidates, totalling 2000 fits
[CV 1/50] END C=0.01, class_weight=balanced, penalty=l1, solver=liblinear;, score=0.941 total time=   0.0s
[CV 2/50] END C=0.01, class_weight=balanced, penalty=l1, solver=liblinear;, score=0.948 total time=   0.0s
[CV 3/50] END C=0.01, class_weight=balanced, penalty=l1, solver=liblinear;, score=0.952 total time=   0.0s
[CV 4/50] END C=0.01, class_weight=balanced, penalty=l1, solver=liblinear;, score=0.957 total time=   0.0s
[CV 5/50] END C=0.01, class_weight=balanced, penalty=l1, solver=liblinear;, score=0.954 total time=   0.0s
[CV 6/50] END C=0.01, class_weight=balanced, penalty=l1, solver=liblinear;, score=0.952 total time=   0.0s
[CV 7/50] END C=0.01, class_weight=balanced, penalty=l1, solver=liblinear;, score=0.950 total time=   0.0s
[CV 8/50] END C=0.01, class_weight=balanced, penalty=l1, solver=liblinear;, score=0.946 total time=   0.0s
[CV 9/50] END C=0.01, class_weight=balanced, penalty=l1, solver=liblinear;, scor

[CV 33/50] END C=0.01, class_weight=balanced, penalty=l2, solver=liblinear;, score=0.957 total time=   0.0s
[CV 34/50] END C=0.01, class_weight=balanced, penalty=l2, solver=liblinear;, score=0.971 total time=   0.0s
[CV 35/50] END C=0.01, class_weight=balanced, penalty=l2, solver=liblinear;, score=0.954 total time=   0.0s
[CV 36/50] END C=0.01, class_weight=balanced, penalty=l2, solver=liblinear;, score=0.962 total time=   0.0s
[CV 37/50] END C=0.01, class_weight=balanced, penalty=l2, solver=liblinear;, score=0.965 total time=   0.0s
[CV 38/50] END C=0.01, class_weight=balanced, penalty=l2, solver=liblinear;, score=0.956 total time=   0.0s
[CV 39/50] END C=0.01, class_weight=balanced, penalty=l2, solver=liblinear;, score=0.962 total time=   0.0s
[CV 40/50] END C=0.01, class_weight=balanced, penalty=l2, solver=liblinear;, score=0.966 total time=   0.0s
[CV 41/50] END C=0.01, class_weight=balanced, penalty=l2, solver=liblinear;, score=0.973 total time=   0.0s
[CV 42/50] END C=0.01, class

[CV 15/50] END C=0.01, class_weight=None, penalty=l2, solver=liblinear;, score=0.959 total time=   0.0s
[CV 16/50] END C=0.01, class_weight=None, penalty=l2, solver=liblinear;, score=0.966 total time=   0.0s
[CV 17/50] END C=0.01, class_weight=None, penalty=l2, solver=liblinear;, score=0.957 total time=   0.0s
[CV 18/50] END C=0.01, class_weight=None, penalty=l2, solver=liblinear;, score=0.960 total time=   0.0s
[CV 19/50] END C=0.01, class_weight=None, penalty=l2, solver=liblinear;, score=0.968 total time=   0.0s
[CV 20/50] END C=0.01, class_weight=None, penalty=l2, solver=liblinear;, score=0.960 total time=   0.0s
[CV 21/50] END C=0.01, class_weight=None, penalty=l2, solver=liblinear;, score=0.956 total time=   0.0s
[CV 22/50] END C=0.01, class_weight=None, penalty=l2, solver=liblinear;, score=0.966 total time=   0.0s
[CV 23/50] END C=0.01, class_weight=None, penalty=l2, solver=liblinear;, score=0.967 total time=   0.0s
[CV 24/50] END C=0.01, class_weight=None, penalty=l2, solver=lib

[CV 50/50] END C=0.027825594022071243, class_weight=balanced, penalty=l1, solver=liblinear;, score=0.967 total time=   0.0s
[CV 1/50] END C=0.027825594022071243, class_weight=balanced, penalty=l2, solver=liblinear;, score=0.962 total time=   0.0s
[CV 2/50] END C=0.027825594022071243, class_weight=balanced, penalty=l2, solver=liblinear;, score=0.968 total time=   0.0s
[CV 3/50] END C=0.027825594022071243, class_weight=balanced, penalty=l2, solver=liblinear;, score=0.972 total time=   0.0s
[CV 4/50] END C=0.027825594022071243, class_weight=balanced, penalty=l2, solver=liblinear;, score=0.967 total time=   0.0s
[CV 5/50] END C=0.027825594022071243, class_weight=balanced, penalty=l2, solver=liblinear;, score=0.963 total time=   0.0s
[CV 6/50] END C=0.027825594022071243, class_weight=balanced, penalty=l2, solver=liblinear;, score=0.965 total time=   0.0s
[CV 7/50] END C=0.027825594022071243, class_weight=balanced, penalty=l2, solver=liblinear;, score=0.971 total time=   0.0s
[CV 8/50] END C

[CV 27/50] END C=0.027825594022071243, class_weight=None, penalty=l1, solver=liblinear;, score=0.967 total time=   0.0s
[CV 28/50] END C=0.027825594022071243, class_weight=None, penalty=l1, solver=liblinear;, score=0.962 total time=   0.0s
[CV 29/50] END C=0.027825594022071243, class_weight=None, penalty=l1, solver=liblinear;, score=0.962 total time=   0.0s
[CV 30/50] END C=0.027825594022071243, class_weight=None, penalty=l1, solver=liblinear;, score=0.960 total time=   0.0s
[CV 31/50] END C=0.027825594022071243, class_weight=None, penalty=l1, solver=liblinear;, score=0.966 total time=   0.0s
[CV 32/50] END C=0.027825594022071243, class_weight=None, penalty=l1, solver=liblinear;, score=0.966 total time=   0.0s
[CV 33/50] END C=0.027825594022071243, class_weight=None, penalty=l1, solver=liblinear;, score=0.952 total time=   0.0s
[CV 34/50] END C=0.027825594022071243, class_weight=None, penalty=l1, solver=liblinear;, score=0.969 total time=   0.0s
[CV 35/50] END C=0.027825594022071243, c

[CV 46/50] END C=0.027825594022071243, class_weight=None, penalty=l2, solver=liblinear;, score=0.963 total time=   0.0s
[CV 47/50] END C=0.027825594022071243, class_weight=None, penalty=l2, solver=liblinear;, score=0.970 total time=   0.0s
[CV 48/50] END C=0.027825594022071243, class_weight=None, penalty=l2, solver=liblinear;, score=0.958 total time=   0.0s
[CV 49/50] END C=0.027825594022071243, class_weight=None, penalty=l2, solver=liblinear;, score=0.971 total time=   0.0s
[CV 50/50] END C=0.027825594022071243, class_weight=None, penalty=l2, solver=liblinear;, score=0.973 total time=   0.0s
[CV 1/50] END C=0.0774263682681127, class_weight=balanced, penalty=l1, solver=liblinear;, score=0.962 total time=   0.0s
[CV 2/50] END C=0.0774263682681127, class_weight=balanced, penalty=l1, solver=liblinear;, score=0.968 total time=   0.0s
[CV 3/50] END C=0.0774263682681127, class_weight=balanced, penalty=l1, solver=liblinear;, score=0.972 total time=   0.0s
[CV 4/50] END C=0.0774263682681127, c

[CV 23/50] END C=0.0774263682681127, class_weight=balanced, penalty=l2, solver=liblinear;, score=0.974 total time=   0.0s
[CV 24/50] END C=0.0774263682681127, class_weight=balanced, penalty=l2, solver=liblinear;, score=0.974 total time=   0.0s
[CV 25/50] END C=0.0774263682681127, class_weight=balanced, penalty=l2, solver=liblinear;, score=0.966 total time=   0.0s
[CV 26/50] END C=0.0774263682681127, class_weight=balanced, penalty=l2, solver=liblinear;, score=0.972 total time=   0.0s
[CV 27/50] END C=0.0774263682681127, class_weight=balanced, penalty=l2, solver=liblinear;, score=0.975 total time=   0.0s
[CV 28/50] END C=0.0774263682681127, class_weight=balanced, penalty=l2, solver=liblinear;, score=0.968 total time=   0.0s
[CV 29/50] END C=0.0774263682681127, class_weight=balanced, penalty=l2, solver=liblinear;, score=0.970 total time=   0.0s
[CV 30/50] END C=0.0774263682681127, class_weight=balanced, penalty=l2, solver=liblinear;, score=0.963 total time=   0.0s
[CV 31/50] END C=0.07742

[CV 41/50] END C=0.0774263682681127, class_weight=None, penalty=l1, solver=liblinear;, score=0.975 total time=   0.0s
[CV 42/50] END C=0.0774263682681127, class_weight=None, penalty=l1, solver=liblinear;, score=0.959 total time=   0.0s
[CV 43/50] END C=0.0774263682681127, class_weight=None, penalty=l1, solver=liblinear;, score=0.965 total time=   0.0s
[CV 44/50] END C=0.0774263682681127, class_weight=None, penalty=l1, solver=liblinear;, score=0.974 total time=   0.0s
[CV 45/50] END C=0.0774263682681127, class_weight=None, penalty=l1, solver=liblinear;, score=0.963 total time=   0.0s
[CV 46/50] END C=0.0774263682681127, class_weight=None, penalty=l1, solver=liblinear;, score=0.966 total time=   0.0s
[CV 47/50] END C=0.0774263682681127, class_weight=None, penalty=l1, solver=liblinear;, score=0.971 total time=   0.0s
[CV 48/50] END C=0.0774263682681127, class_weight=None, penalty=l1, solver=liblinear;, score=0.959 total time=   0.0s
[CV 49/50] END C=0.0774263682681127, class_weight=None, 

[CV 15/50] END C=0.21544346900318834, class_weight=balanced, penalty=l1, solver=liblinear;, score=0.968 total time=   0.0s
[CV 16/50] END C=0.21544346900318834, class_weight=balanced, penalty=l1, solver=liblinear;, score=0.974 total time=   0.0s
[CV 17/50] END C=0.21544346900318834, class_weight=balanced, penalty=l1, solver=liblinear;, score=0.964 total time=   0.0s
[CV 18/50] END C=0.21544346900318834, class_weight=balanced, penalty=l1, solver=liblinear;, score=0.972 total time=   0.0s
[CV 19/50] END C=0.21544346900318834, class_weight=balanced, penalty=l1, solver=liblinear;, score=0.977 total time=   0.0s
[CV 20/50] END C=0.21544346900318834, class_weight=balanced, penalty=l1, solver=liblinear;, score=0.965 total time=   0.0s
[CV 21/50] END C=0.21544346900318834, class_weight=balanced, penalty=l1, solver=liblinear;, score=0.965 total time=   0.0s
[CV 22/50] END C=0.21544346900318834, class_weight=balanced, penalty=l1, solver=liblinear;, score=0.973 total time=   0.0s
[CV 23/50] END C

[CV 33/50] END C=0.21544346900318834, class_weight=balanced, penalty=l2, solver=liblinear;, score=0.965 total time=   0.0s
[CV 34/50] END C=0.21544346900318834, class_weight=balanced, penalty=l2, solver=liblinear;, score=0.975 total time=   0.0s
[CV 35/50] END C=0.21544346900318834, class_weight=balanced, penalty=l2, solver=liblinear;, score=0.969 total time=   0.0s
[CV 36/50] END C=0.21544346900318834, class_weight=balanced, penalty=l2, solver=liblinear;, score=0.971 total time=   0.0s
[CV 37/50] END C=0.21544346900318834, class_weight=balanced, penalty=l2, solver=liblinear;, score=0.970 total time=   0.0s
[CV 38/50] END C=0.21544346900318834, class_weight=balanced, penalty=l2, solver=liblinear;, score=0.962 total time=   0.0s
[CV 39/50] END C=0.21544346900318834, class_weight=balanced, penalty=l2, solver=liblinear;, score=0.970 total time=   0.0s
[CV 40/50] END C=0.21544346900318834, class_weight=balanced, penalty=l2, solver=liblinear;, score=0.974 total time=   0.0s
[CV 41/50] END C

[CV 8/50] END C=0.21544346900318834, class_weight=None, penalty=l2, solver=liblinear;, score=0.971 total time=   0.0s
[CV 9/50] END C=0.21544346900318834, class_weight=None, penalty=l2, solver=liblinear;, score=0.972 total time=   0.0s
[CV 10/50] END C=0.21544346900318834, class_weight=None, penalty=l2, solver=liblinear;, score=0.972 total time=   0.0s
[CV 11/50] END C=0.21544346900318834, class_weight=None, penalty=l2, solver=liblinear;, score=0.962 total time=   0.0s
[CV 12/50] END C=0.21544346900318834, class_weight=None, penalty=l2, solver=liblinear;, score=0.965 total time=   0.0s
[CV 13/50] END C=0.21544346900318834, class_weight=None, penalty=l2, solver=liblinear;, score=0.979 total time=   0.0s
[CV 14/50] END C=0.21544346900318834, class_weight=None, penalty=l2, solver=liblinear;, score=0.978 total time=   0.0s
[CV 15/50] END C=0.21544346900318834, class_weight=None, penalty=l2, solver=liblinear;, score=0.968 total time=   0.0s
[CV 16/50] END C=0.21544346900318834, class_weight

[CV 31/50] END C=0.5994842503189409, class_weight=balanced, penalty=l1, solver=liblinear;, score=0.975 total time=   0.0s
[CV 32/50] END C=0.5994842503189409, class_weight=balanced, penalty=l1, solver=liblinear;, score=0.975 total time=   0.0s
[CV 33/50] END C=0.5994842503189409, class_weight=balanced, penalty=l1, solver=liblinear;, score=0.966 total time=   0.0s
[CV 34/50] END C=0.5994842503189409, class_weight=balanced, penalty=l1, solver=liblinear;, score=0.975 total time=   0.0s
[CV 35/50] END C=0.5994842503189409, class_weight=balanced, penalty=l1, solver=liblinear;, score=0.971 total time=   0.0s
[CV 36/50] END C=0.5994842503189409, class_weight=balanced, penalty=l1, solver=liblinear;, score=0.971 total time=   0.0s
[CV 37/50] END C=0.5994842503189409, class_weight=balanced, penalty=l1, solver=liblinear;, score=0.971 total time=   0.0s
[CV 38/50] END C=0.5994842503189409, class_weight=balanced, penalty=l1, solver=liblinear;, score=0.964 total time=   0.0s
[CV 39/50] END C=0.59948

[CV 1/50] END C=0.5994842503189409, class_weight=None, penalty=l1, solver=liblinear;, score=0.970 total time=   0.0s
[CV 2/50] END C=0.5994842503189409, class_weight=None, penalty=l1, solver=liblinear;, score=0.968 total time=   0.0s
[CV 3/50] END C=0.5994842503189409, class_weight=None, penalty=l1, solver=liblinear;, score=0.980 total time=   0.0s
[CV 4/50] END C=0.5994842503189409, class_weight=None, penalty=l1, solver=liblinear;, score=0.971 total time=   0.0s
[CV 5/50] END C=0.5994842503189409, class_weight=None, penalty=l1, solver=liblinear;, score=0.969 total time=   0.0s
[CV 6/50] END C=0.5994842503189409, class_weight=None, penalty=l1, solver=liblinear;, score=0.969 total time=   0.0s
[CV 7/50] END C=0.5994842503189409, class_weight=None, penalty=l1, solver=liblinear;, score=0.975 total time=   0.0s
[CV 8/50] END C=0.5994842503189409, class_weight=None, penalty=l1, solver=liblinear;, score=0.974 total time=   0.0s
[CV 9/50] END C=0.5994842503189409, class_weight=None, penalty=l

[CV 22/50] END C=0.5994842503189409, class_weight=None, penalty=l2, solver=liblinear;, score=0.974 total time=   0.0s
[CV 23/50] END C=0.5994842503189409, class_weight=None, penalty=l2, solver=liblinear;, score=0.974 total time=   0.0s
[CV 24/50] END C=0.5994842503189409, class_weight=None, penalty=l2, solver=liblinear;, score=0.976 total time=   0.0s
[CV 25/50] END C=0.5994842503189409, class_weight=None, penalty=l2, solver=liblinear;, score=0.968 total time=   0.0s
[CV 26/50] END C=0.5994842503189409, class_weight=None, penalty=l2, solver=liblinear;, score=0.974 total time=   0.0s
[CV 27/50] END C=0.5994842503189409, class_weight=None, penalty=l2, solver=liblinear;, score=0.978 total time=   0.0s
[CV 28/50] END C=0.5994842503189409, class_weight=None, penalty=l2, solver=liblinear;, score=0.969 total time=   0.0s
[CV 29/50] END C=0.5994842503189409, class_weight=None, penalty=l2, solver=liblinear;, score=0.973 total time=   0.0s
[CV 30/50] END C=0.5994842503189409, class_weight=None, 

[CV 42/50] END C=1.6681005372000592, class_weight=balanced, penalty=l1, solver=liblinear;, score=0.967 total time=   0.0s
[CV 43/50] END C=1.6681005372000592, class_weight=balanced, penalty=l1, solver=liblinear;, score=0.971 total time=   0.0s
[CV 44/50] END C=1.6681005372000592, class_weight=balanced, penalty=l1, solver=liblinear;, score=0.976 total time=   0.0s
[CV 45/50] END C=1.6681005372000592, class_weight=balanced, penalty=l1, solver=liblinear;, score=0.965 total time=   0.0s
[CV 46/50] END C=1.6681005372000592, class_weight=balanced, penalty=l1, solver=liblinear;, score=0.972 total time=   0.0s
[CV 47/50] END C=1.6681005372000592, class_weight=balanced, penalty=l1, solver=liblinear;, score=0.976 total time=   0.0s
[CV 48/50] END C=1.6681005372000592, class_weight=balanced, penalty=l1, solver=liblinear;, score=0.965 total time=   0.0s
[CV 49/50] END C=1.6681005372000592, class_weight=balanced, penalty=l1, solver=liblinear;, score=0.977 total time=   0.0s
[CV 50/50] END C=1.66810

[CV 16/50] END C=1.6681005372000592, class_weight=None, penalty=l1, solver=liblinear;, score=0.977 total time=   0.0s
[CV 17/50] END C=1.6681005372000592, class_weight=None, penalty=l1, solver=liblinear;, score=0.967 total time=   0.0s
[CV 18/50] END C=1.6681005372000592, class_weight=None, penalty=l1, solver=liblinear;, score=0.975 total time=   0.0s
[CV 19/50] END C=1.6681005372000592, class_weight=None, penalty=l1, solver=liblinear;, score=0.977 total time=   0.0s
[CV 20/50] END C=1.6681005372000592, class_weight=None, penalty=l1, solver=liblinear;, score=0.965 total time=   0.0s
[CV 21/50] END C=1.6681005372000592, class_weight=None, penalty=l1, solver=liblinear;, score=0.965 total time=   0.0s
[CV 22/50] END C=1.6681005372000592, class_weight=None, penalty=l1, solver=liblinear;, score=0.975 total time=   0.0s
[CV 23/50] END C=1.6681005372000592, class_weight=None, penalty=l1, solver=liblinear;, score=0.974 total time=   0.0s
[CV 24/50] END C=1.6681005372000592, class_weight=None, 

[CV 41/50] END C=1.6681005372000592, class_weight=None, penalty=l2, solver=liblinear;, score=0.974 total time=   0.0s
[CV 42/50] END C=1.6681005372000592, class_weight=None, penalty=l2, solver=liblinear;, score=0.967 total time=   0.0s
[CV 43/50] END C=1.6681005372000592, class_weight=None, penalty=l2, solver=liblinear;, score=0.970 total time=   0.0s
[CV 44/50] END C=1.6681005372000592, class_weight=None, penalty=l2, solver=liblinear;, score=0.974 total time=   0.0s
[CV 45/50] END C=1.6681005372000592, class_weight=None, penalty=l2, solver=liblinear;, score=0.966 total time=   0.0s
[CV 46/50] END C=1.6681005372000592, class_weight=None, penalty=l2, solver=liblinear;, score=0.971 total time=   0.0s
[CV 47/50] END C=1.6681005372000592, class_weight=None, penalty=l2, solver=liblinear;, score=0.975 total time=   0.0s
[CV 48/50] END C=1.6681005372000592, class_weight=None, penalty=l2, solver=liblinear;, score=0.964 total time=   0.0s
[CV 49/50] END C=1.6681005372000592, class_weight=None, 

[CV 9/50] END C=4.6415888336127775, class_weight=balanced, penalty=l2, solver=liblinear;, score=0.973 total time=   0.0s
[CV 10/50] END C=4.6415888336127775, class_weight=balanced, penalty=l2, solver=liblinear;, score=0.976 total time=   0.0s
[CV 11/50] END C=4.6415888336127775, class_weight=balanced, penalty=l2, solver=liblinear;, score=0.963 total time=   0.0s
[CV 12/50] END C=4.6415888336127775, class_weight=balanced, penalty=l2, solver=liblinear;, score=0.967 total time=   0.0s
[CV 13/50] END C=4.6415888336127775, class_weight=balanced, penalty=l2, solver=liblinear;, score=0.980 total time=   0.0s
[CV 14/50] END C=4.6415888336127775, class_weight=balanced, penalty=l2, solver=liblinear;, score=0.978 total time=   0.0s
[CV 15/50] END C=4.6415888336127775, class_weight=balanced, penalty=l2, solver=liblinear;, score=0.970 total time=   0.0s
[CV 16/50] END C=4.6415888336127775, class_weight=balanced, penalty=l2, solver=liblinear;, score=0.976 total time=   0.0s
[CV 17/50] END C=4.641588

[CV 30/50] END C=4.6415888336127775, class_weight=None, penalty=l1, solver=liblinear;, score=0.967 total time=   0.0s
[CV 31/50] END C=4.6415888336127775, class_weight=None, penalty=l1, solver=liblinear;, score=0.976 total time=   0.0s
[CV 32/50] END C=4.6415888336127775, class_weight=None, penalty=l1, solver=liblinear;, score=0.975 total time=   0.0s
[CV 33/50] END C=4.6415888336127775, class_weight=None, penalty=l1, solver=liblinear;, score=0.967 total time=   0.0s
[CV 34/50] END C=4.6415888336127775, class_weight=None, penalty=l1, solver=liblinear;, score=0.974 total time=   0.0s
[CV 35/50] END C=4.6415888336127775, class_weight=None, penalty=l1, solver=liblinear;, score=0.972 total time=   0.0s
[CV 36/50] END C=4.6415888336127775, class_weight=None, penalty=l1, solver=liblinear;, score=0.970 total time=   0.0s
[CV 37/50] END C=4.6415888336127775, class_weight=None, penalty=l1, solver=liblinear;, score=0.972 total time=   0.0s
[CV 38/50] END C=4.6415888336127775, class_weight=None, 

[CV 49/50] END C=4.6415888336127775, class_weight=None, penalty=l2, solver=liblinear;, score=0.976 total time=   0.0s
[CV 50/50] END C=4.6415888336127775, class_weight=None, penalty=l2, solver=liblinear;, score=0.974 total time=   0.0s
[CV 1/50] END C=12.915496650148826, class_weight=balanced, penalty=l1, solver=liblinear;, score=0.970 total time=   0.0s
[CV 2/50] END C=12.915496650148826, class_weight=balanced, penalty=l1, solver=liblinear;, score=0.968 total time=   0.0s
[CV 3/50] END C=12.915496650148826, class_weight=balanced, penalty=l1, solver=liblinear;, score=0.981 total time=   0.0s
[CV 4/50] END C=12.915496650148826, class_weight=balanced, penalty=l1, solver=liblinear;, score=0.971 total time=   0.0s
[CV 5/50] END C=12.915496650148826, class_weight=balanced, penalty=l1, solver=liblinear;, score=0.969 total time=   0.0s
[CV 6/50] END C=12.915496650148826, class_weight=balanced, penalty=l1, solver=liblinear;, score=0.968 total time=   0.0s
[CV 7/50] END C=12.915496650148826, cl

[CV 18/50] END C=12.915496650148826, class_weight=balanced, penalty=l2, solver=liblinear;, score=0.975 total time=   0.0s
[CV 19/50] END C=12.915496650148826, class_weight=balanced, penalty=l2, solver=liblinear;, score=0.976 total time=   0.0s
[CV 20/50] END C=12.915496650148826, class_weight=balanced, penalty=l2, solver=liblinear;, score=0.964 total time=   0.0s
[CV 21/50] END C=12.915496650148826, class_weight=balanced, penalty=l2, solver=liblinear;, score=0.965 total time=   0.0s
[CV 22/50] END C=12.915496650148826, class_weight=balanced, penalty=l2, solver=liblinear;, score=0.975 total time=   0.0s
[CV 23/50] END C=12.915496650148826, class_weight=balanced, penalty=l2, solver=liblinear;, score=0.974 total time=   0.0s
[CV 24/50] END C=12.915496650148826, class_weight=balanced, penalty=l2, solver=liblinear;, score=0.977 total time=   0.0s
[CV 25/50] END C=12.915496650148826, class_weight=balanced, penalty=l2, solver=liblinear;, score=0.970 total time=   0.0s
[CV 26/50] END C=12.9154

[CV 40/50] END C=12.915496650148826, class_weight=None, penalty=l1, solver=liblinear;, score=0.976 total time=   0.0s
[CV 41/50] END C=12.915496650148826, class_weight=None, penalty=l1, solver=liblinear;, score=0.975 total time=   0.0s
[CV 42/50] END C=12.915496650148826, class_weight=None, penalty=l1, solver=liblinear;, score=0.969 total time=   0.0s
[CV 43/50] END C=12.915496650148826, class_weight=None, penalty=l1, solver=liblinear;, score=0.971 total time=   0.0s
[CV 44/50] END C=12.915496650148826, class_weight=None, penalty=l1, solver=liblinear;, score=0.975 total time=   0.0s
[CV 45/50] END C=12.915496650148826, class_weight=None, penalty=l1, solver=liblinear;, score=0.964 total time=   0.0s
[CV 46/50] END C=12.915496650148826, class_weight=None, penalty=l1, solver=liblinear;, score=0.972 total time=   0.0s
[CV 47/50] END C=12.915496650148826, class_weight=None, penalty=l1, solver=liblinear;, score=0.976 total time=   0.0s
[CV 48/50] END C=12.915496650148826, class_weight=None, 

[CV 14/50] END C=35.93813663804626, class_weight=balanced, penalty=l1, solver=liblinear;, score=0.979 total time=   0.0s
[CV 15/50] END C=35.93813663804626, class_weight=balanced, penalty=l1, solver=liblinear;, score=0.970 total time=   0.0s
[CV 16/50] END C=35.93813663804626, class_weight=balanced, penalty=l1, solver=liblinear;, score=0.977 total time=   0.0s
[CV 17/50] END C=35.93813663804626, class_weight=balanced, penalty=l1, solver=liblinear;, score=0.967 total time=   0.0s
[CV 18/50] END C=35.93813663804626, class_weight=balanced, penalty=l1, solver=liblinear;, score=0.976 total time=   0.0s
[CV 19/50] END C=35.93813663804626, class_weight=balanced, penalty=l1, solver=liblinear;, score=0.976 total time=   0.0s
[CV 20/50] END C=35.93813663804626, class_weight=balanced, penalty=l1, solver=liblinear;, score=0.964 total time=   0.0s
[CV 21/50] END C=35.93813663804626, class_weight=balanced, penalty=l1, solver=liblinear;, score=0.966 total time=   0.0s
[CV 22/50] END C=35.938136638046

[CV 33/50] END C=35.93813663804626, class_weight=balanced, penalty=l2, solver=liblinear;, score=0.968 total time=   0.0s
[CV 34/50] END C=35.93813663804626, class_weight=balanced, penalty=l2, solver=liblinear;, score=0.974 total time=   0.0s
[CV 35/50] END C=35.93813663804626, class_weight=balanced, penalty=l2, solver=liblinear;, score=0.971 total time=   0.0s
[CV 36/50] END C=35.93813663804626, class_weight=balanced, penalty=l2, solver=liblinear;, score=0.970 total time=   0.0s
[CV 37/50] END C=35.93813663804626, class_weight=balanced, penalty=l2, solver=liblinear;, score=0.971 total time=   0.0s
[CV 38/50] END C=35.93813663804626, class_weight=balanced, penalty=l2, solver=liblinear;, score=0.966 total time=   0.0s
[CV 39/50] END C=35.93813663804626, class_weight=balanced, penalty=l2, solver=liblinear;, score=0.971 total time=   0.0s
[CV 40/50] END C=35.93813663804626, class_weight=balanced, penalty=l2, solver=liblinear;, score=0.976 total time=   0.0s
[CV 41/50] END C=35.938136638046

[CV 4/50] END C=35.93813663804626, class_weight=None, penalty=l2, solver=liblinear;, score=0.970 total time=   0.0s
[CV 5/50] END C=35.93813663804626, class_weight=None, penalty=l2, solver=liblinear;, score=0.969 total time=   0.0s
[CV 6/50] END C=35.93813663804626, class_weight=None, penalty=l2, solver=liblinear;, score=0.968 total time=   0.0s
[CV 7/50] END C=35.93813663804626, class_weight=None, penalty=l2, solver=liblinear;, score=0.976 total time=   0.0s
[CV 8/50] END C=35.93813663804626, class_weight=None, penalty=l2, solver=liblinear;, score=0.975 total time=   0.0s
[CV 9/50] END C=35.93813663804626, class_weight=None, penalty=l2, solver=liblinear;, score=0.974 total time=   0.0s
[CV 10/50] END C=35.93813663804626, class_weight=None, penalty=l2, solver=liblinear;, score=0.976 total time=   0.0s
[CV 11/50] END C=35.93813663804626, class_weight=None, penalty=l2, solver=liblinear;, score=0.963 total time=   0.0s
[CV 12/50] END C=35.93813663804626, class_weight=None, penalty=l2, sol

[CV 38/50] END C=100.0, class_weight=balanced, penalty=l1, solver=liblinear;, score=0.966 total time=   0.0s
[CV 39/50] END C=100.0, class_weight=balanced, penalty=l1, solver=liblinear;, score=0.971 total time=   0.0s
[CV 40/50] END C=100.0, class_weight=balanced, penalty=l1, solver=liblinear;, score=0.976 total time=   0.0s
[CV 41/50] END C=100.0, class_weight=balanced, penalty=l1, solver=liblinear;, score=0.975 total time=   0.0s
[CV 42/50] END C=100.0, class_weight=balanced, penalty=l1, solver=liblinear;, score=0.968 total time=   0.0s
[CV 43/50] END C=100.0, class_weight=balanced, penalty=l1, solver=liblinear;, score=0.971 total time=   0.0s
[CV 44/50] END C=100.0, class_weight=balanced, penalty=l1, solver=liblinear;, score=0.976 total time=   0.0s
[CV 45/50] END C=100.0, class_weight=balanced, penalty=l1, solver=liblinear;, score=0.963 total time=   0.0s
[CV 46/50] END C=100.0, class_weight=balanced, penalty=l1, solver=liblinear;, score=0.971 total time=   0.0s
[CV 47/50] END C=10

[CV 14/50] END C=100.0, class_weight=None, penalty=l1, solver=liblinear;, score=0.978 total time=   0.0s
[CV 15/50] END C=100.0, class_weight=None, penalty=l1, solver=liblinear;, score=0.970 total time=   0.0s
[CV 16/50] END C=100.0, class_weight=None, penalty=l1, solver=liblinear;, score=0.977 total time=   0.0s
[CV 17/50] END C=100.0, class_weight=None, penalty=l1, solver=liblinear;, score=0.967 total time=   0.0s
[CV 18/50] END C=100.0, class_weight=None, penalty=l1, solver=liblinear;, score=0.976 total time=   0.0s
[CV 19/50] END C=100.0, class_weight=None, penalty=l1, solver=liblinear;, score=0.976 total time=   0.0s
[CV 20/50] END C=100.0, class_weight=None, penalty=l1, solver=liblinear;, score=0.964 total time=   0.0s
[CV 21/50] END C=100.0, class_weight=None, penalty=l1, solver=liblinear;, score=0.966 total time=   0.0s
[CV 22/50] END C=100.0, class_weight=None, penalty=l1, solver=liblinear;, score=0.975 total time=   0.0s
[CV 23/50] END C=100.0, class_weight=None, penalty=l1, 

[CV 44/50] END C=100.0, class_weight=None, penalty=l2, solver=liblinear;, score=0.975 total time=   0.0s
[CV 45/50] END C=100.0, class_weight=None, penalty=l2, solver=liblinear;, score=0.964 total time=   0.0s
[CV 46/50] END C=100.0, class_weight=None, penalty=l2, solver=liblinear;, score=0.972 total time=   0.0s
[CV 47/50] END C=100.0, class_weight=None, penalty=l2, solver=liblinear;, score=0.976 total time=   0.0s
[CV 48/50] END C=100.0, class_weight=None, penalty=l2, solver=liblinear;, score=0.965 total time=   0.0s
[CV 49/50] END C=100.0, class_weight=None, penalty=l2, solver=liblinear;, score=0.977 total time=   0.0s
[CV 50/50] END C=100.0, class_weight=None, penalty=l2, solver=liblinear;, score=0.973 total time=   0.0s
Wall time: 51.9 s


GridSearchCV(cv=RepeatedStratifiedKFold(n_repeats=10, n_splits=5, random_state=42),
             estimator=LogisticRegression(),
             param_grid=[{'C': array([1.00000000e-02, 2.78255940e-02, 7.74263683e-02, 2.15443469e-01,
       5.99484250e-01, 1.66810054e+00, 4.64158883e+00, 1.29154967e+01,
       3.59381366e+01, 1.00000000e+02]),
                          'class_weight': ['balanced', None],
                          'penalty': ['l1', 'l2'], 'solver': ['liblinear']}],
             scoring='roc_auc', verbose=3)

In [62]:
search.best_estimator_

LogisticRegression(C=100.0, class_weight='balanced', penalty='l1',
                   solver='liblinear')

In [63]:
search.best_score_

0.9722080456185189

In [77]:
train_set, validation_set = train_test_split(
...     training_data.loc[:, best_features], test_size=0.2, random_state=0)

In [78]:
X_train = train_set.iloc[:,:-1].values
y_train = train_set.iloc[:,-1].values

X_val = validation_set.iloc[:,:-1].values
y_val = validation_set.iloc[:,-1].values

In [79]:
X_train = RobustScaler().fit_transform(X_train)
X_val = RobustScaler().fit_transform(X_val)

In [80]:
lr = search.best_estimator_

In [81]:
clf = lr.fit(X_train, y_train)

In [82]:
print("training score ", clf.score(X_train, y_train))
print("validation score ", clf.score(X_val, y_val))

training score  0.9315856777493606
validation score  0.9373401534526854


In [87]:
# Predict Y on test data and preview few values and save the predictions with original features
test_data['Y'] = clf.predict(RobustScaler().fit_transform(test_data.loc[:, best_features[:-1]]))
print(test_data)
test_data.to_csv('predictions on test data.csv')

       X1    X2    X3   X4    X5    X6    X7    X8    X9   X10  ...    X49  \
0    0.70  0.00  0.70  0.0  0.00  0.00  0.00  0.00  0.00  0.00  ...  0.000   
1    0.00  0.00  0.84  0.0  0.84  0.00  0.84  0.00  0.00  0.00  ...  0.000   
2    0.46  0.30  0.46  0.0  0.05  0.12  0.05  0.28  0.43  0.74  ...  0.000   
3    0.10  0.20  1.01  0.0  0.80  0.80  0.50  0.00  0.80  0.10  ...  0.000   
4    0.00  0.00  0.72  0.0  0.72  0.00  0.72  0.00  0.00  0.00  ...  0.000   
..    ...   ...   ...  ...   ...   ...   ...   ...   ...   ...  ...    ...   
686  0.25  0.00  0.00  0.0  0.00  0.00  0.00  0.00  0.25  0.00  ...  0.301   
687  0.00  0.00  0.00  0.0  0.00  0.00  0.00  0.00  0.00  0.00  ...  0.000   
688  0.00  0.00  0.00  0.0  0.00  0.00  0.00  0.00  0.00  0.00  ...  0.000   
689  0.08  0.08  0.57  0.0  0.48  0.00  0.00  0.08  0.00  0.00  ...  0.011   
690  0.00  0.00  0.00  0.0  0.00  0.00  0.00  0.00  0.00  0.00  ...  0.000   

       X50    X51    X52    X53    X54     X55  X56   X57  Y  


In [88]:
!pip3 freeze > requirements.txt