# 04 - GridSearchCV - SGD

#### Imports

In [1]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns

sns.set(style="white")

#### Constants

In [2]:
# NOTE(review): not referenced by any cell visible in this notebook — looks like a leftover; confirm before removing.
n_components = 1000

In [3]:
# Folder that holds the pickled artefacts; the trailing slash is required
# because the file names are appended by plain string concatenation.
models_folder = "models/"

# Pickled training matrix, target vector and test matrix.
train_data_fn, target_fn, test_data_fn = (
    models_folder + name
    for name in ("train_data.pkl", "target.pkl", "test_data.pkl")
)

# Pickled scalar used later to build the class-weight candidates.
weight_multiplier_fn = models_folder + "weight_multiplier.pkl"

#### Functions

In [4]:
import os.path
from sklearn.externals import joblib

def Load(filename):
    """Load a joblib-serialised object from ``filename``.

    Parameters
    ----------
    filename : str
        Path of the file to read.

    Returns
    -------
    object or None
        The deserialised object, or ``None`` when ``filename`` is not an
        existing regular file. In that case a ``UserWarning`` is emitted so
        the missing file is visible where it happens instead of surfacing
        later as an unrelated error on ``None``.
    """
    import warnings  # local import keeps this helper self-contained

    if not os.path.isfile(filename):
        warnings.warn(
            "Load: file not found: {0!r}; returning None".format(filename),
            stacklevel=2,
        )
        return None
    # NOTE(review): joblib.load unpickles the file, so only use it on trusted files.
    return joblib.load(filename)
    
def Save(obj, filename):
    """Serialise ``obj`` to ``filename`` using joblib; returns nothing."""
    joblib.dump(value=obj, filename=filename)

# Loading data

In [5]:
# Import the submodule explicitly: a bare `import scipy` does not guarantee
# that `scipy.sparse` is available as an attribute. This still binds the
# name `scipy`, so later `scipy.sparse.*` calls keep working.
import scipy.sparse

# Scaled sparse training matrix produced by an earlier preprocessing step.
data = scipy.sparse.load_npz("train_sparse_matrix_after_scale.npz")

# Load() returns None for a missing file; fail here with a clear message
# instead of later at `target.ravel()`.
target = Load(target_fn)
if target is None:
    raise FileNotFoundError(target_fn)

In [6]:
# Scalar loaded from weight_multiplier_fn; the search grid uses it (and its reciprocal) as the weight of class 1.
weight_multiplier = Load(weight_multiplier_fn)

## Splitting dataset

In [8]:
from sklearn.model_selection import train_test_split

# 80% of the rows go to training, the rest to validation; the seed is fixed
# so the split is reproducible.
X_train, X_validation, Y_train, Y_validation = train_test_split(
    data,
    target.ravel(),
    train_size=0.8,
    random_state=42,
)



# SGD Classifier

In [9]:
from sklearn.linear_model import SGDClassifier
import random

In [12]:
# Candidate values for each SGDClassifier argument; the randomized search
# samples combinations from these lists.
tuned_parameters = {
    # Loss and regularisation.
    'loss': ['hinge', 'perceptron'],
    'penalty': ['l2', 'l1', 'elasticnet'],
    'alpha': [0.0001, 0.004, 0.02, 0.00005],
    'l1_ratio': [0.15, 0.05, 0.4, 0.8],
    'fit_intercept': [True],
    # Optimisation loop.
    'max_iter': [10, 100, 200],
    'tol': [None, 0.0001, 0.001, 0.01],
    'shuffle': [True, False],
    'verbose': [0],
    'epsilon': [0.1, 0.5, 0.8],
    'n_jobs': [2],
    'random_state': [42],
    # Learning-rate schedule.
    'learning_rate': ['optimal', 'invscaling'],
    'eta0': [0.1, 0.04, 0.01],
    'power_t': [0.5, 0.9, 0.1],
    # Class weighting: balanced, class 1 up-weighted, class 1 down-weighted.
    'class_weight': [
        {0: 1, 1: 1},
        {0: 1, 1: weight_multiplier},
        {0: 1, 1: 1 / weight_multiplier},
    ],
    'warm_start': [False],
    'average': [False, 10, 100],
}

In [14]:
%%time
from sklearn.model_selection import GridSearchCV,RandomizedSearchCV

# Randomized search over `tuned_parameters`: 5 sampled candidates, each
# evaluated with 4-fold cross-validation and scored by ROC AUC.
clf = RandomizedSearchCV(
    estimator=SGDClassifier(),
    param_distributions=tuned_parameters,
    n_iter=5,
    cv=4,
    scoring='roc_auc',
    n_jobs=7,
    random_state=42,
    verbose=2,
)

CPU times: user 114 µs, sys: 19 µs, total: 133 µs
Wall time: 143 µs


In [15]:
%%time
# Run the search on the training split (5 candidates x 4 folds = 20 fits; about 31 minutes wall time in the recorded run).
clf.fit(X_train, Y_train)

Fitting 4 folds for each of 5 candidates, totalling 20 fits
[CV] warm_start=False, verbose=0, tol=0.001, shuffle=False, random_state=42, power_t=0.9, penalty=elasticnet, n_jobs=2, max_iter=100, loss=hinge, learning_rate=optimal, l1_ratio=0.05, fit_intercept=True, eta0=0.01, epsilon=0.8, class_weight={0: 1, 1: 1}, average=10, alpha=0.0001 
[CV] warm_start=False, verbose=0, tol=0.001, shuffle=False, random_state=42, power_t=0.9, penalty=elasticnet, n_jobs=2, max_iter=100, loss=hinge, learning_rate=optimal, l1_ratio=0.05, fit_intercept=True, eta0=0.01, epsilon=0.8, class_weight={0: 1, 1: 1}, average=10, alpha=0.0001 
[CV] warm_start=False, verbose=0, tol=0.001, shuffle=False, random_state=42, power_t=0.9, penalty=elasticnet, n_jobs=2, max_iter=100, loss=hinge, learning_rate=optimal, l1_ratio=0.05, fit_intercept=True, eta0=0.01, epsilon=0.8, class_weight={0: 1, 1: 1}, average=10, alpha=0.0001 
[CV] warm_start=False, verbose=0, tol=0.001, shuffle=False, random_state=42, power_t=0.9, penalty

[CV] warm_start=False, verbose=0, tol=None, shuffle=False, random_state=42, power_t=0.9, penalty=l1, n_jobs=2, max_iter=100, loss=hinge, learning_rate=invscaling, l1_ratio=0.8, fit_intercept=True, eta0=0.04, epsilon=0.5, class_weight={0: 1, 1: 1}, average=10, alpha=0.0001 
[CV]  warm_start=False, verbose=0, tol=0.001, shuffle=True, random_state=42, power_t=0.5, penalty=elasticnet, n_jobs=2, max_iter=200, loss=perceptron, learning_rate=invscaling, l1_ratio=0.8, fit_intercept=True, eta0=0.01, epsilon=0.1, class_weight={0: 1, 1: 0.05276699578395344}, average=100, alpha=0.0001, total=  52.1s
[CV] warm_start=False, verbose=0, tol=None, shuffle=False, random_state=42, power_t=0.9, penalty=l1, n_jobs=2, max_iter=100, loss=hinge, learning_rate=invscaling, l1_ratio=0.8, fit_intercept=True, eta0=0.04, epsilon=0.5, class_weight={0: 1, 1: 1}, average=10, alpha=0.0001 
[CV]  warm_start=False, verbose=0, tol=None, shuffle=False, random_state=42, power_t=0.9, penalty=l1, n_jobs=2, max_iter=100, loss=

[Parallel(n_jobs=7)]: Done  18 out of  20 | elapsed: 23.3min remaining:  2.6min


[CV]  warm_start=False, verbose=0, tol=None, shuffle=False, random_state=42, power_t=0.5, penalty=l1, n_jobs=2, max_iter=200, loss=hinge, learning_rate=invscaling, l1_ratio=0.15, fit_intercept=True, eta0=0.01, epsilon=0.1, class_weight={0: 1, 1: 18.951239977624464}, average=10, alpha=0.0001, total=18.9min
[CV]  warm_start=False, verbose=0, tol=None, shuffle=False, random_state=42, power_t=0.9, penalty=l1, n_jobs=2, max_iter=100, loss=hinge, learning_rate=invscaling, l1_ratio=0.8, fit_intercept=True, eta0=0.04, epsilon=0.5, class_weight={0: 1, 1: 1}, average=10, alpha=0.0001, total= 5.2min


[Parallel(n_jobs=7)]: Done  20 out of  20 | elapsed: 24.4min finished


CPU times: user 7min 16s, sys: 1.07 s, total: 7min 17s
Wall time: 30min 59s


RandomizedSearchCV(cv=4, error_score='raise',
          estimator=SGDClassifier(alpha=0.0001, average=False, class_weight=None, epsilon=0.1,
       eta0=0.0, fit_intercept=True, l1_ratio=0.15,
       learning_rate='optimal', loss='hinge', max_iter=None, n_iter=None,
       n_jobs=1, penalty='l2', power_t=0.5, random_state=None,
       shuffle=True, tol=None, verbose=0, warm_start=False),
          fit_params=None, iid=True, n_iter=5, n_jobs=7,
          param_distributions={'loss': ['hinge', 'perceptron'], 'penalty': ['l2', 'l1', 'elasticnet'], 'alpha': [0.0001, 0.004, 0.02, 5e-05], 'l1_ratio': [0.15, 0.05, 0.4, 0.8], 'fit_intercept': [True], 'max_iter': [10, 100, 200], 'tol': [None, 0.0001, 0.001, 0.01], 'shuffle': [True, False], 'verbose': [0], '...1239977624464}, {0: 1, 1: 0.05276699578395344}], 'warm_start': [False], 'average': [False, 10, 100]},
          pre_dispatch='2*n_jobs', random_state=42, refit=True,
          return_train_score='warn', scoring='roc_auc', verbose=2)

In [16]:
def report(results, n_top=3):
    """Print the candidates ranked 1..``n_top`` from a search's results.

    ``results`` is a mapping with the keys ``rank_test_score``,
    ``mean_test_score``, ``std_test_score`` and ``params``. Every candidate
    whose rank equals the current rank is printed, so ties produce several
    entries for the same rank.
    """
    ranks = results['rank_test_score']
    for rank in range(1, n_top + 1):
        for idx in np.flatnonzero(ranks == rank):
            mean_score = results['mean_test_score'][idx]
            std_score = results['std_test_score'][idx]
            print("Model with rank: {0}".format(rank))
            print("Mean validation score: {0:.3f} (std: {1:.3f})".format(
                mean_score, std_score))
            print("Parameters: {0}".format(results['params'][idx]))
            print("")

In [17]:
# Print the best-ranked candidates of the search (report's default is the top 3 ranks).
print("RandomizedSearchCV")
report(clf.cv_results_)

RandomizedSearchCV
Model with rank: 1
Mean validation score: 0.578 (std: 0.006)
Parameters: {'warm_start': False, 'verbose': 0, 'tol': None, 'shuffle': False, 'random_state': 42, 'power_t': 0.5, 'penalty': 'l1', 'n_jobs': 2, 'max_iter': 200, 'loss': 'hinge', 'learning_rate': 'invscaling', 'l1_ratio': 0.15, 'fit_intercept': True, 'eta0': 0.01, 'epsilon': 0.1, 'class_weight': {0: 1, 1: 18.951239977624464}, 'average': 10, 'alpha': 0.0001}

Model with rank: 2
Mean validation score: 0.542 (std: 0.016)
Parameters: {'warm_start': False, 'verbose': 0, 'tol': None, 'shuffle': False, 'random_state': 42, 'power_t': 0.9, 'penalty': 'l1', 'n_jobs': 2, 'max_iter': 100, 'loss': 'hinge', 'learning_rate': 'invscaling', 'l1_ratio': 0.8, 'fit_intercept': True, 'eta0': 0.04, 'epsilon': 0.5, 'class_weight': {0: 1, 1: 1}, 'average': 10, 'alpha': 0.0001}

Model with rank: 3
Mean validation score: 0.539 (std: 0.006)
Parameters: {'warm_start': False, 'verbose': 0, 'tol': 0.001, 'shuffle': False, 'random_state'

In [25]:
# Work on a copy: `clf.best_params_` belongs to the fitted search object, and
# editing it in place would silently change the best parameters `clf` reports.
params = dict(clf.best_params_)
# Fallback to skip the search and reuse the recorded best candidate:
# params = {'warm_start': False, 'verbose': 0, 'tol': None, 'shuffle': False, 'random_state': 42, 'power_t': 0.5, 'penalty': 'l1', 'n_jobs': 2, 'max_iter': 200, 'loss': 'hinge', 'learning_rate': 'invscaling', 'l1_ratio': 0.15, 'fit_intercept': True, 'eta0': 0.01, 'epsilon': 0.1, 'class_weight': {0: 1, 1: 18.951239977624464}, 'average': 10, 'alpha': 0.0001}
# Overrides for the final fit: use all cores and log per-epoch progress.
params['n_jobs'] = -1
params['verbose'] = 2

In [12]:
# NOTE(review): the four variables below are not used by SGDClassifier or by any
# visible cell; their names suggest leftovers from a boosting notebook — confirm and remove.
evals_results = {}
num_boost_round=3000
early_stopping_rounds=200
feval=None

# Refit a single SGDClassifier on the training split with the selected parameters.
model = SGDClassifier(**params)
model.fit(X_train,Y_train)

-- Epoch 1
Norm: 13.60, NNZs: 33570, Bias: 0.001022, T: 342395, Avg. loss: 3.820180
Total training time: 1.78 seconds.
-- Epoch 2
Norm: 12.99, NNZs: 33430, Bias: 0.001310, T: 684790, Avg. loss: 2.550982
Total training time: 3.54 seconds.
-- Epoch 3
Norm: 12.62, NNZs: 33266, Bias: 0.001563, T: 1027185, Avg. loss: 2.045418
Total training time: 5.41 seconds.
-- Epoch 4
Norm: 12.37, NNZs: 33013, Bias: 0.001826, T: 1369580, Avg. loss: 1.784594
Total training time: 7.25 seconds.
-- Epoch 5
Norm: 12.19, NNZs: 32799, Bias: 0.002072, T: 1711975, Avg. loss: 1.618152
Total training time: 9.02 seconds.
-- Epoch 6
Norm: 12.05, NNZs: 32650, Bias: 0.002287, T: 2054370, Avg. loss: 1.495988
Total training time: 10.87 seconds.
-- Epoch 7
Norm: 11.94, NNZs: 32506, Bias: 0.002485, T: 2396765, Avg. loss: 1.415385
Total training time: 12.66 seconds.
-- Epoch 8
Norm: 11.84, NNZs: 32370, Bias: 0.002659, T: 2739160, Avg. loss: 1.357492
Total training time: 14.56 seconds.
-- Epoch 9
Norm: 11.77, NNZs: 32281, Bi

Norm: 10.80, NNZs: 30209, Bias: 0.007430, T: 23282860, Avg. loss: 0.866477
Total training time: 129.37 seconds.
-- Epoch 69
Norm: 10.79, NNZs: 30179, Bias: 0.007479, T: 23625255, Avg. loss: 0.865820
Total training time: 131.18 seconds.
-- Epoch 70
Norm: 10.79, NNZs: 30191, Bias: 0.007537, T: 23967650, Avg. loss: 0.864207
Total training time: 133.00 seconds.
-- Epoch 71
Norm: 10.78, NNZs: 30166, Bias: 0.007587, T: 24310045, Avg. loss: 0.861669
Total training time: 134.81 seconds.
-- Epoch 72
Norm: 10.78, NNZs: 30121, Bias: 0.007635, T: 24652440, Avg. loss: 0.860136
Total training time: 136.67 seconds.
-- Epoch 73
Norm: 10.78, NNZs: 30114, Bias: 0.007686, T: 24994835, Avg. loss: 0.859384
Total training time: 138.49 seconds.
-- Epoch 74
Norm: 10.77, NNZs: 30118, Bias: 0.007734, T: 25337230, Avg. loss: 0.858615
Total training time: 140.32 seconds.
-- Epoch 75
Norm: 10.77, NNZs: 30086, Bias: 0.007786, T: 25679625, Avg. loss: 0.856428
Total training time: 142.23 seconds.
-- Epoch 76
Norm: 10

Norm: 10.65, NNZs: 29220, Bias: 0.010181, T: 45880930, Avg. loss: 0.808821
Total training time: 250.43 seconds.
-- Epoch 135
Norm: 10.65, NNZs: 29246, Bias: 0.010214, T: 46223325, Avg. loss: 0.808896
Total training time: 252.23 seconds.
-- Epoch 136
Norm: 10.65, NNZs: 29200, Bias: 0.010246, T: 46565720, Avg. loss: 0.808266
Total training time: 254.03 seconds.
-- Epoch 137
Norm: 10.64, NNZs: 29187, Bias: 0.010281, T: 46908115, Avg. loss: 0.807744
Total training time: 255.83 seconds.
-- Epoch 138
Norm: 10.64, NNZs: 29197, Bias: 0.010318, T: 47250510, Avg. loss: 0.805712
Total training time: 257.63 seconds.
-- Epoch 139
Norm: 10.64, NNZs: 29175, Bias: 0.010351, T: 47592905, Avg. loss: 0.806580
Total training time: 259.43 seconds.
-- Epoch 140
Norm: 10.64, NNZs: 29170, Bias: 0.010388, T: 47935300, Avg. loss: 0.806587
Total training time: 261.23 seconds.
-- Epoch 141
Norm: 10.64, NNZs: 29177, Bias: 0.010419, T: 48277695, Avg. loss: 0.805847
Total training time: 263.04 seconds.
-- Epoch 142


Norm: 10.62, NNZs: 28619, Bias: 0.012242, T: 68479000, Avg. loss: 0.784178
Total training time: 373.02 seconds.


SGDClassifier(alpha=0.0001, average=10,
       class_weight={0: 1, 1: 18.951239977624464}, epsilon=0.1, eta0=0.01,
       fit_intercept=True, l1_ratio=0.15, learning_rate='invscaling',
       loss='hinge', max_iter=200, n_iter=None, n_jobs=-1, penalty='l1',
       power_t=0.5, random_state=42, shuffle=False, tol=None, verbose=2,
       warm_start=False)

In [None]:
from sklearn.metrics import roc_auc_score

# Hard class labels, kept for inspection.
predicted = model.predict(X_validation)
# ROC AUC needs continuous scores. Using hard labels collapses the ROC curve
# to a single operating point and is not comparable with the cross-validated
# `scoring='roc_auc'` results, so score with the signed decision function.
validation_scores = model.decision_function(X_validation)
print("ROC AUC score:",roc_auc_score(Y_validation, validation_scores))

In [None]:
# Persist the fitted model with the Save helper.
# NOTE(review): written to the working directory rather than under models_folder — confirm this is intended.
Save(model,"sgd_model.pkl")

# Test Data

In [None]:
# Scaled sparse test matrix; relies on `scipy` having been imported by the "Loading data" cell.
test_data = scipy.sparse.load_npz("test_sparse_matrix_after_scale.npz")

In [None]:
# Hard class labels for the test set (predict, not decision_function).
Y_test = model.predict(test_data)
# Quick sanity check: the largest predicted label and the mean of the predicted labels
# (with 0/1 labels, as the class_weight keys suggest, the mean is the share predicted as class 1).
print(Y_test.max())
print(Y_test.mean())

## Saving test predictions

In [None]:
# One prediction per row, written without a header row or index column.
predictions = pd.DataFrame(Y_test)
predictions.to_csv(
    "solution_sgd.csv",
    header=False,
    index=False,
)