In [1]:
import os
import numpy as np
import pandas as pd
import pickle
from sklearn.model_selection import train_test_split
from sklearn.model_selection import RandomizedSearchCV

In [2]:
# Paths
# Every location is derived from a single project root so the notebook is not
# tied to one machine: export NEW_TITAN_ROOT to point at another checkout.
# When the variable is unset the original absolute location is used, so the
# resulting paths are unchanged.
project_root = os.environ.get(
    'NEW_TITAN_ROOT',
    '/home/walter/Documents/personal_projects/new-titan',
)
artifacts_dir = os.path.join(project_root, 'notebooks', 'experiments', 'exp_01', 'artifacts')

# Directory that contains the data_train/ and data_test/ CSV folders.
prefix = os.path.join(project_root, 'data', 'processed')
# Pickled estimator read as the starting point for the search.
selected_model_path = os.path.join(artifacts_dir, 'selected_model.pkl')
# Destination for the pickled best estimator found by the search.
tunned_model_path = os.path.join(artifacts_dir, 'tunned_model.pkl')

# Tuning
# Candidate values sampled by the randomized search for the estimator's
# 'solver' and 'C' parameters.
solver = ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga']
C = [100, 10, 1, 0.1, 0.001]
cv = 5              # value passed as the search's cv argument
n_iter = 20         # number of parameter settings sampled by the search
score = 'accuracy'  # scoring metric used to rank candidates

In [3]:
def load_model(path):
    """Load and return the object pickled at ``path``.

    Parameters
    ----------
    path : str
        Location of the pickle file to read.

    Returns
    -------
    object
        Whatever object was stored in the pickle file.

    Raises
    ------
    OSError
        If the file cannot be opened.
    pickle.UnpicklingError
        If the file content is not a valid pickle (other exceptions may be
        raised by the unpickled object's own reconstruction code).
    """
    # SECURITY: unpickling can execute arbitrary code; only load files that
    # were produced by a trusted step of this project.
    # The context manager guarantees the handle is closed even when
    # pickle.load raises, which the manual open()/close() pair did not.
    with open(path, 'rb') as pkl_file:
        return pickle.load(pkl_file)

def load_data(prefix):
    """Read the train and test CSV files found under ``prefix``.

    Files are read with ``np.genfromtxt`` using a comma delimiter from the
    ``data_train`` and ``data_test`` sub-directories of ``prefix``. The two
    ``y_*`` arrays are cast to ``int``; the others are returned as parsed.

    Parameters
    ----------
    prefix : str
        Directory containing the ``data_train`` and ``data_test`` folders.

    Returns
    -------
    tuple
        ``(X_train, y_train, label_train, X_test, y_test, label_test)``.
    """
    # (sub-directory, file name, cast-to-int?) in the order they are returned.
    layout = (
        ('data_train', 'X_train.csv', False),
        ('data_train', 'y_train.csv', True),
        ('data_train', 'label_train.csv', False),
        ('data_test', 'X_test.csv', False),
        ('data_test', 'y_test.csv', True),
        ('data_test', 'label_test.csv', False),
    )

    arrays = []
    for folder, filename, as_int in layout:
        values = np.genfromtxt(os.path.join(prefix, folder, filename), delimiter=',')
        arrays.append(values.astype('int') if as_int else values)

    return tuple(arrays)

In [4]:
# Restore the previously selected estimator from its pickle file.
model = load_model(selected_model_path)

# Read the processed CSV splits; the training arrays are bound to the short
# names X / y / label that the following cells use.
(X, y, label,
 X_test, y_test, label_test) = load_data(prefix)

# Last expression: display the loaded estimator as the cell output.
model

In [5]:
# Hyper-parameter tuning: randomized search over the solver and C candidates
# defined in the configuration cell. RandomizedSearchCV comes from the
# notebook's import cell, so it is not re-imported here.
params = {
    'solver': solver,
    'C': C,
}

search = RandomizedSearchCV(
    estimator=model,
    param_distributions=params,
    scoring=score,
    cv=cv,
    n_iter=n_iter,
    return_train_score=False,
    random_state=100,  # fixed seed so the sampled candidates are reproducible
    refit=True,        # refit the best candidate so best_estimator_ is available
)

# The estimator needs a 2-D (n_samples, n_features) matrix. A 1-D X (single
# feature) is turned into one column; an X that is already 2-D is passed
# through untouched, because reshape(-1, 1) would flatten several feature
# columns into one and no longer line up with y.
search.fit(X if X.ndim > 1 else X.reshape(-1, 1), y)

In [6]:
# Tabulate the cross-validation results recorded by the search, one row per
# sampled parameter setting.
pd.DataFrame(data=search.cv_results_)

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_solver,param_C,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.001168,0.000202,0.000239,4.8e-05,saga,10.0,"{'solver': 'saga', 'C': 10}",0.807453,0.75,0.825,0.7875,0.7875,0.791491,0.025024,1
1,0.000865,0.000271,0.00027,5.3e-05,liblinear,0.001,"{'solver': 'liblinear', 'C': 0.001}",0.614907,0.61875,0.61875,0.61875,0.61875,0.617981,0.001537,16
2,0.000983,8.2e-05,0.000155,1.8e-05,sag,1.0,"{'solver': 'sag', 'C': 1}",0.807453,0.75,0.825,0.7875,0.7875,0.791491,0.025024,1
3,0.001669,0.00037,0.00026,5e-05,lbfgs,1.0,"{'solver': 'lbfgs', 'C': 1}",0.807453,0.75,0.825,0.7875,0.7875,0.791491,0.025024,1
4,0.001253,8.1e-05,0.000177,2.3e-05,newton-cg,10.0,"{'solver': 'newton-cg', 'C': 10}",0.807453,0.75,0.825,0.7875,0.7875,0.791491,0.025024,1
5,0.001016,8.3e-05,0.000181,3.6e-05,saga,0.1,"{'solver': 'saga', 'C': 0.1}",0.807453,0.75,0.825,0.7875,0.7875,0.791491,0.025024,1
6,0.000947,0.000108,0.000151,1e-05,sag,0.001,"{'solver': 'sag', 'C': 0.001}",0.614907,0.61875,0.61875,0.61875,0.61875,0.617981,0.001537,16
7,0.001103,8.9e-05,0.000196,3.2e-05,lbfgs,0.001,"{'solver': 'lbfgs', 'C': 0.001}",0.614907,0.61875,0.61875,0.61875,0.61875,0.617981,0.001537,16
8,0.001213,0.000114,0.000151,1.5e-05,lbfgs,10.0,"{'solver': 'lbfgs', 'C': 10}",0.807453,0.75,0.825,0.7875,0.7875,0.791491,0.025024,1
9,0.000433,2.8e-05,0.000146,2.7e-05,liblinear,1.0,"{'solver': 'liblinear', 'C': 1}",0.807453,0.75,0.825,0.7875,0.7875,0.791491,0.025024,1


In [7]:
# Persist the refit best estimator to the tuned-model artifact path.
with open(tunned_model_path, mode='wb') as model_file:
    pickle.dump(search.best_estimator_, model_file)

# Last expression: display the estimator that was just saved.
search.best_estimator_