In [1]:
import os
import numpy as np
import pandas as pd
import pickle
from sklearn.model_selection import train_test_split
from sklearn.model_selection import RandomizedSearchCV

In [2]:
# Paths
# Every location is derived from a single project root so the notebook is not
# tied to one machine: set the NEW_TITAN_ROOT environment variable to point at
# another checkout. Without it, the original locations are used unchanged.
project_root = os.environ.get(
    'NEW_TITAN_ROOT',
    '/home/walter/Documents/personal_projects/new-titan'
)
artifacts_dir = os.path.join(
    project_root, 'notebooks', 'experiments', 'exp_02', 'artifacts'
)

prefix = os.path.join(project_root, 'data', 'processed')
selected_model_path = os.path.join(artifacts_dir, 'selected_model.pkl')
tunned_model_path = os.path.join(artifacts_dir, 'tunned_model.pkl')

# Tuning: candidate values handed to the randomized hyper-parameter search
learning_rate = [0.01, 0.05, 0.1, 0.3, 0.5]
n_estimators = [50, 100, 300, 500]
max_depth = [2, 3, 4, 5, 6]

cv = 5              # passed as `cv` to the search (cross-validation folds)
n_iter = 20         # passed as `n_iter` (number of sampled parameter settings)
score = 'accuracy'  # passed as `scoring`

In [3]:
def load_model(path):
    """Load a pickled object (the model) from ``path``.

    Parameters
    ----------
    path : str
        Location of the pickle file to read.

    Returns
    -------
    object
        The object stored in the pickle file.

    Raises
    ------
    OSError
        If the file cannot be opened.
    pickle.UnpicklingError, EOFError
        If the file content is not a valid pickle.
    """
    # SECURITY: unpickling can execute arbitrary code embedded in the file;
    # only load artifacts that come from a trusted source.
    # The context manager closes the handle even when unpickling raises.
    with open(path, 'rb') as pkl_file:
        return pickle.load(pkl_file)

def load_data(prefix):
    """Read the train and test CSV files stored under ``prefix``.

    Each file is parsed with ``np.genfromtxt`` using a comma delimiter. The two
    ``y`` arrays are additionally cast to ``int``.

    Parameters
    ----------
    prefix : str
        Directory containing the ``data_train`` and ``data_test`` folders.

    Returns
    -------
    tuple
        ``(X_train, y_train, label_train, X_test, y_test, label_test)``
    """
    def read_csv(folder, filename):
        # Every file shares the same layout: <prefix>/<folder>/<filename>
        return np.genfromtxt(os.path.join(prefix, folder, filename), delimiter=',')

    features_train = read_csv('data_train', 'X_train.csv')
    targets_train = read_csv('data_train', 'y_train.csv').astype('int')
    labels_train = read_csv('data_train', 'label_train.csv')

    features_test = read_csv('data_test', 'X_test.csv')
    targets_test = read_csv('data_test', 'y_test.csv').astype('int')
    labels_test = read_csv('data_test', 'label_test.csv')

    return (
        features_train, targets_train, labels_train,
        features_test, targets_test, labels_test,
    )

In [4]:
# Load the previously selected estimator and the processed train/test arrays.
# The training arrays are bound to X / y / label; the test arrays keep a
# `_test` suffix.
model = load_model(selected_model_path)
X, y, label, X_test, y_test, label_test = load_data(prefix)
# Bare last expression so the notebook displays the loaded estimator.
model

In [5]:
# Hyper-parameter candidates, keyed by the estimator parameter they tune
params = {
    'learning_rate': learning_rate,
    'n_estimators': n_estimators,
    'max_depth': max_depth,
}

# Randomized search over `params`; the fixed random_state makes the sampled
# candidates repeatable, and refit=True retrains the best candidate on all of
# the data passed to fit().
search = RandomizedSearchCV(
    model,
    params,
    n_iter=n_iter,
    scoring=score,
    cv=cv,
    refit=True,
    random_state=100,
    return_train_score=False,
)

search.fit(X, y)

In [6]:
# Show the search's recorded cross-validation results (timings, sampled
# parameters, per-split and mean test scores, rank) as a table.
pd.DataFrame(search.cv_results_)

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_n_estimators,param_max_depth,param_learning_rate,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.125128,0.017493,0.000857,0.000126,100,6,0.05,"{'n_estimators': 100, 'max_depth': 6, 'learnin...",0.807453,0.78125,0.875,0.85,0.78125,0.818991,0.037631,1
1,0.104542,0.005332,0.000784,0.000126,300,2,0.3,"{'n_estimators': 300, 'max_depth': 2, 'learnin...",0.751553,0.7875,0.8375,0.825,0.8,0.800311,0.030114,20
2,0.141219,0.005319,0.0009,0.000135,300,3,0.05,"{'n_estimators': 300, 'max_depth': 3, 'learnin...",0.795031,0.8,0.84375,0.85625,0.78125,0.815256,0.029294,4
3,0.03499,0.00121,0.000499,3.1e-05,100,2,0.1,"{'n_estimators': 100, 'max_depth': 2, 'learnin...",0.801242,0.78125,0.86875,0.8375,0.79375,0.816498,0.032137,2
4,0.878735,0.313167,0.00415,0.001263,500,5,0.05,"{'n_estimators': 500, 'max_depth': 5, 'learnin...",0.78882,0.825,0.8125,0.85,0.79375,0.814014,0.022196,6
5,0.130513,0.001443,0.001246,2.8e-05,100,3,0.05,"{'n_estimators': 100, 'max_depth': 3, 'learnin...",0.795031,0.7875,0.85625,0.8375,0.78125,0.811506,0.029802,10
6,0.159828,0.006827,0.001371,3.4e-05,50,6,0.05,"{'n_estimators': 50, 'max_depth': 6, 'learning...",0.807453,0.775,0.875,0.83125,0.7875,0.815241,0.035432,5
7,0.242337,0.006331,0.00176,0.000116,100,5,0.05,"{'n_estimators': 100, 'max_depth': 5, 'learnin...",0.78882,0.8,0.8625,0.825,0.7875,0.812764,0.028277,7
8,0.360016,0.026932,0.002441,0.000358,100,6,0.3,"{'n_estimators': 100, 'max_depth': 6, 'learnin...",0.78882,0.8,0.825,0.8375,0.76875,0.804014,0.024707,16
9,0.111245,0.007696,0.001204,0.000128,100,2,0.05,"{'n_estimators': 100, 'max_depth': 2, 'learnin...",0.801242,0.75625,0.85625,0.8375,0.78125,0.806498,0.036404,14


In [7]:
# Persist the refit best estimator found by the search
with open(tunned_model_path, mode='wb') as model_file:
    pickle.dump(obj=search.best_estimator_, file=model_file)

# Bare last expression so the notebook displays the saved estimator
search.best_estimator_