# Training

In [1]:
import sys
# Add the parent directory to the module search path so the later `import Demo2`
# can be resolved (presumably the Demo2 package lives one level up — confirm).
sys.path.append('..')

In [2]:
# Loading useful libraries

import numpy as np
import pandas as pd

import optuna
import os
import pickle

import Demo2

In [3]:
# Choose the kind of model this notebook trains.
# Supported values: "regression" or "classification".

model_type = "classification"

# Fail early on an unsupported value before any file path is built from it.
assert model_type in ("regression", "classification")

In [4]:
# Read the training features and target for the selected model type.
# Both CSV files are indexed by the 'User_ID' column.

X_train = pd.read_csv(
    f'../tmp/X_train_{model_type}.csv',
    index_col='User_ID',
)

# The target file is read as a frame first; only the 'is_BigSpender' column is kept.
targets_train = pd.read_csv(
    f'../tmp/y_train_{model_type}.csv',
    index_col='User_ID',
)
y_train = targets_train['is_BigSpender']

In [5]:
# Search budget and sampler seed for the hyperparameter study.
N_TRIALS = 100
SEED = 42

objective = Demo2.hyperparameter_tuning.Objective(X_train=X_train, y_train=y_train)

# Creating a study object and optimizing the objective function.
# The sampler is seeded so that the sequence of suggested hyperparameters is
# repeatable under Restart & Run All. TPESampler is Optuna's default sampler for
# single-objective studies, so the search algorithm itself is unchanged.
# NOTE(review): full reproducibility also requires the objective to be
# deterministic; Demo2's Objective is not visible here — confirm.
optuna.logging.set_verbosity(optuna.logging.WARNING)
study = optuna.create_study(
    direction='maximize',
    sampler=optuna.samplers.TPESampler(seed=SEED),
)
study.optimize(objective, n_trials=N_TRIALS, show_progress_bar=True)

# Printing the best hyperparameters and the corresponding objective value
# (the metric is whatever the Objective returns; the study maximizes it).
best_trial = study.best_trial
print('Best Trial:')
print('  Value: ', best_trial.value)
print('  Params: ')
for key, value in best_trial.params.items():
    print(f'    {key}: {value}')

  0%|          | 0/100 [00:00<?, ?it/s]

Best Trial:
  Value:  0.3931034482758621
  Params: 
    n_estimators: 847
    num_leaves: 46
    max_depth: 10


In [6]:
# Build a model from the finished Optuna study and fit it on the full training set.
# NOTE(review): create_model presumably applies study.best_trial.params — confirm in Demo2.training.
model = Demo2.training.create_model(study)
model.fit(X_train, y_train)

# Score the fitted model on the same data it was trained on. This is an in-sample
# figure, so it is expected to be optimistic compared with a cross-validated score.
# NOTE(review): the metric is defined inside Demo2.training.evaluate — confirm which one.
score_train = Demo2.training.evaluate(model, X_train, y_train)
score_train



0.599009900990099

In [7]:
# Evaluate again with cross_val=True and display the mean of the returned scores.
scores = Demo2.training.evaluate(
    model,
    X_train,
    y_train,
    cross_val=True,
)
scores.mean()



0.3507605944415207

# Saving model

In [8]:
# Make sure the output directory exists before the model is written to it.
# exist_ok=True makes this a no-op when the directory is already there, while
# still raising if '../tmp' exists but is not a directory.
os.makedirs("../tmp", exist_ok=True)

In [9]:
# Serialize the fitted model to disk; the context manager closes the file
# even if pickling fails.
with open('../tmp/model.pkl', 'wb') as model_file:
    pickle.dump(model, model_file)