In [4]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score, classification_report
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.preprocessing import MinMaxScaler, StandardScaler, RobustScaler
import optuna
from optuna import Trial, visualization
from optuna.samplers import TPESampler
import pickle
# pd.set_option('display.max_column', None)

  from .autonotebook import tqdm as notebook_tqdm


In [5]:
data = pd.read_csv('./game_info.csv')

In [6]:
feature = data.iloc[:,1:-1]
target = data['Win']
X_train, X_test, y_train, y_test = train_test_split(feature, target, test_size=0.2)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2)

In [10]:
X_train.shape, X_val.shape, X_test.shape

((19504, 32), (4876, 32), (6095, 32))

In [18]:
def objective(trial: Trial, X, y, Xval,yval):
    trial.suggest_float
    params = {
        "n_estimators" : trial.suggest_int('n_estimators', 100, 1500),
        "criterion" : trial.suggest_categorical('criterion', ["gini","entropy"]),
        "max_depth" : trial.suggest_int('max_depth', 1,10 ),
        "min_samples_split" : trial.suggest_int('min_samples_split', 2,15 ),
        "min_samples_leaf" : trial.suggest_int('min_samples_split', 1,10 ),
        "oob_score" : trial.suggest_categorical('oob_score', [True, False] ),
        "n_jobs" : -1,
        "random_state" : 2
    }
    model = RandomForestClassifier(**params)
    RF_model = model.fit(X,y)
    score = roc_auc_score(yval,RF_model.predict(Xval))
    
    return score
study = optuna.create_study(direction='maximize')
study.optimize(lambda trial : objective(trial, X_train,y_train,X_val,y_val), n_trials=50 )
print('Best trial: score{},\n params{}'.format(study.best_trial.value, study.best_trial.params))

[I 2023-06-18 20:55:44,177] A new study created in memory with name: no-name-28469130-7ac7-473e-987e-0f47b38b72fa
[I 2023-06-18 20:55:47,302] Trial 0 finished with value: 0.980753267204241 and parameters: {'n_estimators': 835, 'criterion': 'gini', 'max_depth': 8, 'min_samples_split': 13, 'oob_score': False}. Best is trial 0 with value: 0.980753267204241.
[I 2023-06-18 20:55:50,740] Trial 1 finished with value: 0.9809554507869342 and parameters: {'n_estimators': 914, 'criterion': 'gini', 'max_depth': 8, 'min_samples_split': 10, 'oob_score': False}. Best is trial 1 with value: 0.9809554507869342.
[I 2023-06-18 20:55:54,071] Trial 2 finished with value: 0.9821921109194448 and parameters: {'n_estimators': 802, 'criterion': 'gini', 'max_depth': 10, 'min_samples_split': 7, 'oob_score': False}. Best is trial 2 with value: 0.9821921109194448.
[I 2023-06-18 20:56:01,325] Trial 3 finished with value: 0.9752236345624714 and parameters: {'n_estimators': 1371, 'criterion': 'gini', 'max_depth': 6, '

Best trial: score0.9834346607110434,
 params{'n_estimators': 288, 'criterion': 'gini', 'max_depth': 10, 'min_samples_split': 5, 'oob_score': True}


In [19]:
study.best_trial.params

{'n_estimators': 288,
 'criterion': 'gini',
 'max_depth': 10,
 'min_samples_split': 5,
 'oob_score': True}

In [21]:
model = RandomForestClassifier(**study.best_trial.params)
model.fit(X_train,y_train)
roc_auc_score(y_val, model.predict(X_val))

0.9821862212603568

In [41]:
with open('./lol_flask_app/views/model.pkl','wb') as file:
    pickle.dump(model, file)