In [1]:
import optuna

import pandas as pd

from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier,HistGradientBoostingClassifier
from sklearn.model_selection import cross_val_score
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score

In [2]:
df = pd.read_csv('creditcard.csv').drop(['Time'],axis=1)
df

Unnamed: 0,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
0,-1.359807,-0.072781,2.536347,1.378155,-0.338321,0.462388,0.239599,0.098698,0.363787,0.090794,...,-0.018307,0.277838,-0.110474,0.066928,0.128539,-0.189115,0.133558,-0.021053,149.62,0
1,1.191857,0.266151,0.166480,0.448154,0.060018,-0.082361,-0.078803,0.085102,-0.255425,-0.166974,...,-0.225775,-0.638672,0.101288,-0.339846,0.167170,0.125895,-0.008983,0.014724,2.69,0
2,-1.358354,-1.340163,1.773209,0.379780,-0.503198,1.800499,0.791461,0.247676,-1.514654,0.207643,...,0.247998,0.771679,0.909412,-0.689281,-0.327642,-0.139097,-0.055353,-0.059752,378.66,0
3,-0.966272,-0.185226,1.792993,-0.863291,-0.010309,1.247203,0.237609,0.377436,-1.387024,-0.054952,...,-0.108300,0.005274,-0.190321,-1.175575,0.647376,-0.221929,0.062723,0.061458,123.50,0
4,-1.158233,0.877737,1.548718,0.403034,-0.407193,0.095921,0.592941,-0.270533,0.817739,0.753074,...,-0.009431,0.798278,-0.137458,0.141267,-0.206010,0.502292,0.219422,0.215153,69.99,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
284802,-11.881118,10.071785,-9.834783,-2.066656,-5.364473,-2.606837,-4.918215,7.305334,1.914428,4.356170,...,0.213454,0.111864,1.014480,-0.509348,1.436807,0.250034,0.943651,0.823731,0.77,0
284803,-0.732789,-0.055080,2.035030,-0.738589,0.868229,1.058415,0.024330,0.294869,0.584800,-0.975926,...,0.214205,0.924384,0.012463,-1.016226,-0.606624,-0.395255,0.068472,-0.053527,24.79,0
284804,1.919565,-0.301254,-3.249640,-0.557828,2.630515,3.031260,-0.296827,0.708417,0.432454,-0.484782,...,0.232045,0.578229,-0.037501,0.640134,0.265745,-0.087371,0.004455,-0.026561,67.88,0
284805,-0.240440,0.530483,0.702510,0.689799,-0.377961,0.623708,-0.686180,0.679145,0.392087,-0.399126,...,0.265245,0.800049,-0.163298,0.123205,-0.569159,0.546668,0.108821,0.104533,10.00,0


In [3]:
train_df, test_df = train_test_split(df, test_size=0.2)

target = 'Class'

y_train = train_df[target]
x_train = train_df.copy().drop([target],axis=1)

y_test = test_df[target]
x_test = test_df.copy().drop([target],axis=1)

In [4]:
def rf_param_function(trial):
    params = {
        "n_estimators":trial.suggest_int("n_estiamtors",100,500,step=100),
        "max_depth":trial.suggest_int("max_depth", 2, 32, log=True)
    }
    return params

def outer_objective(clf,param_function):
    def inner_objective(trial):
        params = param_function(trial)
        model = clf.set_params(**params).fit(x_train,y_train)

        pred = model.predict(x_test)

        f1 = f1_score(pred,y_test)

        return f1
    return inner_objective

In [5]:
study = optuna.create_study(direction="maximize")
study.optimize(outer_objective(RandomForestClassifier(),rf_param_function),n_trials=10)
print(study.best_value)
print(study.best_params)

[32m[I 2022-03-01 08:14:00,830][0m A new study created in memory with name: no-name-0fb78fa5-d09c-4dcd-80ae-60c5557e971e[0m
[32m[I 2022-03-01 08:16:24,512][0m Trial 0 finished with value: 0.8121212121212121 and parameters: {'n_estiamtors': 200, 'max_depth': 5}. Best is trial 0 with value: 0.8121212121212121.[0m
[32m[I 2022-03-01 08:18:30,499][0m Trial 1 finished with value: 0.7125000000000001 and parameters: {'n_estiamtors': 300, 'max_depth': 3}. Best is trial 0 with value: 0.8121212121212121.[0m
[32m[I 2022-03-01 08:25:35,224][0m Trial 2 finished with value: 0.8554216867469878 and parameters: {'n_estiamtors': 200, 'max_depth': 20}. Best is trial 2 with value: 0.8554216867469878.[0m
[32m[I 2022-03-01 08:36:32,136][0m Trial 3 finished with value: 0.8536585365853658 and parameters: {'n_estiamtors': 400, 'max_depth': 13}. Best is trial 2 with value: 0.8554216867469878.[0m
[32m[I 2022-03-01 08:40:11,836][0m Trial 4 finished with value: 0.8170731707317074 and parameters: {'

0.8606060606060607
{'n_estiamtors': 400, 'max_depth': 16}


In [6]:
def svc_param_function(trial):
    params = {
        "C":trial.suggest_float("C", 1e-10, 1e10, log=True)
    }
    return params

def hgbc_param_function(trial):
    params = {
        "max_iter":trial.suggest_int("max_iter",100,500,step=10),
        "max_depth":trial.suggest_int("max_depth", 2, 32, log=True)
    }
    return params

In [7]:
model_list = [
    [RandomForestClassifier(),rf_param_function],
    [SVC(gamma='auto'),svc_param_function],
    [HistGradientBoostingClassifier(),hgbc_param_function]
]

In [8]:
results = pd.DataFrame()

for i in model_list:

    model_name = type(i[0]).__name__
    print(model_name)

    study = optuna.create_study(direction="maximize")
    study.optimize(outer_objective(i[0],i[1]),n_trials=10)

    print(study.best_value)
    print(study.best_params)

    data = {"model":[model_name],"score":[study.best_value]}
    params=study.best_params
    for key, value in params.items():
        params.update({key:[value]})
    updated = {**data, **params}
    
    results = results.append(pd.DataFrame(updated),ignore_index=True)


[32m[I 2022-03-01 09:05:33,766][0m A new study created in memory with name: no-name-bd32ec35-bc11-4148-bb73-0bd173c6beef[0m


RandomForestClassifier


[32m[I 2022-03-01 09:06:45,904][0m Trial 0 finished with value: 0.6838709677419355 and parameters: {'n_estiamtors': 300, 'max_depth': 2}. Best is trial 0 with value: 0.6838709677419355.[0m
[32m[I 2022-03-01 09:09:43,438][0m Trial 1 finished with value: 0.8121212121212121 and parameters: {'n_estiamtors': 300, 'max_depth': 5}. Best is trial 1 with value: 0.8121212121212121.[0m
[32m[I 2022-03-01 09:15:44,504][0m Trial 2 finished with value: 0.8466257668711656 and parameters: {'n_estiamtors': 400, 'max_depth': 8}. Best is trial 2 with value: 0.8466257668711656.[0m
[32m[I 2022-03-01 09:16:58,480][0m Trial 3 finished with value: 0.6753246753246752 and parameters: {'n_estiamtors': 300, 'max_depth': 2}. Best is trial 2 with value: 0.8466257668711656.[0m
[32m[I 2022-03-01 09:32:31,001][0m Trial 4 finished with value: 0.8606060606060607 and parameters: {'n_estiamtors': 500, 'max_depth': 24}. Best is trial 4 with value: 0.8606060606060607.[0m
[32m[I 2022-03-01 09:33:18,746][0m Tr

0.8606060606060607
{'n_estiamtors': 500, 'max_depth': 24}
SVC


[32m[I 2022-03-01 10:10:13,468][0m Trial 0 finished with value: 0.0 and parameters: {'C': 0.007360951171576561}. Best is trial 0 with value: 0.0.[0m
[32m[I 2022-03-01 10:11:19,956][0m Trial 1 finished with value: 0.48120300751879713 and parameters: {'C': 198125.6234198046}. Best is trial 1 with value: 0.48120300751879713.[0m
[32m[I 2022-03-01 10:12:26,087][0m Trial 2 finished with value: 0.48120300751879713 and parameters: {'C': 22644351.218810868}. Best is trial 1 with value: 0.48120300751879713.[0m
[32m[I 2022-03-01 10:13:29,489][0m Trial 3 finished with value: 0.48120300751879713 and parameters: {'C': 69845.32430646139}. Best is trial 1 with value: 0.48120300751879713.[0m
[32m[I 2022-03-01 10:29:28,966][0m Trial 4 finished with value: 0.4925373134328359 and parameters: {'C': 17516.317349967412}. Best is trial 4 with value: 0.4925373134328359.[0m
[32m[I 2022-03-01 10:30:37,434][0m Trial 5 finished with value: 0.48120300751879713 and parameters: {'C': 325791.627426086

0.5116279069767442
{'C': 54.96557623903949}
HistGradientBoostingClassifier


[32m[I 2022-03-01 11:33:08,878][0m Trial 0 finished with value: 0.7590361445783131 and parameters: {'max_iter': 440, 'max_depth': 2}. Best is trial 0 with value: 0.7590361445783131.[0m
[32m[I 2022-03-01 11:33:10,586][0m Trial 1 finished with value: 0.7317073170731706 and parameters: {'max_iter': 160, 'max_depth': 2}. Best is trial 0 with value: 0.7590361445783131.[0m
[32m[I 2022-03-01 11:33:12,495][0m Trial 2 finished with value: 0.6 and parameters: {'max_iter': 130, 'max_depth': 18}. Best is trial 0 with value: 0.7590361445783131.[0m
[32m[I 2022-03-01 11:33:14,168][0m Trial 3 finished with value: 0.47457627118644063 and parameters: {'max_iter': 320, 'max_depth': 4}. Best is trial 0 with value: 0.7590361445783131.[0m
[32m[I 2022-03-01 11:33:15,868][0m Trial 4 finished with value: 0.7368421052631579 and parameters: {'max_iter': 380, 'max_depth': 4}. Best is trial 0 with value: 0.7590361445783131.[0m
[32m[I 2022-03-01 11:33:17,565][0m Trial 5 finished with value: 0.69072

0.7590361445783131
{'max_iter': 440, 'max_depth': 2}


In [9]:
results

Unnamed: 0,model,score,n_estiamtors,max_depth,C,max_iter
0,RandomForestClassifier,0.860606,500.0,24.0,,
1,SVC,0.511628,,,54.965576,
2,HistGradientBoostingClassifier,0.759036,,2.0,,440.0
