In [None]:
import optuna

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import warnings
# NOTE(review): this silences every warning category for the whole session,
# including deprecation/convergence warnings from the libraries imported
# here. Consider narrowing it to specific categories once the run is stable.
warnings.filterwarnings("ignore")
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import StratifiedKFold,cross_val_score
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import confusion_matrix,classification_report,roc_auc_score,roc_curve,precision_recall_curve
import statsmodels.formula.api as sm
from xgboost import XGBClassifier
import datetime
from datetime import *

In [None]:
# Read the two CSV files into DataFrames in one pass.
# NOTE(review): the absolute `/content/...` paths look like a Colab working
# directory — confirm, and consider a configurable data directory.
train, test = (
    pd.read_csv(csv_path)
    for csv_path in ("/content/Train.csv", "/content/Test.csv")
)

In [None]:
# Separate the target column ("Disbursed") from the remaining feature columns.
Y = train["Disbursed"]
X = train.drop(columns=["Disbursed"])

In [None]:
# 70/30 hold-out split with a fixed seed.
# `stratify=Y` keeps the class proportions of the target identical in both
# partitions, matching the stratified cross-validation used for scoring
# elsewhere in this notebook.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.3,
    random_state=100,
    stratify=Y,
)

In [None]:
import xgboost as xgb

In [None]:
# Baseline model: XGBoost classifier with all library-default hyperparameters.
clf = XGBClassifier()

In [None]:
# 10 shuffled, stratified folds; the fixed seed keeps fold membership stable
# between runs.
skf = StratifiedKFold(
    n_splits=10,
    shuffle=True,
    random_state=1,
)

In [None]:
# Baseline: macro-averaged F1 of the default classifier on each fold of `skf`.
scores = cross_val_score(
    estimator=clf,
    X=X,
    y=Y,
    scoring="f1_macro",
    cv=skf,
    n_jobs=-1,  # evaluate folds in parallel on all available cores
)
scores

array([0.49632459, 0.49632459, 0.49632459, 0.49632459, 0.49632459,
       0.49632459, 0.49632459, 0.49629544, 0.49629544, 0.49629544])

In [None]:
# Collapse the per-fold macro-F1 values into a single baseline figure.
avg_f1_score = np.mean(scores)
avg_f1_score

0.49631584700073095

In [None]:
def objective(trial):
    """Optuna objective: mean cross-validated macro-F1 of an XGBoost classifier.

    Draws one hyperparameter configuration from ``trial``, builds an
    ``xgb.XGBClassifier`` from it and evaluates that model with shuffled,
    stratified 10-fold cross-validation on the notebook-level ``X`` and ``Y``.

    Parameters
    ----------
    trial
        The trial object handed in by ``study.optimize``; used only through
        its ``suggest_int`` / ``suggest_float`` methods.

    Returns
    -------
    float
        Mean of the ten per-fold ``f1_macro`` scores.
    """
    # The dict is filled in the order the suggestions are drawn; its keys are
    # both the Optuna parameter names and the XGBClassifier keyword names.
    params = {
        "n_estimators": trial.suggest_int("n_estimators", 100, 250, step=10),
        "learning_rate": trial.suggest_float("learning_rate", 1e-7, 0.3, log=True),
        "scale_pos_weight": trial.suggest_int("scale_pos_weight", 1, 80, step=1),
        "max_depth": trial.suggest_int("max_depth", 3, 12, step=1),
        "min_child_weight": trial.suggest_int("min_child_weight", 1, 15, step=1),
        "gamma": trial.suggest_float("gamma", 1e-5, 0.5, log=True),
        "subsample": trial.suggest_float("subsample", 0.5, 1, log=True),
        "colsample_bytree": trial.suggest_float("colsample_bytree", 0.5, 1, log=True),
        "reg_lambda": trial.suggest_int("reg_lambda", 0, 50, step=1),
        "reg_alpha": trial.suggest_int("reg_alpha", 0, 50, step=1),
    }
    model = xgb.XGBClassifier(**params)

    # Same fold definition for every trial so scores are comparable.
    folds = StratifiedKFold(n_splits=10, shuffle=True, random_state=1)
    fold_scores = cross_val_score(
        model, X, Y, scoring='f1_macro', cv=folds, n_jobs=-1
    )
    return fold_scores.mean()

In [None]:
# In-memory study that maximizes the objective's return value.
# The sampler is created explicitly with a fixed seed so that the sequence of
# suggested hyperparameters is repeatable after a kernel restart; TPESampler
# is the sampler Optuna would otherwise pick by default, just unseeded.
study = optuna.create_study(
    direction="maximize",
    study_name="f1-score-max",
    sampler=optuna.samplers.TPESampler(seed=1),
)

[32m[I 2022-08-05 22:12:26,976][0m A new study created in memory with name: f1-score-max[0m


In [None]:
# Run the search: 10 trials, each one a full 10-fold cross-validation
# (the recorded log below shows roughly 3-5 minutes per trial).
study.optimize(func=objective, n_trials=10)

[32m[I 2022-08-05 22:17:15,913][0m Trial 0 finished with value: 0.437122439566249 and parameters: {'n_estimators': 190, 'learning_rate': 0.0005210540319295563, 'scale_pos_weight': 80, 'max_depth': 6, 'min_child_weight': 8, 'gamma': 1.0094549595196925e-05, 'subsample': 0.5988699371033575, 'colsample_bytree': 0.827358544709625, 'reg_lambda': 5, 'reg_alpha': 37}. Best is trial 0 with value: 0.437122439566249.[0m
[32m[I 2022-08-05 22:20:57,712][0m Trial 1 finished with value: 0.45176775662350277 and parameters: {'n_estimators': 250, 'learning_rate': 0.005826313814341369, 'scale_pos_weight': 61, 'max_depth': 4, 'min_child_weight': 14, 'gamma': 0.005039012038617675, 'subsample': 0.8507070037843115, 'colsample_bytree': 0.7898879576692381, 'reg_lambda': 16, 'reg_alpha': 37}. Best is trial 1 with value: 0.45176775662350277.[0m
[32m[I 2022-08-05 22:24:25,819][0m Trial 2 finished with value: 0.5117498681030433 and parameters: {'n_estimators': 120, 'learning_rate': 0.02476964613729218, 'sc

In [None]:
# Hyperparameter values of the trial with the highest objective value.
study.best_params

{'colsample_bytree': 0.7533702125494864,
 'gamma': 0.0015298262975746723,
 'learning_rate': 0.08346350813860294,
 'max_depth': 11,
 'min_child_weight': 1,
 'n_estimators': 250,
 'reg_alpha': 9,
 'reg_lambda': 19,
 'scale_pos_weight': 10,
 'subsample': 0.5274011452364881}

In [None]:
# Highest mean macro-F1 reached by any trial in the study.
study.best_value

0.5343904376182129