In [1]:
import numpy as np
import pandas as pd

In [2]:
df=pd.read_csv('train1.csv')
df.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [3]:
df.shape

(891, 12)

In [4]:
df['Embarked'].unique()

array(['S', 'C', 'Q', nan], dtype=object)

In [5]:
modeval=df['Embarked'].mode()[0]
df['Embarked']=df['Embarked'].fillna(modeval)

In [6]:
mapping={'S':0,'C':1,'Q':2}
df['Embarked']=df['Embarked'].replace(mapping)
mapping1={'male':0,'female':1}
df['Sex']=df['Sex'].replace(mapping1)

In [7]:
df.dtypes

PassengerId      int64
Survived         int64
Pclass           int64
Name            object
Sex              int64
Age            float64
SibSp            int64
Parch            int64
Ticket          object
Fare           float64
Cabin           object
Embarked         int64
dtype: object

In [8]:
df.drop(['PassengerId','Name','Ticket','Cabin'],axis=1,inplace=True)

In [9]:
df.head()

Unnamed: 0,Survived,Pclass,Sex,Age,SibSp,Parch,Fare,Embarked
0,0,3,0,22.0,1,0,7.25,0
1,1,1,1,38.0,1,0,71.2833,1
2,1,3,1,26.0,0,0,7.925,0
3,1,1,1,35.0,1,0,53.1,0
4,0,3,0,35.0,0,0,8.05,0


In [10]:
X=df.iloc[:,:-1]
y=df.iloc[:,-1]

In [11]:
from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.2,random_state=42)

In [12]:
from hyperopt import hp,fmin,tpe,STATUS_OK,Trials

In [13]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score

In [14]:
space={
    'criterion':hp.choice('criterion',['entropy','gini','log_loss']),
    'max_depth':hp.randint('Max_depth',1,1000),
    'max_features':hp.choice('max_features',['sqrt','log2',None]),
    'min_samples_leaf':hp.uniform('min_samples_leaf',0,0.5),
    'min_samples_split':hp.uniform('min_samples_split',0,1),
    'n_estimators':hp.choice('n_estimators',[10,50,100,150,300,500,700,1000])
}

In [15]:
space

{'criterion': <hyperopt.pyll.base.Apply at 0x23b29b37a90>,
 'max_depth': <hyperopt.pyll.base.Apply at 0x23b29b37e10>,
 'max_features': <hyperopt.pyll.base.Apply at 0x23b29b3c490>,
 'min_samples_leaf': <hyperopt.pyll.base.Apply at 0x23b29b3c910>,
 'min_samples_split': <hyperopt.pyll.base.Apply at 0x23b29b3cd90>,
 'n_estimators': <hyperopt.pyll.base.Apply at 0x23b29b3d790>}

In [16]:
def objective(space):
    model=RandomForestClassifier(criterion=space['criterion'],
                                max_depth=space['max_depth'],
                                max_features=space['max_features'],
                                min_samples_leaf=space['min_samples_leaf'],
                                min_samples_split=space['min_samples_split'],
                                n_estimators=space['n_estimators'])
    accuracy=cross_val_score(model,X_train,y_train,cv=5).mean()
    return {"loss":-accuracy,"status":STATUS_OK}

In [17]:
trials=Trials()
best=fmin(fn=objective,
         space=space,
         algo=tpe.suggest,
         max_evals=80,
         trials=trials)
best

100%|███████████████████████████████████████████████| 80/80 [08:35<00:00,  6.44s/trial, best loss: -0.7401654683344825]


{'Max_depth': 660,
 'criterion': 0,
 'max_features': 0,
 'min_samples_leaf': 0.07793663856392763,
 'min_samples_split': 0.5816326491418702,
 'n_estimators': 7}

In [18]:
crit = {0: 'entropy', 1: 'gini',2:'log_loss'}
feat = {0: None, 1: 'sqrt', 2: 'log2'}
est = {0: 10, 1: 50, 2: 100, 3: 150, 4: 300,5:500,6:700,7:1000}
print(crit[best['criterion']])
print(feat[best['max_features']])
print(est[best['n_estimators']])

entropy
None
1000


In [19]:
best['min_samples_leaf']

0.07793663856392763

In [20]:
from sklearn.metrics import confusion_matrix,accuracy_score,classification_report
trainedforest = RandomForestClassifier(criterion = crit[best['criterion']], max_depth = best['Max_depth'], 
                                       max_features = feat[best['max_features']], 
                                       min_samples_leaf = best['min_samples_leaf'], 
                                       min_samples_split = best['min_samples_split'], 
                                       n_estimators = est[best['n_estimators']]).fit(X_train,y_train)
predictionforest = trainedforest.predict(X_test)
print(confusion_matrix(y_test,predictionforest))
print(accuracy_score(y_test,predictionforest))
print(classification_report(y_test,predictionforest))
acc5 = accuracy_score(y_test,predictionforest)

[[119   0   0]
 [ 43   0   0]
 [ 17   0   0]]
0.664804469273743
              precision    recall  f1-score   support

           0       0.66      1.00      0.80       119
           1       0.00      0.00      0.00        43
           2       0.00      0.00      0.00        17

    accuracy                           0.66       179
   macro avg       0.22      0.33      0.27       179
weighted avg       0.44      0.66      0.53       179



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [14]:
space={
    'booster':hp.choice('booster',['gbtree','gblinear']),
    'max_depth':hp.randint('max_depth',1,10),
    'eta':hp.uniform('eta',0.01,0.3),
    'n_estimators':hp.choice('n_estimators',[10,50,100,150,300,500,1000])
}

In [21]:
import xgboost as xgb

In [16]:
def objective(space):
    model=xgb.XGBClassifier(
    booster=space['booster'],
    max_depth=space['max_depth'],
    eta=space['eta'],
    n_estimators=space['n_estimators']    
    )
    accuracy=cross_val_score(model,X_train,y_train,cv=5).mean()
    return {"loss":-accuracy,"status":STATUS_OK}

In [17]:
trials=Trials()
best=fmin(fn=objective,
         space=space,
         algo=tpe.suggest,
         max_evals=80,
         trials=trials)

  2%|█▏                                              | 2/80 [00:03<02:05,  1.61s/trial, best loss: -0.8427361371023343]

Parameters: { "max_depth" } are not used.


Parameters: { "max_depth" } are not used.


Parameters: { "max_depth" } are not used.


Parameters: { "max_depth" } are not used.


Parameters: { "max_depth" } are not used.


Parameters: { "max_depth" } are not used.


Parameters: { "max_depth" } are not used.


Parameters: { "max_depth" } are not used.


Parameters: { "max_depth" } are not used.




  4%|█▊                                              | 3/80 [00:03<01:11,  1.08trial/s, best loss: -0.8427361371023343]

Parameters: { "max_depth" } are not used.




  6%|███                                             | 5/80 [00:04<00:35,  2.09trial/s, best loss: -0.8427361371023343]

Parameters: { "max_depth" } are not used.


Parameters: { "max_depth" } are not used.


Parameters: { "max_depth" } are not used.


Parameters: { "max_depth" } are not used.


Parameters: { "max_depth" } are not used.




  8%|███▌                                            | 6/80 [00:06<01:10,  1.04trial/s, best loss: -0.8427459864079582]

Parameters: { "max_depth" } are not used.


Parameters: { "max_depth" } are not used.


Parameters: { "max_depth" } are not used.


Parameters: { "max_depth" } are not used.




  9%|████▏                                           | 7/80 [00:06<00:55,  1.32trial/s, best loss: -0.8427459864079582]

Parameters: { "max_depth" } are not used.


Parameters: { "max_depth" } are not used.


Parameters: { "max_depth" } are not used.


Parameters: { "max_depth" } are not used.




 10%|████▊                                           | 8/80 [00:06<00:43,  1.65trial/s, best loss: -0.8427459864079582]

Parameters: { "max_depth" } are not used.


Parameters: { "max_depth" } are not used.




 12%|█████▉                                         | 10/80 [00:07<00:26,  2.66trial/s, best loss: -0.8427459864079582]

Parameters: { "max_depth" } are not used.


Parameters: { "max_depth" } are not used.


Parameters: { "max_depth" } are not used.


Parameters: { "max_depth" } are not used.


Parameters: { "max_depth" } are not used.




 19%|████████▊                                      | 15/80 [00:12<00:56,  1.15trial/s, best loss: -0.8441248891953116]

Parameters: { "max_depth" } are not used.


Parameters: { "max_depth" } are not used.


Parameters: { "max_depth" } are not used.


Parameters: { "max_depth" } are not used.


Parameters: { "max_depth" } are not used.


Parameters: { "max_depth" } are not used.


Parameters: { "max_depth" } are not used.


Parameters: { "max_depth" } are not used.


Parameters: { "max_depth" } are not used.


Parameters: { "max_depth" } are not used.




 21%|█████████▉                                     | 17/80 [00:13<00:38,  1.63trial/s, best loss: -0.8441248891953116]

Parameters: { "max_depth" } are not used.


Parameters: { "max_depth" } are not used.


Parameters: { "max_depth" } are not used.


Parameters: { "max_depth" } are not used.


Parameters: { "max_depth" } are not used.


Parameters: { "max_depth" } are not used.


Parameters: { "max_depth" } are not used.




 22%|██████████▌                                    | 18/80 [00:13<00:28,  2.16trial/s, best loss: -0.8441248891953116]

Parameters: { "max_depth" } are not used.


Parameters: { "max_depth" } are not used.


Parameters: { "max_depth" } are not used.


Parameters: { "max_depth" } are not used.


Parameters: { "max_depth" } are not used.


Parameters: { "max_depth" } are not used.


Parameters: { "max_depth" } are not used.


Parameters: { "max_depth" } are not used.




 24%|███████████▏                                   | 19/80 [00:14<00:37,  1.61trial/s, best loss: -0.8441248891953116]

Parameters: { "max_depth" } are not used.


Parameters: { "max_depth" } are not used.


Parameters: { "max_depth" } are not used.


Parameters: { "max_depth" } are not used.


Parameters: { "max_depth" } are not used.




 55%|█████████████████████████▊                     | 44/80 [01:52<02:33,  4.28s/trial, best loss: -0.8652221018418201]

Parameters: { "max_depth" } are not used.


Parameters: { "max_depth" } are not used.


Parameters: { "max_depth" } are not used.


Parameters: { "max_depth" } are not used.




 56%|██████████████████████████▍                    | 45/80 [01:52<01:51,  3.19s/trial, best loss: -0.8652221018418201]

Parameters: { "max_depth" } are not used.




 59%|███████████████████████████▌                   | 47/80 [01:57<01:21,  2.48s/trial, best loss: -0.8652221018418201]

Parameters: { "max_depth" } are not used.


Parameters: { "max_depth" } are not used.


Parameters: { "max_depth" } are not used.


Parameters: { "max_depth" } are not used.


Parameters: { "max_depth" } are not used.




 61%|████████████████████████████▊                  | 49/80 [01:59<00:53,  1.74s/trial, best loss: -0.8652221018418201]

Parameters: { "max_depth" } are not used.


Parameters: { "max_depth" } are not used.


Parameters: { "max_depth" } are not used.


Parameters: { "max_depth" } are not used.


Parameters: { "max_depth" } are not used.




 65%|██████████████████████████████▌                | 52/80 [02:08<01:09,  2.50s/trial, best loss: -0.8652221018418201]

Parameters: { "max_depth" } are not used.


Parameters: { "max_depth" } are not used.


Parameters: { "max_depth" } are not used.


Parameters: { "max_depth" } are not used.


Parameters: { "max_depth" } are not used.




 68%|████████████████████████████████▍               | 54/80 [02:11<00:53,  2.04s/trial, best loss: -0.868009455333399]

Parameters: { "max_depth" } are not used.


Parameters: { "max_depth" } are not used.


Parameters: { "max_depth" } are not used.


Parameters: { "max_depth" } are not used.


Parameters: { "max_depth" } are not used.




 71%|██████████████████████████████████▏             | 57/80 [02:17<00:48,  2.12s/trial, best loss: -0.868009455333399]

Parameters: { "max_depth" } are not used.


Parameters: { "max_depth" } are not used.


Parameters: { "max_depth" } are not used.


Parameters: { "max_depth" } are not used.




 72%|██████████████████████████████████▊             | 58/80 [02:18<00:39,  1.78s/trial, best loss: -0.868009455333399]

Parameters: { "max_depth" } are not used.




 76%|███████████████████████████████████▊           | 61/80 [02:26<00:43,  2.28s/trial, best loss: -0.8694277553432481]

Parameters: { "max_depth" } are not used.


Parameters: { "max_depth" } are not used.


Parameters: { "max_depth" } are not used.


Parameters: { "max_depth" } are not used.


Parameters: { "max_depth" } are not used.




 99%|██████████████████████████████████████████████▍| 79/80 [03:03<00:01,  1.55s/trial, best loss: -0.8722249581404512]

Parameters: { "max_depth" } are not used.


Parameters: { "max_depth" } are not used.


Parameters: { "max_depth" } are not used.


Parameters: { "max_depth" } are not used.


Parameters: { "max_depth" } are not used.




100%|███████████████████████████████████████████████| 80/80 [03:04<00:00,  2.31s/trial, best loss: -0.8722249581404512]


In [18]:
print(best)

{'booster': 0, 'eta': 0.20219653234992235, 'max_depth': 2, 'n_estimators': 5}


In [19]:
boost={0:'gbtree',1:'gblinear'}
est = {0: 10, 1: 50, 2: 100, 3: 150, 4: 300,5:500,6:1000}
print(boost[best['booster']])
print(est[best['n_estimators']])

gbtree
500


In [24]:
from sklearn.metrics import confusion_matrix,accuracy_score,classification_report
trainedxgb=xgb.XGBClassifier(booster=boost[best['booster']],
                        max_depth=best['max_depth'],
                        eta=best['eta'],
                        n_estimators=est[best['n_estimators']]).fit(X_train,y_train)
predxgb=trainedxgb.predict(X_test)
print(confusion_matrix(y_test,predxgb))
print(accuracy_score(y_test,predxgb))
print(classification_report(y_test,predxgb))

[[112   5   2]
 [ 23  20   0]
 [  3   0  14]]
0.8156424581005587
              precision    recall  f1-score   support

           0       0.81      0.94      0.87       119
           1       0.80      0.47      0.59        43
           2       0.88      0.82      0.85        17

    accuracy                           0.82       179
   macro avg       0.83      0.74      0.77       179
weighted avg       0.81      0.82      0.80       179



In [27]:
import optuna
import sklearn
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier

# Define your data and split it into training and testing sets
# Assuming X and y are your features and target variable respectively
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define the objective function
def objective(trial):
    n_estimators = trial.suggest_int('n_estimators', 30, 50)
    max_depth = int(trial.suggest_loguniform('max_depth', 15, 30))
    max_features = trial.suggest_int('max_features', 15, 25)
    
    # Create your model with the suggested hyperparameters
    RFC_model = RandomForestClassifier(n_estimators=n_estimators, max_depth=max_depth, max_features=max_features, min_samples_leaf=1, min_samples_split=2)
    
    # Train your model and make predictions
    RFC_model.fit(X_train, y_train)
    y_pred = RFC_model.predict(X_test)
    
    return sklearn.model_selection.cross_val_score(
        RFC_model,X_train,y_train, n_jobs=-1, cv=3).mean()

In [28]:
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=100)

trial = study.best_trial

print('Accuracy: {}'.format(trial.value))
print("Best hyperparameters: {}".format(trial.params))

[I 2024-03-13 14:30:29,665] A new study created in memory with name: no-name-69026d9d-46eb-4e63-a182-274915811d35
  max_depth = int(trial.suggest_loguniform('max_depth', 15, 30))
[I 2024-03-13 14:30:32,372] Trial 0 finished with value: 0.8202377997612547 and parameters: {'n_estimators': 34, 'max_depth': 18.284093008720927, 'max_features': 16}. Best is trial 0 with value: 0.8202377997612547.
  max_depth = int(trial.suggest_loguniform('max_depth', 15, 30))
[I 2024-03-13 14:30:34,175] Trial 1 finished with value: 0.8216383599853443 and parameters: {'n_estimators': 49, 'max_depth': 22.627631366704566, 'max_features': 16}. Best is trial 1 with value: 0.8216383599853443.
  max_depth = int(trial.suggest_loguniform('max_depth', 15, 30))
[I 2024-03-13 14:30:35,932] Trial 2 finished with value: 0.8033660721672634 and parameters: {'n_estimators': 48, 'max_depth': 23.757164704180866, 'max_features': 20}. Best is trial 1 with value: 0.8216383599853443.
  max_depth = int(trial.suggest_loguniform('ma

  max_depth = int(trial.suggest_loguniform('max_depth', 15, 30))
[I 2024-03-13 14:30:41,322] Trial 15 finished with value: 0.8089919512108641 and parameters: {'n_estimators': 40, 'max_depth': 21.162624024193367, 'max_features': 15}. Best is trial 1 with value: 0.8216383599853443.
  max_depth = int(trial.suggest_loguniform('max_depth', 15, 30))
[I 2024-03-13 14:30:41,905] Trial 16 finished with value: 0.8019596024063634 and parameters: {'n_estimators': 46, 'max_depth': 18.055236610417634, 'max_features': 18}. Best is trial 1 with value: 0.8216383599853443.
  max_depth = int(trial.suggest_loguniform('max_depth', 15, 30))
[I 2024-03-13 14:30:42,392] Trial 17 finished with value: 0.8090096798212957 and parameters: {'n_estimators': 40, 'max_depth': 21.045808375946024, 'max_features': 17}. Best is trial 1 with value: 0.8216383599853443.
  max_depth = int(trial.suggest_loguniform('max_depth', 15, 30))
[I 2024-03-13 14:30:42,825] Trial 18 finished with value: 0.8089919512108641 and parameters:

  max_depth = int(trial.suggest_loguniform('max_depth', 15, 30))
[I 2024-03-13 14:30:48,615] Trial 30 finished with value: 0.8160302095521753 and parameters: {'n_estimators': 49, 'max_depth': 19.862193909775698, 'max_features': 19}. Best is trial 1 with value: 0.8216383599853443.
  max_depth = int(trial.suggest_loguniform('max_depth', 15, 30))
[I 2024-03-13 14:30:49,172] Trial 31 finished with value: 0.8089801321372431 and parameters: {'n_estimators': 48, 'max_depth': 20.10656906814077, 'max_features': 19}. Best is trial 1 with value: 0.8216383599853443.
  max_depth = int(trial.suggest_loguniform('max_depth', 15, 30))
[I 2024-03-13 14:30:49,733] Trial 32 finished with value: 0.8146237397912751 and parameters: {'n_estimators': 49, 'max_depth': 19.470134031191915, 'max_features': 20}. Best is trial 1 with value: 0.8216383599853443.
  max_depth = int(trial.suggest_loguniform('max_depth', 15, 30))
[I 2024-03-13 14:30:50,339] Trial 33 finished with value: 0.8061790116890638 and parameters: 

  max_depth = int(trial.suggest_loguniform('max_depth', 15, 30))
[I 2024-03-13 14:30:56,456] Trial 45 finished with value: 0.8061790116890638 and parameters: {'n_estimators': 32, 'max_depth': 19.596451158473073, 'max_features': 19}. Best is trial 1 with value: 0.8216383599853443.
  max_depth = int(trial.suggest_loguniform('max_depth', 15, 30))
[I 2024-03-13 14:30:57,022] Trial 46 finished with value: 0.8089860416740535 and parameters: {'n_estimators': 50, 'max_depth': 21.673961522445122, 'max_features': 16}. Best is trial 1 with value: 0.8216383599853443.
  max_depth = int(trial.suggest_loguniform('max_depth', 15, 30))
[I 2024-03-13 14:30:57,597] Trial 47 finished with value: 0.8117930716590434 and parameters: {'n_estimators': 46, 'max_depth': 23.219809295717965, 'max_features': 18}. Best is trial 1 with value: 0.8216383599853443.
  max_depth = int(trial.suggest_loguniform('max_depth', 15, 30))
[I 2024-03-13 14:30:58,155] Trial 48 finished with value: 0.8174307697762649 and parameters:

  max_depth = int(trial.suggest_loguniform('max_depth', 15, 30))
[I 2024-03-13 14:31:04,923] Trial 60 finished with value: 0.8118167098062855 and parameters: {'n_estimators': 46, 'max_depth': 21.4685524402563, 'max_features': 18}. Best is trial 1 with value: 0.8216383599853443.
  max_depth = int(trial.suggest_loguniform('max_depth', 15, 30))
[I 2024-03-13 14:31:05,489] Trial 61 finished with value: 0.8118167098062855 and parameters: {'n_estimators': 41, 'max_depth': 23.058872775626067, 'max_features': 19}. Best is trial 1 with value: 0.8216383599853443.
  max_depth = int(trial.suggest_loguniform('max_depth', 15, 30))
[I 2024-03-13 14:31:06,055] Trial 62 finished with value: 0.814611920717654 and parameters: {'n_estimators': 43, 'max_depth': 22.809073867205722, 'max_features': 18}. Best is trial 1 with value: 0.8216383599853443.
  max_depth = int(trial.suggest_loguniform('max_depth', 15, 30))
[I 2024-03-13 14:31:06,556] Trial 63 finished with value: 0.8132113604935646 and parameters: {'

  max_depth = int(trial.suggest_loguniform('max_depth', 15, 30))
[I 2024-03-13 14:31:12,602] Trial 75 finished with value: 0.8075854814499639 and parameters: {'n_estimators': 36, 'max_depth': 27.417232832097092, 'max_features': 24}. Best is trial 1 with value: 0.8216383599853443.
  max_depth = int(trial.suggest_loguniform('max_depth', 15, 30))
[I 2024-03-13 14:31:13,039] Trial 76 finished with value: 0.8146237397912751 and parameters: {'n_estimators': 33, 'max_depth': 20.897526186678032, 'max_features': 20}. Best is trial 1 with value: 0.8216383599853443.
  max_depth = int(trial.suggest_loguniform('max_depth', 15, 30))
[I 2024-03-13 14:31:13,529] Trial 77 finished with value: 0.807609119597206 and parameters: {'n_estimators': 38, 'max_depth': 18.524548962643, 'max_features': 25}. Best is trial 1 with value: 0.8216383599853443.
  max_depth = int(trial.suggest_loguniform('max_depth', 15, 30))
[I 2024-03-13 14:31:14,019] Trial 78 finished with value: 0.8146060111808436 and parameters: {'n

  max_depth = int(trial.suggest_loguniform('max_depth', 15, 30))
[I 2024-03-13 14:31:19,448] Trial 90 finished with value: 0.8146060111808436 and parameters: {'n_estimators': 32, 'max_depth': 15.783990990454232, 'max_features': 22}. Best is trial 84 with value: 0.8216679076693968.
  max_depth = int(trial.suggest_loguniform('max_depth', 15, 30))
[I 2024-03-13 14:31:19,839] Trial 91 finished with value: 0.8146178302544648 and parameters: {'n_estimators': 32, 'max_depth': 17.030143309335532, 'max_features': 16}. Best is trial 84 with value: 0.8216679076693968.
  max_depth = int(trial.suggest_loguniform('max_depth', 15, 30))
[I 2024-03-13 14:31:20,275] Trial 92 finished with value: 0.8033897103145056 and parameters: {'n_estimators': 33, 'max_depth': 19.01059081654515, 'max_features': 15}. Best is trial 84 with value: 0.8216679076693968.
  max_depth = int(trial.suggest_loguniform('max_depth', 15, 30))
[I 2024-03-13 14:31:20,695] Trial 93 finished with value: 0.8019832405536054 and parameter

Accuracy: 0.8216679076693968
Best hyperparameters: {'n_estimators': 32, 'max_depth': 17.029294368220736, 'max_features': 16}


In [29]:
trial

FrozenTrial(number=84, state=1, values=[0.8216679076693968], datetime_start=datetime.datetime(2024, 3, 13, 14, 31, 16, 371509), datetime_complete=datetime.datetime(2024, 3, 13, 14, 31, 16, 786849), params={'n_estimators': 32, 'max_depth': 17.029294368220736, 'max_features': 16}, user_attrs={}, system_attrs={}, intermediate_values={}, distributions={'n_estimators': IntDistribution(high=50, log=False, low=30, step=1), 'max_depth': FloatDistribution(high=30.0, log=True, low=15.0, step=None), 'max_features': IntDistribution(high=25, log=False, low=15, step=1)}, trial_id=84, value=None)

In [30]:
study.best_params

{'n_estimators': 32, 'max_depth': 17.029294368220736, 'max_features': 16}

In [31]:
rf=RandomForestClassifier(n_estimators=330,max_depth=30)
rf.fit(X_train,y_train)

In [32]:
y_pred=rf.predict(X_test)
print(confusion_matrix(y_test,y_pred))
print(accuracy_score(y_test,y_pred))
print(classification_report(y_test,y_pred))

[[112   4   3]
 [ 27  16   0]
 [  6   0  11]]
0.776536312849162
              precision    recall  f1-score   support

           0       0.77      0.94      0.85       119
           1       0.80      0.37      0.51        43
           2       0.79      0.65      0.71        17

    accuracy                           0.78       179
   macro avg       0.79      0.65      0.69       179
weighted avg       0.78      0.78      0.75       179



In [33]:
def objective(trial):
    n_estimators = trial.suggest_int('n_estimators', 30, 50)
    max_depth = int(trial.suggest_loguniform('max_depth', 15, 30))
    booster=trial.suggest_categorical('booster',['gbtree','gblinear','dart'])
    eta=trial.suggest_uniform('eta',0.01,0.3)
    
    xgb_classifier=xgb.XGBClassifier(n_estimators=n_estimators,max_depth=max_depth,booster=booster,eta=eta)
    
    xgb_classifier.fit(X_train,y_train)
    y_pred=xgb_classifier.predict(X_test)
    
    return sklearn.model_selection.cross_val_score(
    xgb_classifier,X_train,y_train, n_jobs=-1, cv=3).mean()

In [34]:
study=optuna.create_study(direction="maximize")
study.optimize(objective,n_trials=100)
trial=study.best_trial
print('Accuracy: {}'.format(trial.value))
print('best hyperparameters: {}'.format(trial.params))

[I 2024-03-13 14:50:37,535] A new study created in memory with name: no-name-4d3d3a4d-d9f3-477e-9ce2-eafb0612a7cc
  max_depth = int(trial.suggest_loguniform('max_depth', 15, 30))
  eta=trial.suggest_uniform('eta',0.01,0.3)
Parameters: { "max_depth" } are not used.

[I 2024-03-13 14:50:41,052] Trial 0 finished with value: 0.737368365067546 and parameters: {'n_estimators': 35, 'max_depth': 15.596921338682591, 'booster': 'gblinear', 'eta': 0.13977533279655924}. Best is trial 0 with value: 0.737368365067546.
  max_depth = int(trial.suggest_loguniform('max_depth', 15, 30))
  eta=trial.suggest_uniform('eta',0.01,0.3)
Parameters: { "max_depth" } are not used.

[I 2024-03-13 14:50:42,380] Trial 1 finished with value: 0.7373624555307355 and parameters: {'n_estimators': 43, 'max_depth': 15.774229094895574, 'booster': 'gblinear', 'eta': 0.08931334416524302}. Best is trial 0 with value: 0.737368365067546.
  max_depth = int(trial.suggest_loguniform('max_depth', 15, 30))
  eta=trial.suggest_uniform(

[I 2024-03-13 14:50:46,367] Trial 7 finished with value: 0.827264239028945 and parameters: {'n_estimators': 47, 'max_depth': 21.00047239937455, 'booster': 'dart', 'eta': 0.020815514798319487}. Best is trial 7 with value: 0.827264239028945.
  max_depth = int(trial.suggest_loguniform('max_depth', 15, 30))
  eta=trial.suggest_uniform('eta',0.01,0.3)
[I 2024-03-13 14:50:48,400] Trial 8 finished with value: 0.8174366793130754 and parameters: {'n_estimators': 49, 'max_depth': 24.667667127792047, 'booster': 'dart', 'eta': 0.2466547460355068}. Best is trial 7 with value: 0.827264239028945.
  max_depth = int(trial.suggest_loguniform('max_depth', 15, 30))
  eta=trial.suggest_uniform('eta',0.01,0.3)
[I 2024-03-13 14:50:48,918] Trial 9 finished with value: 0.8118167098062855 and parameters: {'n_estimators': 46, 'max_depth': 24.656169048463184, 'booster': 'gbtree', 'eta': 0.09586666295679037}. Best is trial 7 with value: 0.827264239028945.
  max_depth = int(trial.suggest_loguniform('max_depth', 15,

[I 2024-03-13 14:51:00,505] Trial 17 finished with value: 0.8230566488198655 and parameters: {'n_estimators': 44, 'max_depth': 17.236868805837087, 'booster': 'gbtree', 'eta': 0.2597432783368333}. Best is trial 13 with value: 0.8300889976243662.
  max_depth = int(trial.suggest_loguniform('max_depth', 15, 30))
  eta=trial.suggest_uniform('eta',0.01,0.3)
[I 2024-03-13 14:51:01,889] Trial 18 finished with value: 0.8174248602394544 and parameters: {'n_estimators': 39, 'max_depth': 17.451229374856027, 'booster': 'dart', 'eta': 0.22969938485886146}. Best is trial 13 with value: 0.8300889976243662.
  max_depth = int(trial.suggest_loguniform('max_depth', 15, 30))
  eta=trial.suggest_uniform('eta',0.01,0.3)
[I 2024-03-13 14:51:03,540] Trial 19 finished with value: 0.8160243000153647 and parameters: {'n_estimators': 44, 'max_depth': 23.592580639679966, 'booster': 'dart', 'eta': 0.20950707210142822}. Best is trial 13 with value: 0.8300889976243662.
  max_depth = int(trial.suggest_loguniform('max_d

[I 2024-03-13 14:51:16,100] Trial 27 finished with value: 0.8146237397912751 and parameters: {'n_estimators': 45, 'max_depth': 18.138142669822997, 'booster': 'dart', 'eta': 0.15565065178040682}. Best is trial 13 with value: 0.8300889976243662.
  max_depth = int(trial.suggest_loguniform('max_depth', 15, 30))
  eta=trial.suggest_uniform('eta',0.01,0.3)
[I 2024-03-13 14:51:18,015] Trial 28 finished with value: 0.8160243000153647 and parameters: {'n_estimators': 48, 'max_depth': 25.726360457534927, 'booster': 'dart', 'eta': 0.24198337535137465}. Best is trial 13 with value: 0.8300889976243662.
  max_depth = int(trial.suggest_loguniform('max_depth', 15, 30))
  eta=trial.suggest_uniform('eta',0.01,0.3)
[I 2024-03-13 14:51:18,525] Trial 29 finished with value: 0.8047843610017846 and parameters: {'n_estimators': 38, 'max_depth': 16.06147753210223, 'booster': 'gbtree', 'eta': 0.11502027174092617}. Best is trial 13 with value: 0.8300889976243662.
  max_depth = int(trial.suggest_loguniform('max_d

[I 2024-03-13 14:51:27,689] Trial 36 finished with value: 0.8104102400453853 and parameters: {'n_estimators': 40, 'max_depth': 15.556994214599008, 'booster': 'dart', 'eta': 0.10705358770673401}. Best is trial 13 with value: 0.8300889976243662.
  max_depth = int(trial.suggest_loguniform('max_depth', 15, 30))
  eta=trial.suggest_uniform('eta',0.01,0.3)
Parameters: { "max_depth" } are not used.

[I 2024-03-13 14:51:27,823] Trial 37 finished with value: 0.7415759552766255 and parameters: {'n_estimators': 43, 'max_depth': 16.685340942324398, 'booster': 'gblinear', 'eta': 0.07249139134538271}. Best is trial 13 with value: 0.8300889976243662.
  max_depth = int(trial.suggest_loguniform('max_depth', 15, 30))
  eta=trial.suggest_uniform('eta',0.01,0.3)
[I 2024-03-13 14:51:29,019] Trial 38 finished with value: 0.8202318902244442 and parameters: {'n_estimators': 36, 'max_depth': 21.090981696593662, 'booster': 'dart', 'eta': 0.05195528186422885}. Best is trial 13 with value: 0.8300889976243662.
  m

[I 2024-03-13 14:51:39,642] Trial 45 finished with value: 0.8230330106726235 and parameters: {'n_estimators': 43, 'max_depth': 15.484721024791888, 'booster': 'dart', 'eta': 0.010214402204333655}. Best is trial 13 with value: 0.8300889976243662.
  max_depth = int(trial.suggest_loguniform('max_depth', 15, 30))
  eta=trial.suggest_uniform('eta',0.01,0.3)
[I 2024-03-13 14:51:41,390] Trial 46 finished with value: 0.8216619981325864 and parameters: {'n_estimators': 45, 'max_depth': 16.977793697318084, 'booster': 'dart', 'eta': 0.28370723217303173}. Best is trial 13 with value: 0.8300889976243662.
  max_depth = int(trial.suggest_loguniform('max_depth', 15, 30))
  eta=trial.suggest_uniform('eta',0.01,0.3)
[I 2024-03-13 14:51:41,889] Trial 47 finished with value: 0.8244572090439551 and parameters: {'n_estimators': 41, 'max_depth': 29.4997520796815, 'booster': 'gbtree', 'eta': 0.03484578058217946}. Best is trial 13 with value: 0.8300889976243662.
  max_depth = int(trial.suggest_loguniform('max_d

[I 2024-03-13 14:51:52,425] Trial 54 finished with value: 0.8117989811958539 and parameters: {'n_estimators': 42, 'max_depth': 16.89735906499252, 'booster': 'dart', 'eta': 0.061910128879649924}. Best is trial 13 with value: 0.8300889976243662.
  max_depth = int(trial.suggest_loguniform('max_depth', 15, 30))
  eta=trial.suggest_uniform('eta',0.01,0.3)
[I 2024-03-13 14:51:54,189] Trial 55 finished with value: 0.8174248602394544 and parameters: {'n_estimators': 46, 'max_depth': 20.71030798913473, 'booster': 'dart', 'eta': 0.04052309469123343}. Best is trial 13 with value: 0.8300889976243662.
  max_depth = int(trial.suggest_loguniform('max_depth', 15, 30))
  eta=trial.suggest_uniform('eta',0.01,0.3)
[I 2024-03-13 14:51:55,892] Trial 56 finished with value: 0.8146237397912751 and parameters: {'n_estimators': 45, 'max_depth': 17.91334438958155, 'booster': 'dart', 'eta': 0.18849853179710932}. Best is trial 13 with value: 0.8300889976243662.
  max_depth = int(trial.suggest_loguniform('max_dept

[I 2024-03-13 14:52:07,639] Trial 64 finished with value: 0.8244512995071446 and parameters: {'n_estimators': 44, 'max_depth': 17.293226942791744, 'booster': 'dart', 'eta': 0.013171786047725892}. Best is trial 13 with value: 0.8300889976243662.
  max_depth = int(trial.suggest_loguniform('max_depth', 15, 30))
  eta=trial.suggest_uniform('eta',0.01,0.3)
[I 2024-03-13 14:52:09,364] Trial 65 finished with value: 0.8188431490739756 and parameters: {'n_estimators': 46, 'max_depth': 15.345949411058546, 'booster': 'dart', 'eta': 0.1665915776866635}. Best is trial 13 with value: 0.8300889976243662.
  max_depth = int(trial.suggest_loguniform('max_depth', 15, 30))
  eta=trial.suggest_uniform('eta',0.01,0.3)
[I 2024-03-13 14:52:11,236] Trial 66 finished with value: 0.807597300523585 and parameters: {'n_estimators': 48, 'max_depth': 17.025937807286557, 'booster': 'dart', 'eta': 0.05548842145745268}. Best is trial 13 with value: 0.8300889976243662.
  max_depth = int(trial.suggest_loguniform('max_dep

[I 2024-03-13 14:52:20,279] Trial 73 finished with value: 0.8258636788048553 and parameters: {'n_estimators': 45, 'max_depth': 22.555839917511438, 'booster': 'dart', 'eta': 0.01714857618741455}. Best is trial 13 with value: 0.8300889976243662.
  max_depth = int(trial.suggest_loguniform('max_depth', 15, 30))
  eta=trial.suggest_uniform('eta',0.01,0.3)
[I 2024-03-13 14:52:21,839] Trial 74 finished with value: 0.8258636788048553 and parameters: {'n_estimators': 43, 'max_depth': 20.657769800363052, 'booster': 'dart', 'eta': 0.03043960981549069}. Best is trial 13 with value: 0.8300889976243662.
  max_depth = int(trial.suggest_loguniform('max_depth', 15, 30))
  eta=trial.suggest_uniform('eta',0.01,0.3)
[I 2024-03-13 14:52:23,593] Trial 75 finished with value: 0.8146178302544648 and parameters: {'n_estimators': 45, 'max_depth': 21.230014749790055, 'booster': 'dart', 'eta': 0.0812834747455974}. Best is trial 13 with value: 0.8300889976243662.
  max_depth = int(trial.suggest_loguniform('max_dep

[I 2024-03-13 14:52:33,922] Trial 82 finished with value: 0.827264239028945 and parameters: {'n_estimators': 45, 'max_depth': 15.744267204212852, 'booster': 'dart', 'eta': 0.026521062403515323}. Best is trial 13 with value: 0.8300889976243662.
  max_depth = int(trial.suggest_loguniform('max_depth', 15, 30))
  eta=trial.suggest_uniform('eta',0.01,0.3)
[I 2024-03-13 14:52:35,598] Trial 83 finished with value: 0.8258577692680448 and parameters: {'n_estimators': 45, 'max_depth': 15.750875530184754, 'booster': 'dart', 'eta': 0.021419252773551334}. Best is trial 13 with value: 0.8300889976243662.
  max_depth = int(trial.suggest_loguniform('max_depth', 15, 30))
  eta=trial.suggest_uniform('eta',0.01,0.3)
[I 2024-03-13 14:52:37,397] Trial 84 finished with value: 0.8258518597312342 and parameters: {'n_estimators': 46, 'max_depth': 16.496718572147017, 'booster': 'dart', 'eta': 0.01043732512021078}. Best is trial 13 with value: 0.8300889976243662.
  max_depth = int(trial.suggest_loguniform('max_d

[I 2024-03-13 14:52:43,581] Trial 92 finished with value: 0.827264239028945 and parameters: {'n_estimators': 46, 'max_depth': 16.326151047272607, 'booster': 'dart', 'eta': 0.024536021218973345}. Best is trial 13 with value: 0.8300889976243662.
  max_depth = int(trial.suggest_loguniform('max_depth', 15, 30))
  eta=trial.suggest_uniform('eta',0.01,0.3)
[I 2024-03-13 14:52:45,524] Trial 93 finished with value: 0.8244690281175763 and parameters: {'n_estimators': 46, 'max_depth': 15.495548800118268, 'booster': 'dart', 'eta': 0.26433666396833955}. Best is trial 13 with value: 0.8300889976243662.
  max_depth = int(trial.suggest_loguniform('max_depth', 15, 30))
  eta=trial.suggest_uniform('eta',0.01,0.3)
[I 2024-03-13 14:52:46,439] Trial 94 finished with value: 0.8160361190889858 and parameters: {'n_estimators': 30, 'max_depth': 17.29128225939714, 'booster': 'dart', 'eta': 0.2910682773176389}. Best is trial 13 with value: 0.8300889976243662.
  max_depth = int(trial.suggest_loguniform('max_dept

Accuracy: 0.8300889976243662
best hyperparameters: {'n_estimators': 46, 'max_depth': 21.885545580049833, 'booster': 'dart', 'eta': 0.28219812952251877}


In [35]:
trial

FrozenTrial(number=13, state=1, values=[0.8300889976243662], datetime_start=datetime.datetime(2024, 3, 13, 14, 50, 54, 758619), datetime_complete=datetime.datetime(2024, 3, 13, 14, 50, 56, 572217), params={'n_estimators': 46, 'max_depth': 21.885545580049833, 'booster': 'dart', 'eta': 0.28219812952251877}, user_attrs={}, system_attrs={}, intermediate_values={}, distributions={'n_estimators': IntDistribution(high=50, log=False, low=30, step=1), 'max_depth': FloatDistribution(high=30.0, log=True, low=15.0, step=None), 'booster': CategoricalDistribution(choices=('gbtree', 'gblinear', 'dart')), 'eta': FloatDistribution(high=0.3, log=False, low=0.01, step=None)}, trial_id=13, value=None)

In [38]:
study.best_params

{'n_estimators': 46,
 'max_depth': 21.885545580049833,
 'booster': 'dart',
 'eta': 0.28219812952251877}

In [42]:
best_params = {
    'n_estimators': 46,
    'max_depth': 21,
    'booster': 'dart',
    'eta': 0.28219812952251877
}
xgb_classifier = xgb.XGBClassifier(
    n_estimators=best_params['n_estimators'],
    max_depth=int(best_params['max_depth']),
    booster=best_params['booster'],
    eta=best_params['eta']
)
xgb_classifier.fit(X_train,y_train)

In [43]:
y_pred=xgb_classifier.predict(X_test)
print(confusion_matrix(y_test,y_pred))
print(accuracy_score(y_test,y_pred))
print(classification_report(y_test,y_pred))

[[112   5   2]
 [ 22  21   0]
 [  3   0  14]]
0.8212290502793296
              precision    recall  f1-score   support

           0       0.82      0.94      0.88       119
           1       0.81      0.49      0.61        43
           2       0.88      0.82      0.85        17

    accuracy                           0.82       179
   macro avg       0.83      0.75      0.78       179
weighted avg       0.82      0.82      0.81       179

