<a href="https://colab.research.google.com/github/iffyaiyan/Hyper_Tuning/blob/main/Hyperparameter_Tunning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install -U -q PyDrive
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials

In [None]:
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)

In [None]:
link = 'https://drive.google.com/file/d/1i08bfAKHGkGocKfC-5UROCXwH4DlcJ7C/view?usp=sharing'
# to get the id part of the file 
id = link.split("/")[-2] 
  
downloaded = drive.CreateFile({'id':id})  
downloaded.GetContentFile('diabetes.csv') 

In [None]:
import warnings
warnings.filterwarnings('ignore')

In [None]:

import pandas as pd

df = pd.read_csv('diabetes.csv')

df.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [None]:
import numpy as np
df['Glucose'] = np.where(df['Glucose']==0, df['Glucose'].median(), df['Glucose'])
df['Insulin'] = np.where(df['Insulin'] == 0, df['Insulin'].median(), df['Insulin'])
df['SkinThickness'] = np.where(df['SkinThickness'] == 0, df['SkinThickness'].median(), df['SkinThickness'])
df.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148.0,72,35.0,30.5,33.6,0.627,50,1
1,1,85.0,66,29.0,30.5,26.6,0.351,31,0
2,8,183.0,64,23.0,30.5,23.3,0.672,32,1
3,1,89.0,66,23.0,94.0,28.1,0.167,21,0
4,0,137.0,40,35.0,168.0,43.1,2.288,33,1


In [None]:
X = df.drop('Outcome', axis = 1)
y = df['Outcome']

In [None]:
# Train-Test Split
from sklearn.model_selection import train_test_split
X_train, X_test,y_train,y_test = train_test_split(X,y, test_size=0.20, random_state=3)

In [None]:
from sklearn.ensemble import RandomForestClassifier
rf_classifier = RandomForestClassifier(n_estimators=10).fit(X_train, y_train)
prediction = rf_classifier.predict(X_test)

In [None]:
y.value_counts()

0    500
1    268
Name: Outcome, dtype: int64

In [None]:
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
print(confusion_matrix(y_test, prediction))
print(accuracy_score(y_test, prediction))
print(classification_report(y_test, prediction))

[[83  9]
 [36 26]]
0.7077922077922078
              precision    recall  f1-score   support

           0       0.70      0.90      0.79        92
           1       0.74      0.42      0.54        62

    accuracy                           0.71       154
   macro avg       0.72      0.66      0.66       154
weighted avg       0.72      0.71      0.69       154



In [None]:
model = RandomForestClassifier(n_estimators = 500, criterion='gini', 
                               max_features = 'sqrt', min_samples_leaf=10, random_state = 100).fit(X_train, y_train)

predictions = model.predict(X_test)
print(confusion_matrix(y_test, predictions))
print(accuracy_score(y_test, predictions))
print(classification_report(y_test, predictions))                     

[[81 11]
 [29 33]]
0.7402597402597403
              precision    recall  f1-score   support

           0       0.74      0.88      0.80        92
           1       0.75      0.53      0.62        62

    accuracy                           0.74       154
   macro avg       0.74      0.71      0.71       154
weighted avg       0.74      0.74      0.73       154



Randomized Search

In [None]:
from sklearn.model_selection import RandomizedSearchCV
n_estimators = [int(x) for x in np.linspace(start=200, stop=2000, num=10)]

max_features = ['auto', 'sqrt', 'log2']

max_depth = [int(x) for x in np.linspace(10, 1000, 10)]

min_samples_split = [1,3,4,5,7,9]

min_samples_leaf = [1,2,4,6,8]

random_grid = {'n_estimators': n_estimators,
               'max_features': max_features,
               'max_depth': max_depth,
               'min_samples_split': min_samples_split,
               'min_samples_leaf': min_samples_leaf,
               'criterion': ['entropy', 'gini']
               }

print(random_grid)       

{'n_estimators': [200, 400, 600, 800, 1000, 1200, 1400, 1600, 1800, 2000], 'max_features': ['auto', 'sqrt', 'log2'], 'max_depth': [10, 120, 230, 340, 450, 560, 670, 780, 890, 1000], 'min_samples_split': [1, 3, 4, 5, 7, 9], 'min_samples_leaf': [1, 2, 4, 6, 8], 'criterion': ['entropy', 'gini']}


In [None]:
from sklearn.model_selection import RandomizedSearchCV

rf = RandomForestClassifier()
rf_randomcv = RandomizedSearchCV(estimator=rf, param_distributions=random_grid, n_iter = 100, cv=3,
                             verbose = 2, random_state = 100, n_jobs = -1)

rf_randomcv.fit(X_train, y_train)

Fitting 3 folds for each of 100 candidates, totalling 300 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done  41 tasks      | elapsed:   57.7s
[Parallel(n_jobs=-1)]: Done 162 tasks      | elapsed:  3.4min
[Parallel(n_jobs=-1)]: Done 300 out of 300 | elapsed:  7.1min finished


RandomizedSearchCV(cv=3, error_score=nan,
                   estimator=RandomForestClassifier(bootstrap=True,
                                                    ccp_alpha=0.0,
                                                    class_weight=None,
                                                    criterion='gini',
                                                    max_depth=None,
                                                    max_features='auto',
                                                    max_leaf_nodes=None,
                                                    max_samples=None,
                                                    min_impurity_decrease=0.0,
                                                    min_impurity_split=None,
                                                    min_samples_leaf=1,
                                                    min_samples_split=2,
                                                    min_weight_fraction_leaf=0.0,
               

In [None]:
rf_randomcv.best_params_

{'criterion': 'entropy',
 'max_depth': 670,
 'max_features': 'log2',
 'min_samples_leaf': 6,
 'min_samples_split': 9,
 'n_estimators': 200}

In [None]:
rf_randomcv.best_estimator_

RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                       criterion='entropy', max_depth=670, max_features='log2',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=6, min_samples_split=9,
                       min_weight_fraction_leaf=0.0, n_estimators=200,
                       n_jobs=None, oob_score=False, random_state=None,
                       verbose=0, warm_start=False)

In [None]:
rf_randomcv

RandomizedSearchCV(cv=3, error_score=nan,
                   estimator=RandomForestClassifier(bootstrap=True,
                                                    ccp_alpha=0.0,
                                                    class_weight=None,
                                                    criterion='gini',
                                                    max_depth=None,
                                                    max_features='auto',
                                                    max_leaf_nodes=None,
                                                    max_samples=None,
                                                    min_impurity_decrease=0.0,
                                                    min_impurity_split=None,
                                                    min_samples_leaf=1,
                                                    min_samples_split=2,
                                                    min_weight_fraction_leaf=0.0,
               

In [None]:
best_random_grid = rf_randomcv.best_estimator_

In [None]:
from sklearn.metrics import accuracy_score

y_pred = best_random_grid.predict(X_test)

print(confusion_matrix(y_test, y_pred))

print("Accuracy Score {}".format(accuracy_score(y_test, y_pred)))
print("Classification report: {}".format(classification_report(y_test, y_pred)))

[[78 14]
 [24 38]]
Accuracy Score 0.7532467532467533
Classification report:               precision    recall  f1-score   support

           0       0.76      0.85      0.80        92
           1       0.73      0.61      0.67        62

    accuracy                           0.75       154
   macro avg       0.75      0.73      0.74       154
weighted avg       0.75      0.75      0.75       154



# Grid Search CV

In [None]:
rf_randomcv.best_params_

{'criterion': 'entropy',
 'max_depth': 670,
 'max_features': 'log2',
 'min_samples_leaf': 6,
 'min_samples_split': 9,
 'n_estimators': 200}

In [None]:
from sklearn.model_selection import GridSearchCV

param_grid = {
    'criterion': [rf_randomcv.best_params_['criterion']],
    'max_depth': [rf_randomcv.best_params_['max_depth']],
    'max_features': [rf_randomcv.best_params_['max_features']],
    'min_samples_leaf': [rf_randomcv.best_params_['min_samples_leaf'],
                      rf_randomcv.best_params_['min_samples_leaf']+2,
                      rf_randomcv.best_params_['min_samples_leaf']+4],
    'min_samples_split': [rf_randomcv.best_params_['min_samples_split']-2,
                         rf_randomcv.best_params_['min_samples_split']-1,
                         rf_randomcv.best_params_['min_samples_split'],
                         rf_randomcv.best_params_['min_samples_split']+1,
                         rf_randomcv.best_params_['min_samples_split']+2],
    'n_estimators': [rf_randomcv.best_params_['n_estimators']-200,
                    rf_randomcv.best_params_['n_estimators']-100,
                    rf_randomcv.best_params_['n_estimators'],
                    rf_randomcv.best_params_['n_estimators']+100,
                    rf_randomcv.best_params_['n_estimators']+200],
                         
}

print(param_grid)


{'criterion': ['entropy'], 'max_depth': [670], 'max_features': ['log2'], 'min_samples_leaf': [6, 8, 10], 'min_samples_split': [7, 8, 9, 10, 11], 'n_estimators': [0, 100, 200, 300, 400]}


In [None]:
# Fitting the Grid_Search_CV to the data

rf = RandomForestClassifier()

grid_search = GridSearchCV(estimator = rf, param_grid=param_grid, cv = 10, n_jobs=-1, verbose=2)

grid_search.fit(X_train, y_train)

Fitting 10 folds for each of 75 candidates, totalling 750 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done  59 tasks      | elapsed:   16.4s
[Parallel(n_jobs=-1)]: Done 180 tasks      | elapsed:   54.9s
[Parallel(n_jobs=-1)]: Done 383 tasks      | elapsed:  2.0min
[Parallel(n_jobs=-1)]: Done 666 tasks      | elapsed:  3.5min
[Parallel(n_jobs=-1)]: Done 750 out of 750 | elapsed:  4.0min finished


GridSearchCV(cv=10, error_score=nan,
             estimator=RandomForestClassifier(bootstrap=True, ccp_alpha=0.0,
                                              class_weight=None,
                                              criterion='gini', max_depth=None,
                                              max_features='auto',
                                              max_leaf_nodes=None,
                                              max_samples=None,
                                              min_impurity_decrease=0.0,
                                              min_impurity_split=None,
                                              min_samples_leaf=1,
                                              min_samples_split=2,
                                              min_weight_fraction_leaf=0.0,
                                              n_estimators=100, n_jobs=None,
                                              oob_score=False,
                                              rand

In [None]:
grid_search.best_estimator_

RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                       criterion='entropy', max_depth=670, max_features='log2',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=8, min_samples_split=11,
                       min_weight_fraction_leaf=0.0, n_estimators=300,
                       n_jobs=None, oob_score=False, random_state=None,
                       verbose=0, warm_start=False)

In [None]:
best_grid = grid_search.best_estimator_

In [None]:
best_grid

RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                       criterion='entropy', max_depth=670, max_features='log2',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=8, min_samples_split=11,
                       min_weight_fraction_leaf=0.0, n_estimators=300,
                       n_jobs=None, oob_score=False, random_state=None,
                       verbose=0, warm_start=False)

In [None]:
y_pred = best_grid.predict(X_test)
print(confusion_matrix(y_test, y_pred))
print("Accuracy Score {}".format(accuracy_score(y_test, y_pred)))
print("Classification report: {}".format(classification_report(y_test, y_pred)))

[[80 12]
 [29 33]]
Accuracy Score 0.7337662337662337
Classification report:               precision    recall  f1-score   support

           0       0.73      0.87      0.80        92
           1       0.73      0.53      0.62        62

    accuracy                           0.73       154
   macro avg       0.73      0.70      0.71       154
weighted avg       0.73      0.73      0.72       154



In [None]:
pip install hyperopt

In [None]:
from hyperopt import hp, fmin, tpe, STATUS_OK, Trials

In [None]:
space = {'criterion': hp.choice('criterion', ['entropy', 'gini']),
         'max_depth': hp.quniform('max_depth', 10,1200,10),
         'max_features': hp.choice('max_features', ['auto', 'sqrt', 'log2', None]),
         'min_samples_leaf': hp.uniform('min_samples_leaf', 0, 0.5),
         'min_samples_split': hp.uniform('min_samples_split', 0, 1),
         'n_estimators': hp.choice('n_estimators', [10,50,300,750,1200,1300,1500])}

In [None]:
space

{'criterion': <hyperopt.pyll.base.Apply at 0x7fa637e7c7b8>,
 'max_depth': <hyperopt.pyll.base.Apply at 0x7fa637e7c4a8>,
 'max_features': <hyperopt.pyll.base.Apply at 0x7fa637e7c198>,
 'min_samples_leaf': <hyperopt.pyll.base.Apply at 0x7fa637e7cf98>,
 'min_samples_split': <hyperopt.pyll.base.Apply at 0x7fa637e50080>,
 'n_estimators': <hyperopt.pyll.base.Apply at 0x7fa637e501d0>}

In [None]:
space['criterion']

<hyperopt.pyll.base.Apply at 0x7fa637e7c7b8>

In [None]:

def objective(space):
    model = RandomForestClassifier(criterion = space['criterion'], max_depth = space['max_depth'],
                                 max_features = space['max_features'],
                                 min_samples_leaf = space['min_samples_leaf'],
                                 min_samples_split = space['min_samples_split'],
                                 n_estimators = space['n_estimators'], 
                                 )
    
    accuracy = cross_val_score(model, X_train, y_train, cv = 5).mean()

    # We aim to maximize accuracy, therefore we return it as a negative value
    return {'loss': -accuracy, 'status': STATUS_OK }

In [None]:
from sklearn.model_selection import cross_val_score
trials = Trials()

best = fmin(fn = objective,
            space = space,
            algo = tpe.suggest,
            max_evals = 80,
            trials = trials)

best

100%|██████████| 80/80 [06:20<00:00,  4.76s/it, best loss: -0.7833533253365321]


{'criterion': 0,
 'max_depth': 500.0,
 'max_features': 3,
 'min_samples_leaf': 0.048630030301599604,
 'min_samples_split': 0.16568173929297902,
 'n_estimators': 2}

In [None]:
crit = {0: 'entropy', 1: 'gini'}
feat = {0: 'auto', 1: 'sqrt', 2: 'log2', 3: None}

est = {0:10, 1:50, 2:300, 3:750, 4:1200, 5:1300, 6:1500}

print(crit[best['criterion']])
print(feat[best['max_features']])
print(est[best['n_estimators']])

entropy
None
300


In [None]:
best['min_samples_leaf']

0.048630030301599604

In [None]:
trainedforest = RandomForestClassifier(criterion = crit[best['criterion']], max_depth = best['max_depth'], 
                                       max_features = feat[best['max_features']], 
                                       min_samples_leaf = best['min_samples_leaf'], 
                                       min_samples_split = best['min_samples_split'], 
                                       n_estimators = est[best['n_estimators']]).fit(X_train,y_train)
predictionforest = trainedforest.predict(X_test)
print(confusion_matrix(y_test,predictionforest))
print(accuracy_score(y_test,predictionforest))
print(classification_report(y_test,predictionforest))
acc5 = accuracy_score(y_test,predictionforest)

[[76 16]
 [33 29]]
0.6818181818181818
              precision    recall  f1-score   support

           0       0.70      0.83      0.76        92
           1       0.64      0.47      0.54        62

    accuracy                           0.68       154
   macro avg       0.67      0.65      0.65       154
weighted avg       0.68      0.68      0.67       154



# Genetic Algorithms

In [None]:
import numpy as np
from sklearn.model_selection import RandomizedSearchCV
n_estimators = [int(x) for x in np.linspace(start = 200, stop = 2000, num = 10)]
max_fatures = ['auto', 'sqrt', 'log2']

max_depth = [int(x) for x in np.linspace(10,1000, 10)]

min_samples_split = [2,5,10,14]
min_samples_leaf = [1,2,4,6,8]

param = {'n_estimators': n_estimators,
         'max_features': max_features,
         'max_depth': max_depth,
         'min_samples_split': min_samples_split,
         'min_samples_leaf': min_samples_leaf,
         'criterion': ['entropy', 'gini']}

print(param)

{'n_estimators': [200, 400, 600, 800, 1000, 1200, 1400, 1600, 1800, 2000], 'max_features': ['auto', 'sqrt', 'log2'], 'max_depth': [10, 120, 230, 340, 450, 560, 670, 780, 890, 1000], 'min_samples_split': [2, 5, 10, 14], 'min_samples_leaf': [1, 2, 4, 6, 8], 'criterion': ['entropy', 'gini']}


In [None]:
pip install tensorflow



In [None]:
# For the latest nightly build:
!pip install tf-nightly

Collecting tf-nightly
[?25l  Downloading https://files.pythonhosted.org/packages/6d/97/ac9bcc975d94ef933b4e5f4c1fc06e347e066a417c57e9c49c7e53f542cf/tf_nightly-2.5.0.dev20201123-cp36-cp36m-manylinux2010_x86_64.whl (397.6MB)
[K     |████████████████████████████████| 397.6MB 38kB/s 
[?25hCollecting numpy~=1.19.2
[?25l  Downloading https://files.pythonhosted.org/packages/87/86/753182c9085ba4936c0076269a571613387cdb77ae2bf537448bfd63472c/numpy-1.19.4-cp36-cp36m-manylinux2010_x86_64.whl (14.5MB)
[K     |████████████████████████████████| 14.5MB 340kB/s 
Collecting tf-estimator-nightly~=2.4.0.dev
[?25l  Downloading https://files.pythonhosted.org/packages/89/d2/2131f5a0f0d14bae7f4d332724748b9ca6746b0d32f5c76145f0707f47d8/tf_estimator_nightly-2.4.0.dev2020102301-py2.py3-none-any.whl (461kB)
[K     |████████████████████████████████| 471kB 42.1MB/s 
Collecting flatbuffers~=1.12.0
  Downloading https://files.pythonhosted.org/packages/eb/26/712e578c5f14e26ae3314c39a1bdc4eb2ec2f4ddc89b708cf8e0

In [None]:
from tpot import TPOTClassifier


tpot_classifier = TPOTClassifier(generations= 5, population_size= 24, offspring_size= 12,
                                 verbosity= 2, early_stop= 12,
                                 config_dict={'sklearn.ensemble.RandomForestClassifier': param}, 
                                 cv = 4, scoring = 'accuracy')
tpot_classifier.fit(X_train,y_train)

ModuleNotFoundError: ignored

In [None]:
accuracy = tpot_classifier.score(X_test, y_test)
print(accuracy)

NameError: ignored

# Optuna Algorithm

In [None]:
pip install optuna

Collecting optuna
[?25l  Downloading https://files.pythonhosted.org/packages/87/10/06b58f4120f26b603d905a594650440ea1fd74476b8b360dbf01e111469b/optuna-2.3.0.tar.gz (258kB)
[K     |█▎                              | 10kB 15.2MB/s eta 0:00:01[K     |██▌                             | 20kB 15.6MB/s eta 0:00:01[K     |███▉                            | 30kB 10.8MB/s eta 0:00:01[K     |█████                           | 40kB 8.9MB/s eta 0:00:01[K     |██████▍                         | 51kB 4.3MB/s eta 0:00:01[K     |███████▋                        | 61kB 4.9MB/s eta 0:00:01[K     |████████▉                       | 71kB 4.9MB/s eta 0:00:01[K     |██████████▏                     | 81kB 5.1MB/s eta 0:00:01[K     |███████████▍                    | 92kB 5.5MB/s eta 0:00:01[K     |████████████▊                   | 102kB 5.8MB/s eta 0:00:01[K     |██████████████                  | 112kB 5.8MB/s eta 0:00:01[K     |███████████████▏                | 122kB 5.8MB/s eta 0:00:01[K 

In [None]:
import optuna
import sklearn.svm
def objective(trial):

    classifier = trial.suggest_categorical('classifier', ['RandomForest', 'SVC'])
    
    if classifier == 'RandomForest':
        n_estimators = trial.suggest_int('n_estimators', 200, 2000,10)
        max_depth = int(trial.suggest_float('max_depth', 10, 100, log=True))

        clf = sklearn.ensemble.RandomForestClassifier(
            n_estimators=n_estimators, max_depth=max_depth)
    else:
        c = trial.suggest_float('svc_c', 1e-10, 1e10, log=True)
        
        clf = sklearn.svm.SVC(C=c, gamma='auto')

    return sklearn.model_selection.cross_val_score(
        clf,X_train,y_train, n_jobs=-1, cv=3).mean()

In [None]:
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=100)

trial = study.best_trial

print('Accuracy: {}'.format(trial.value))
print("Best hyperparameters: {}".format(trial.params))

[32m[I 2020-11-23 11:31:06,577][0m A new study created in memory with name: no-name-502a1252-e7e2-4951-a52c-5e856517676d[0m
[32m[I 2020-11-23 11:31:16,234][0m Trial 0 finished with value: 0.7606089590307668 and parameters: {'classifier': 'RandomForest', 'n_estimators': 1770, 'max_depth': 11.531937771550425}. Best is trial 0 with value: 0.7606089590307668.[0m
[32m[I 2020-11-23 11:31:22,768][0m Trial 1 finished with value: 0.7606089590307668 and parameters: {'classifier': 'RandomForest', 'n_estimators': 1450, 'max_depth': 25.552271407225916}. Best is trial 0 with value: 0.7606089590307668.[0m
[32m[I 2020-11-23 11:31:22,835][0m Trial 2 finished with value: 0.66449864498645 and parameters: {'classifier': 'SVC', 'svc_c': 1632826112.1431613}. Best is trial 0 with value: 0.7606089590307668.[0m
[32m[I 2020-11-23 11:31:26,788][0m Trial 3 finished with value: 0.7622509166268133 and parameters: {'classifier': 'RandomForest', 'n_estimators': 880, 'max_depth': 11.969552223935043}. Bes

Accuracy: 0.7736489717838354
Best hyperparameters: {'classifier': 'RandomForest', 'n_estimators': 720, 'max_depth': 30.59942345089515}


In [None]:
trial

FrozenTrial(number=21, value=0.7736489717838354, datetime_start=datetime.datetime(2020, 11, 23, 11, 32, 8, 101425), datetime_complete=datetime.datetime(2020, 11, 23, 11, 32, 11, 358262), params={'classifier': 'RandomForest', 'n_estimators': 720, 'max_depth': 30.59942345089515}, distributions={'classifier': CategoricalDistribution(choices=('RandomForest', 'SVC')), 'n_estimators': IntUniformDistribution(high=2000, low=200, step=10), 'max_depth': LogUniformDistribution(high=100, low=10)}, user_attrs={}, system_attrs={}, intermediate_values={}, trial_id=21, state=TrialState.COMPLETE)

In [None]:
study.best_params

{'classifier': 'RandomForest',
 'max_depth': 30.59942345089515,
 'n_estimators': 720}

In [None]:
rf=RandomForestClassifier(n_estimators=330,max_depth=30)
rf.fit(X_train,y_train)

RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                       criterion='gini', max_depth=30, max_features='auto',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=330,
                       n_jobs=None, oob_score=False, random_state=None,
                       verbose=0, warm_start=False)

In [None]:
y_pred=rf.predict(X_test)
print(confusion_matrix(y_test,y_pred))
print(accuracy_score(y_test,y_pred))
print(classification_report(y_test,y_pred))

[[79 13]
 [26 36]]
0.7467532467532467
              precision    recall  f1-score   support

           0       0.75      0.86      0.80        92
           1       0.73      0.58      0.65        62

    accuracy                           0.75       154
   macro avg       0.74      0.72      0.73       154
weighted avg       0.75      0.75      0.74       154

