In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import  GridSearchCV
from sklearn.metrics import mean_squared_error

from warnings import filterwarnings
filterwarnings('ignore')

In [2]:
diabetes = pd.read_csv("../Data/diabetes.csv")
df = diabetes.copy()
df = df.dropna()
df.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [3]:
from sklearn.model_selection import train_test_split

y = df["Outcome"]
X = df.drop(['Outcome'], axis=1)
X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                    test_size=0.30,
                                                    random_state=42)


In [4]:
from catboost import CatBoostClassifier

In [5]:
cat_model = CatBoostClassifier().fit(X_train, y_train)

Learning rate set to 0.0079
0:	learn: 0.6879744	total: 146ms	remaining: 2m 25s
1:	learn: 0.6843414	total: 148ms	remaining: 1m 13s
2:	learn: 0.6805760	total: 150ms	remaining: 49.9s
3:	learn: 0.6765782	total: 152ms	remaining: 37.8s
4:	learn: 0.6720829	total: 154ms	remaining: 30.6s
5:	learn: 0.6681617	total: 156ms	remaining: 25.8s
6:	learn: 0.6641912	total: 157ms	remaining: 22.3s
7:	learn: 0.6602881	total: 159ms	remaining: 19.7s
8:	learn: 0.6562626	total: 161ms	remaining: 17.7s
9:	learn: 0.6528483	total: 162ms	remaining: 16.1s
10:	learn: 0.6487823	total: 165ms	remaining: 14.8s
11:	learn: 0.6454980	total: 167ms	remaining: 13.7s
12:	learn: 0.6418953	total: 168ms	remaining: 12.8s
13:	learn: 0.6374431	total: 170ms	remaining: 12s
14:	learn: 0.6339676	total: 172ms	remaining: 11.3s
15:	learn: 0.6304252	total: 173ms	remaining: 10.7s
16:	learn: 0.6266998	total: 175ms	remaining: 10.1s
17:	learn: 0.6237660	total: 177ms	remaining: 9.65s
18:	learn: 0.6204394	total: 179ms	remaining: 9.24s
19:	learn: 0.

In [7]:
from sklearn.metrics import accuracy_score

y_pred = cat_model.predict(X_test)
accuracy_score(y_test, y_pred)

0.7402597402597403

In [8]:
catb_params = {
    'iterations': [200, 500],
    'learning_rate': [0.01, 0.05, 0.1],
    'depth': [3, 5, 8]}
catb = CatBoostClassifier()
catb_cv_model = GridSearchCV(catb, catb_params, cv=5, n_jobs=-1, verbose=2)
catb_cv_model.fit(X_train, y_train)
catb_cv_model.best_params_

Fitting 5 folds for each of 18 candidates, totalling 90 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 16 concurrent workers.
[Parallel(n_jobs=-1)]: Done   9 tasks      | elapsed:   35.4s
[Parallel(n_jobs=-1)]: Done  90 out of  90 | elapsed:  1.0min finished


0:	learn: 0.6863312	total: 28.1ms	remaining: 5.59s
1:	learn: 0.6811157	total: 36ms	remaining: 3.56s
2:	learn: 0.6752970	total: 43.1ms	remaining: 2.83s
3:	learn: 0.6697432	total: 52.1ms	remaining: 2.55s
4:	learn: 0.6641758	total: 58.5ms	remaining: 2.28s
5:	learn: 0.6576656	total: 65.3ms	remaining: 2.11s
6:	learn: 0.6521870	total: 73.5ms	remaining: 2.03s
7:	learn: 0.6465564	total: 86.8ms	remaining: 2.08s
8:	learn: 0.6407595	total: 93.3ms	remaining: 1.98s
9:	learn: 0.6357725	total: 101ms	remaining: 1.91s
10:	learn: 0.6299942	total: 108ms	remaining: 1.85s
11:	learn: 0.6240480	total: 115ms	remaining: 1.8s
12:	learn: 0.6196095	total: 122ms	remaining: 1.76s
13:	learn: 0.6148715	total: 128ms	remaining: 1.7s
14:	learn: 0.6092626	total: 139ms	remaining: 1.72s
15:	learn: 0.6046764	total: 148ms	remaining: 1.7s
16:	learn: 0.6012725	total: 159ms	remaining: 1.71s
17:	learn: 0.5958121	total: 165ms	remaining: 1.67s
18:	learn: 0.5903243	total: 170ms	remaining: 1.62s
19:	learn: 0.5866024	total: 176ms	rem

{'depth': 8, 'iterations': 200, 'learning_rate': 0.01}

In [9]:
catb = CatBoostClassifier(iterations = 200,
                          learning_rate = 0.01,
                          depth = 8)

catb_tuned = catb.fit(X_train, y_train)
y_pred = catb_tuned.predict(X_test)
accuracy_score(y_test, y_pred)

0:	learn: 0.6863312	total: 6.95ms	remaining: 1.38s
1:	learn: 0.6811157	total: 11.5ms	remaining: 1.14s
2:	learn: 0.6752970	total: 16.3ms	remaining: 1.07s
3:	learn: 0.6697432	total: 20.2ms	remaining: 990ms
4:	learn: 0.6641758	total: 23.9ms	remaining: 931ms
5:	learn: 0.6576656	total: 27.5ms	remaining: 890ms
6:	learn: 0.6521870	total: 31.2ms	remaining: 860ms
7:	learn: 0.6465564	total: 35.4ms	remaining: 849ms
8:	learn: 0.6407595	total: 39ms	remaining: 828ms
9:	learn: 0.6357725	total: 42.5ms	remaining: 808ms
10:	learn: 0.6299942	total: 46.7ms	remaining: 803ms
11:	learn: 0.6240480	total: 50.4ms	remaining: 790ms
12:	learn: 0.6196095	total: 53.9ms	remaining: 776ms
13:	learn: 0.6148715	total: 57.8ms	remaining: 768ms
14:	learn: 0.6092626	total: 64.4ms	remaining: 794ms
15:	learn: 0.6046764	total: 67.8ms	remaining: 780ms
16:	learn: 0.6012725	total: 71.2ms	remaining: 767ms
17:	learn: 0.5958121	total: 75.1ms	remaining: 759ms
18:	learn: 0.5903243	total: 78.8ms	remaining: 751ms
19:	learn: 0.5866024	tot

0.7532467532467533