In [8]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import BaggingClassifier
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import roc_auc_score
from sklearn.metrics import recall_score
from sklearn.metrics import  f1_score
from sklearn.metrics import precision_score
from sklearn.metrics import confusion_matrix
from xgboost import XGBClassifier
import lightgbm as lgb
from catboost import CatBoostClassifier
import random
import time
import sys
import warnings
warnings.filterwarnings("ignore")


np.random.seed(123)

In [9]:
df = pd.read_csv("diabetes.csv")
df.head(5)

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [10]:
df.dropna(inplace=True)
target  = np.array(df["Outcome"])
df = df.drop("Outcome", axis=1)
df_list = list(df.columns)
df_list


['Pregnancies',
 'Glucose',
 'BloodPressure',
 'SkinThickness',
 'Insulin',
 'BMI',
 'DiabetesPedigreeFunction',
 'Age']

In [11]:
df.shape

(768, 8)

In [13]:
train_x, test_x, train_y, test_y = train_test_split(df, target, test_size=0.2, random_state=42)

In [15]:
lgbm = lgb.LGBMClassifier()
lgbm.fit(train_x, train_y, eval_metric="auc")
pred = lgbm.predict(test_x)
pred_prob = lgbm.predict_proba(test_x)[:,1]
print(confusion_matrix(test_y, pred))
print("Accuracy:", accuracy_score(test_y, pred))
print("Auc Score:", roc_auc_score(test_y, pred))
print("Auc Score Prob:", roc_auc_score(test_y, pred_prob))
print("Recall:", recall_score(test_y, pred))
print("Precission:", precision_score(test_y, pred))
print("F1 Score:", f1_score(test_y, pred))

[[73 26]
 [17 38]]
Accuracy: 0.7207792207792207
Auc Score: 0.7141414141414141
Auc Score Prob: 0.767860422405877
Recall: 0.6909090909090909
Precission: 0.59375
F1 Score: 0.6386554621848739


In [17]:
cb = CatBoostClassifier()
cb.fit(train_x, train_y)
pred1 = cb.predict(test_x)
pred_prob1 = cb.predict_proba(test_x)[:,1]
print(confusion_matrix(test_y, pred))
print("Accuracy:", accuracy_score(test_y, pred))
print("Auc Score:", roc_auc_score(test_y, pred))
print("Auc Score Prob:", roc_auc_score(test_y, pred_prob))
print("Recall:", recall_score(test_y, pred))
print("Precission:", precision_score(test_y, pred))
print("F1 Score:", f1_score(test_y, pred))

Learning rate set to 0.008365
0:	learn: 0.6888523	total: 1.36ms	remaining: 1.36s
1:	learn: 0.6853068	total: 2.77ms	remaining: 1.38s
2:	learn: 0.6814687	total: 4.2ms	remaining: 1.4s
3:	learn: 0.6771790	total: 5.46ms	remaining: 1.36s
4:	learn: 0.6737322	total: 7.09ms	remaining: 1.41s
5:	learn: 0.6695879	total: 9.18ms	remaining: 1.52s
6:	learn: 0.6653628	total: 10.6ms	remaining: 1.51s
7:	learn: 0.6615381	total: 12.2ms	remaining: 1.51s
8:	learn: 0.6574196	total: 13.4ms	remaining: 1.48s
9:	learn: 0.6548021	total: 14.1ms	remaining: 1.4s
10:	learn: 0.6507003	total: 15.3ms	remaining: 1.38s
11:	learn: 0.6475684	total: 16.7ms	remaining: 1.37s
12:	learn: 0.6445029	total: 17.9ms	remaining: 1.36s
13:	learn: 0.6408870	total: 19.1ms	remaining: 1.34s
14:	learn: 0.6380298	total: 20.4ms	remaining: 1.34s
15:	learn: 0.6340884	total: 21.6ms	remaining: 1.33s
16:	learn: 0.6303093	total: 22.8ms	remaining: 1.32s
17:	learn: 0.6268034	total: 24ms	remaining: 1.31s
18:	learn: 0.6242394	total: 25.2ms	remaining: 1.3

278:	learn: 0.3590808	total: 366ms	remaining: 947ms
279:	learn: 0.3587297	total: 370ms	remaining: 952ms
280:	learn: 0.3583434	total: 372ms	remaining: 951ms
281:	learn: 0.3579532	total: 373ms	remaining: 949ms
282:	learn: 0.3577127	total: 374ms	remaining: 948ms
283:	learn: 0.3572186	total: 375ms	remaining: 946ms
284:	learn: 0.3567160	total: 376ms	remaining: 944ms
285:	learn: 0.3563108	total: 378ms	remaining: 943ms
286:	learn: 0.3557666	total: 379ms	remaining: 941ms
287:	learn: 0.3553178	total: 381ms	remaining: 941ms
288:	learn: 0.3549805	total: 382ms	remaining: 940ms
289:	learn: 0.3547055	total: 383ms	remaining: 939ms
290:	learn: 0.3545086	total: 385ms	remaining: 938ms
291:	learn: 0.3542290	total: 387ms	remaining: 937ms
292:	learn: 0.3537283	total: 388ms	remaining: 937ms
293:	learn: 0.3534865	total: 390ms	remaining: 936ms
294:	learn: 0.3531489	total: 391ms	remaining: 935ms
295:	learn: 0.3529009	total: 393ms	remaining: 935ms
296:	learn: 0.3526814	total: 395ms	remaining: 935ms
297:	learn: 

550:	learn: 0.2855686	total: 739ms	remaining: 602ms
551:	learn: 0.2853221	total: 741ms	remaining: 601ms
552:	learn: 0.2851988	total: 747ms	remaining: 604ms
553:	learn: 0.2849952	total: 749ms	remaining: 603ms
554:	learn: 0.2846460	total: 751ms	remaining: 603ms
555:	learn: 0.2844480	total: 753ms	remaining: 601ms
556:	learn: 0.2843782	total: 755ms	remaining: 600ms
557:	learn: 0.2840507	total: 757ms	remaining: 600ms
558:	learn: 0.2838110	total: 759ms	remaining: 598ms
559:	learn: 0.2835975	total: 760ms	remaining: 597ms
560:	learn: 0.2833301	total: 765ms	remaining: 599ms
561:	learn: 0.2831050	total: 767ms	remaining: 598ms
562:	learn: 0.2829353	total: 769ms	remaining: 597ms
563:	learn: 0.2827271	total: 771ms	remaining: 596ms
564:	learn: 0.2825298	total: 772ms	remaining: 595ms
565:	learn: 0.2823376	total: 774ms	remaining: 593ms
566:	learn: 0.2821789	total: 775ms	remaining: 592ms
567:	learn: 0.2819688	total: 777ms	remaining: 591ms
568:	learn: 0.2816683	total: 780ms	remaining: 591ms
569:	learn: 

754:	learn: 0.2464669	total: 1.11s	remaining: 359ms
755:	learn: 0.2461918	total: 1.11s	remaining: 358ms
756:	learn: 0.2460538	total: 1.11s	remaining: 357ms
757:	learn: 0.2458631	total: 1.11s	remaining: 355ms
758:	learn: 0.2457780	total: 1.11s	remaining: 354ms
759:	learn: 0.2456337	total: 1.12s	remaining: 353ms
760:	learn: 0.2454434	total: 1.12s	remaining: 351ms
761:	learn: 0.2451890	total: 1.12s	remaining: 350ms
762:	learn: 0.2451053	total: 1.12s	remaining: 349ms
763:	learn: 0.2449223	total: 1.12s	remaining: 348ms
764:	learn: 0.2446700	total: 1.13s	remaining: 346ms
765:	learn: 0.2444930	total: 1.13s	remaining: 345ms
766:	learn: 0.2441697	total: 1.13s	remaining: 344ms
767:	learn: 0.2441241	total: 1.13s	remaining: 342ms
768:	learn: 0.2439607	total: 1.14s	remaining: 341ms
769:	learn: 0.2437543	total: 1.14s	remaining: 339ms
770:	learn: 0.2436033	total: 1.14s	remaining: 338ms
771:	learn: 0.2434754	total: 1.14s	remaining: 337ms
772:	learn: 0.2432345	total: 1.15s	remaining: 336ms
773:	learn: 

955:	learn: 0.2127550	total: 1.48s	remaining: 67.9ms
956:	learn: 0.2125379	total: 1.48s	remaining: 66.6ms
957:	learn: 0.2123557	total: 1.48s	remaining: 65.1ms
958:	learn: 0.2120140	total: 1.49s	remaining: 63.5ms
959:	learn: 0.2118021	total: 1.49s	remaining: 62ms
960:	learn: 0.2116012	total: 1.49s	remaining: 60.4ms
961:	learn: 0.2114336	total: 1.49s	remaining: 58.9ms
962:	learn: 0.2113203	total: 1.5s	remaining: 57.5ms
963:	learn: 0.2112139	total: 1.5s	remaining: 56ms
964:	learn: 0.2110273	total: 1.5s	remaining: 54.4ms
965:	learn: 0.2108608	total: 1.5s	remaining: 52.9ms
966:	learn: 0.2107156	total: 1.5s	remaining: 51.4ms
967:	learn: 0.2105985	total: 1.51s	remaining: 49.8ms
968:	learn: 0.2104225	total: 1.51s	remaining: 48.3ms
969:	learn: 0.2103009	total: 1.51s	remaining: 46.8ms
970:	learn: 0.2101378	total: 1.51s	remaining: 45.2ms
971:	learn: 0.2100190	total: 1.51s	remaining: 43.6ms
972:	learn: 0.2098873	total: 1.52s	remaining: 42.1ms
973:	learn: 0.2097212	total: 1.52s	remaining: 40.5ms
97