In [1]:
import pandas as pd

import sklearn.metrics as mt
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, GridSearchCV
from lightgbm import LGBMClassifier

<p style="background-image: linear-gradient(to right, #0aa98f, #68dab2)"> &nbsp; </p>

In [2]:
data = pd.read_csv('data/05_diabetes.csv')
data.head(3)

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1


<p style="background-image: linear-gradient(#0aa98f, #FFFFFF 10%)"> &nbsp; </p>

In [3]:
y = data['Outcome']
X = data.drop(columns='Outcome')

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [4]:
model = LGBMClassifier(verbose=-1)
model.fit(X_train, y_train)
predictions = model.predict(X_test)

confusion_matrix = mt.confusion_matrix(y_test, predictions)
print('\nConfusion Matrix:\n', confusion_matrix)

accuracy = mt.accuracy_score(y_test, predictions)
print('\nAccuracy:', accuracy)


Confusion Matrix:
 [[91 16]
 [17 30]]

Accuracy: 0.7857142857142857


<p style="background-image: linear-gradient(#0aa98f, #FFFFFF 10%)"> &nbsp; </p>

In [5]:
%%time

params = {
    'max_depth': [3, 5, 7, 9],
    'subsample': [0.2, 0.4, 0.6, 0.8],
    'n_estimators': [100, 500, 1000, 2000],
    # 'learning_rate': [0.2, 0.4, 0.6, 0.8]
    'learning_rate': [0.001, 0.01, 0.1, 0.2]
}

grid = GridSearchCV(estimator=model, param_grid=params, cv=10, n_jobs=-1)
grid.fit(X_train, y_train)

print(grid.best_params_)
print(grid.best_score_)
print(grid.best_estimator_, '\n')

{'learning_rate': 0.001, 'max_depth': 3, 'n_estimators': 2000, 'subsample': 0.2}
0.7556848228450554
LGBMClassifier(learning_rate=0.001, max_depth=3, n_estimators=2000,
               subsample=0.2, verbose=-1) 

CPU times: user 3min 40s, sys: 48.9 s, total: 4min 29s
Wall time: 17min 3s


In [6]:
model = grid.best_estimator_
model.fit(X_train, y_train)
predictions = model.predict(X_test)

confusion_matrix = mt.confusion_matrix(y_test, predictions)
print('\nConfusion Matrix:\n', confusion_matrix)

accuracy = mt.accuracy_score(y_test, predictions)
print('\nAccuracy:', accuracy)


Confusion Matrix:
 [[96 11]
 [20 27]]

Accuracy: 0.7987012987012987


<p style="background-image: linear-gradient(to right, #0aa98f, #68dab2)"> &nbsp; </p>