# Light GBM

In [1]:
import pandas as pd 
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import numpy as np
from sklearn.model_selection import GridSearchCV

diabetes = pd.read_csv('diabetes.csv')
df = diabetes.copy()
df = df.dropna()
y = df['Outcome']
X = df.drop('Outcome', axis=1)
X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                   test_size=0.3,
                                                   random_state=238)

## Model & Tahmin

In [2]:
from lightgbm import LGBMClassifier

In [3]:
lgbm_model = LGBMClassifier().fit(X_train, y_train)

[LightGBM] [Info] Number of positive: 190, number of negative: 347
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000115 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 616
[LightGBM] [Info] Number of data points in the train set: 537, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.353818 -> initscore=-0.602301
[LightGBM] [Info] Start training from score -0.602301


In [6]:
y_train_pred = lgbm_model.predict(X_train)
acc_train = accuracy_score(y_train_pred, y_train)
acc_train

1.0

In [5]:
y_test_pred = lgbm_model.predict(X_test)
acc_test = accuracy_score(y_test_pred, y_test)
acc_test

0.7445887445887446

## Model Tuning

In [6]:
?lgbm_model

[1;31mType:[0m           LGBMClassifier
[1;31mString form:[0m    LGBMClassifier()
[1;31mFile:[0m           c:\users\alperen arda\appdata\local\programs\python\python311\lib\site-packages\lightgbm\sklearn.py
[1;31mDocstring:[0m      LightGBM classifier.
[1;31mInit docstring:[0m
Construct a gradient boosting model.

Parameters
----------
boosting_type : str, optional (default='gbdt')
    'gbdt', traditional Gradient Boosting Decision Tree.
    'dart', Dropouts meet Multiple Additive Regression Trees.
    'rf', Random Forest.
num_leaves : int, optional (default=31)
    Maximum tree leaves for base learners.
max_depth : int, optional (default=-1)
    Maximum tree depth for base learners, <=0 means no limit.
    If setting this to a positive value, consider also changing ``num_leaves`` to ``<= 2^max_depth``.
learning_rate : float, optional (default=0.1)
    Boosting learning rate.
    You can use ``callbacks`` parameter of ``fit`` method to shrink/adapt learning rate
    in traini

In [7]:
lgbm_params = {
    'n_estimators': [100, 500, 1000, 2000],
    'subsample': [0.6, 0.8, 1.0],
    'max_depth': [3, 4, 5, 6],
    'learning_rate': [0.1, 0.01, 0.02, 0.05],
    'min_child_samples': [5, 10, 20]
}

In [8]:
lgbm = LGBMClassifier()
lgbm_cv_model = GridSearchCV(lgbm, lgbm_params, cv=10, n_jobs=-1, verbose=2)
lgbm_cv_model.fit(X_train, y_train)

Fitting 10 folds for each of 576 candidates, totalling 5760 fits
[LightGBM] [Info] Number of positive: 190, number of negative: 347
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000146 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 616
[LightGBM] [Info] Number of data points in the train set: 537, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.353818 -> initscore=-0.602301
[LightGBM] [Info] Start training from score -0.602301


In [9]:
lgbm_cv_model.best_params_

{'learning_rate': 0.05,
 'max_depth': 4,
 'min_child_samples': 20,
 'n_estimators': 100,
 'subsample': 0.6}

In [13]:
lgbm_tuned_model = LGBMClassifier(learning_rate=0.05, 
                                  max_depth=4, 
                                  min_child_samples=20, 
                                  n_estimators=100, 
                                  subsample=0.6)
lgbm_tuned_model.fit(X_train, y_train)

[LightGBM] [Info] Number of positive: 190, number of negative: 347
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000067 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 616
[LightGBM] [Info] Number of data points in the train set: 537, number of used features: 8
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.353818 -> initscore=-0.602301
[LightGBM] [Info] Start training from score -0.602301


In [14]:
y_train_pred = lgbm_tuned_model.predict(X_train)
y_train_acc = accuracy_score(y_train, y_train_pred)
y_train_acc

0.8715083798882681

In [15]:
y_test_pred = lgbm_tuned_model.predict(X_test)
y_test_acc = accuracy_score(y_test, y_test_pred)
y_test_acc

0.7489177489177489