In [2]:
import numpy as np
import pandas as pd
import statsmodels.api as sm
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import scale
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score
from sklearn.metrics import mean_squared_error, r2_score, accuracy_score, confusion_matrix,roc_curve,roc_auc_score,classification_report
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier

In [3]:
import warnings
warnings.filterwarnings("ignore",category=DeprecationWarning)
warnings.filterwarnings("ignore",category=FutureWarning)

In [4]:
# Load the dataset from a CSV file located next to the notebook.
df = pd.read_csv(filepath_or_buffer="./diabetes.csv")

## Gradient Boosting Machines (GBM)

In [5]:
# Preview the first five rows to sanity-check the loaded columns.
df.head(n=5)

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [6]:
# Separate the predictors from the target column.
X = df.drop(columns=["Outcome"])
y = df["Outcome"]

# Hold out 30% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.30, random_state=42
)

In [7]:
# Baseline GBM with the library's default hyperparameters, fitted on the
# training split. random_state is pinned (same seed as the train/test split)
# so that a Restart & Run All reproduces the same model and score.
gbm_model = GradientBoostingClassifier(random_state=42).fit(X_train, y_train)

In [8]:
# Class predictions of the baseline model on the held-out test split.
y_pred = gbm_model.predict(X=X_test)

In [9]:
# Fraction of test rows the baseline model classified correctly.
accuracy_score(y_true=y_test, y_pred=y_pred)

0.7532467532467533

In [22]:
# Hyperparameter grid for the GBM search:
# 4 learning rates x 4 ensemble sizes x 4 tree depths = 64 combinations.
gbm_params = {
    "learning_rate": [0.1, 0.01, 0.001, 0.005],
    "n_estimators": [100, 300, 500, 1000],
    "max_depth": [2, 3, 5, 6],
}

In [23]:
# Exhaustive 10-fold cross-validated search over gbm_params, using every
# available CPU core (n_jobs=-1) and printing per-fit progress (verbose=2).
gbm_cv_model = GridSearchCV(
    estimator=gbm_model,
    param_grid=gbm_params,
    cv=10,
    n_jobs=-1,
    verbose=2,
).fit(X_train, y_train)

Fitting 10 folds for each of 64 candidates, totalling 640 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:    1.9s
[Parallel(n_jobs=-1)]: Done 178 tasks      | elapsed:   22.7s
[Parallel(n_jobs=-1)]: Done 381 tasks      | elapsed:   50.9s
[Parallel(n_jobs=-1)]: Done 640 out of 640 | elapsed:  1.6min finished


In [24]:
# Show the hyperparameter combination selected by the grid search.
gbm_cv_model.best_params_

{'learning_rate': 0.005, 'max_depth': 5, 'n_estimators': 1000}

In [25]:
# Refit on the training split with the hyperparameters the grid search chose.
# They are read from best_params_ rather than copied by hand from the printed
# output, so this cell stays correct if the grid, the data or the seed changes.
# random_state is pinned so the refit is repeatable.
gbm_tuned = GradientBoostingClassifier(
    random_state=42,
    **gbm_cv_model.best_params_,
).fit(X_train, y_train)

In [26]:
# Class predictions of the tuned model on the held-out test split
# (this rebinds y_pred, replacing the baseline predictions).
y_pred = gbm_tuned.predict(X=X_test)

In [27]:
# Fraction of test rows the tuned model classified correctly.
accuracy_score(y_true=y_test, y_pred=y_pred)

0.7402597402597403