In [3]:
import numpy as np
import pandas as pd 
import statsmodels.api as sm
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import scale, StandardScaler
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score
from sklearn.metrics import confusion_matrix, accuracy_score, mean_squared_error, r2_score, roc_auc_score, roc_curve, classification_report
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier

In [6]:
import warnings
warnings.filterwarnings("ignore",category = DeprecationWarning)
warnings.filterwarnings("ignore",category = FutureWarning)

In [9]:
df = pd.read_csv("diabetes.csv")

In [12]:
y = df["Outcome"]
X = df.drop(["Outcome"],axis=1)

In [15]:
X_train, X_test , y_train, y_test = train_test_split(X,
                                                     y,
                                                     test_size = 0.30,
                                                     random_state = 42)

In [63]:
scaler = StandardScaler()

In [65]:
scaler.fit(X_train)
X_train = scaler.transform(X_train)

In [67]:
scaler.fit(X_test)
X_test = scaler.transform(X_test)

In [69]:
mlpc_model = MLPClassifier().fit(X_train,y_train)



In [70]:
mlpc_model.coefs_

[array([[-9.19150064e-02,  3.13796531e-01,  4.85186089e-03,
         -4.26771225e-02, -9.75248938e-02, -3.16350195e-01,
         -2.26411450e-01,  2.54747109e-01,  2.08387475e-01,
          2.31316828e-02,  1.68193846e-01, -1.86919150e-01,
         -1.79707802e-01, -6.83706593e-02,  6.86382903e-02,
          1.78520770e-01, -1.51343179e-01, -1.16132851e-01,
         -5.33476041e-02,  2.68220633e-01, -6.30071754e-02,
         -3.04000994e-01, -1.62712523e-01, -5.67883793e-02,
          1.00618317e-01,  2.91384953e-02, -5.78237492e-02,
         -1.32342716e-01, -3.51616331e-01, -1.79930423e-01,
         -2.11626166e-01, -3.06121925e-02,  9.36562734e-03,
         -1.22857109e-01, -5.69814332e-01,  1.81859096e-01,
          5.77524026e-02,  1.00887543e-02,  1.97746532e-02,
         -1.12619476e-01,  5.16744846e-02,  1.43062157e-02,
         -2.90538911e-02,  2.33288269e-01,  1.77257850e-02,
         -3.05351840e-01, -1.67257496e-01,  2.03023849e-01,
          1.76607121e-01,  1.71389972e-0

In [73]:
y_pred = mlpc_model.predict(X_test)

In [75]:
accuracy_score(y_test,y_pred)

0.7402597402597403

### Model Tuning

In [78]:
mlpc_params = {"alpha":[1,5,0.1,0.01,0.03,0.005,0.0001],
               "hidden_layer_sizes" : [(10,10),(100,100,100),(100,100),(3,5)]}

In [91]:
mlpc = MLPClassifier(solver="lbfgs",max_iter = 10000)

In [92]:
mlpc_cv_model = GridSearchCV(mlpc,mlpc_params,cv=10,n_jobs=-1,verbose=2).fit(X_train,y_train)

Fitting 10 folds for each of 28 candidates, totalling 280 fits
[CV] END ...............alpha=1, hidden_layer_sizes=(10, 10); total time=   0.2s
[CV] END ...............alpha=1, hidden_layer_sizes=(10, 10); total time=   0.3s
[CV] END ...............alpha=1, hidden_layer_sizes=(10, 10); total time=   0.4s
[CV] END ...............alpha=1, hidden_layer_sizes=(10, 10); total time=   0.4s
[CV] END ...............alpha=1, hidden_layer_sizes=(10, 10); total time=   0.4s
[CV] END ...............alpha=1, hidden_layer_sizes=(10, 10); total time=   0.5s
[CV] END ...............alpha=1, hidden_layer_sizes=(10, 10); total time=   0.6s
[CV] END ...............alpha=1, hidden_layer_sizes=(10, 10); total time=   0.6s
[CV] END ...............alpha=1, hidden_layer_sizes=(10, 10); total time=   0.7s
[CV] END ...............alpha=1, hidden_layer_sizes=(10, 10); total time=   1.0s
[CV] END ........alpha=1, hidden_layer_sizes=(100, 100, 100); total time=   7.2s
[CV] END ........alpha=1, hidden_layer_sizes=(

KeyboardInterrupt: 

In [51]:
mlpc_cv_model.best_params_

{'alpha': 5, 'hidden_layer_sizes': (3, 5)}

In [53]:
mlpc_cv_model.best_score_

0.7337526205450734

In [57]:
mlpc_tuned = MLPClassifier(solver="lbfgs",alpha=5,hidden_layer_sizes=(3,5),max_iter=1000).fit(X_train,y_train)

In [59]:
y_pred =mlpc_tuned.predict(X_test)

In [61]:
accuracy_score(y_test,y_pred)

0.7142857142857143

# CART(Classification and Regression Tree)

Amac veri seti icerisindeki karmasik yapilari basit karar yapilarina donusturmektir

In [96]:
X_train, X_test , y_train, y_test = train_test_split(X,
                                                     y,
                                                     test_size = 0.30,
                                                     random_state = 42)

In [98]:
cart_model = DecisionTreeClassifier().fit(X_train,y_train)

In [100]:
cart_model

In [102]:
y_pred = cart_model.predict(X_test)

In [104]:
accuracy_score(y_test,y_pred)

0.7012987012987013

In [106]:
cart = DecisionTreeClassifier()

In [114]:
cart_params = {"max_depth": [1,3,5,8,10],
               "min_samples_split":[2,3,5,10,20,50]}

In [116]:
cart_cv_model = GridSearchCV(cart,cart_params,cv=10,n_jobs=-1,verbose=2).fit(X_train,y_train)

Fitting 10 folds for each of 30 candidates, totalling 300 fits
[CV] END ...................max_depth=1, min_samples_split=2; total time=   0.0s
[CV] END ...................max_depth=1, min_samples_split=3; total time=   0.0s
[CV] END ...................max_depth=1, min_samples_split=3; total time=   0.0s
[CV] END ...................max_depth=1, min_samples_split=3; total time=   0.0s
[CV] END ...................max_depth=1, min_samples_split=3; total time=   0.0s
[CV] END ...................max_depth=1, min_samples_split=2; total time=   0.0s
[CV] END ...................max_depth=1, min_samples_split=2; total time=   0.0s
[CV] END ...................max_depth=1, min_samples_split=3; total time=   0.0s
[CV] END ...................max_depth=1, min_samples_split=3; total time=   0.0s
[CV] END ...................max_depth=1, min_samples_split=3; total time=   0.0s
[CV] END ...................max_depth=1, min_samples_split=3; total time=   0.0s
[CV] END ...................max_depth=1, min_s

In [118]:
cart_cv_model.best_params_

{'max_depth': 5, 'min_samples_split': 20}

In [120]:
#final model

In [122]:
cart_tuned = DecisionTreeClassifier(max_depth=5,min_samples_split=20).fit(X_train,y_train)

In [126]:
y_pred = cart_tuned.predict(X_test)

In [128]:
accuracy_score(y_test,y_pred)

0.7532467532467533