In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split,GridSearchCV


from warnings import filterwarnings
filterwarnings('ignore')

In [2]:
diabetes = pd.read_csv("../Data/diabetes.csv")
df = diabetes.copy()
df = df.dropna()
df.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [3]:
df = diabetes.copy()
df = df.dropna()
y = df["Outcome"]
X = df.drop(['Outcome'], axis=1)
X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                    test_size=0.30,
                                                    random_state=42)


In [4]:
from sklearn.preprocessing import StandardScaler

In [5]:
scaler = StandardScaler()

In [6]:
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [7]:
from sklearn.neural_network import MLPClassifier

In [8]:
mlpc = MLPClassifier().fit(X_train_scaled, y_train)

In [9]:
from sklearn.metrics import accuracy_score

y_pred = mlpc.predict(X_test_scaled)
accuracy_score(y_test, y_pred)

0.7359307359307359

In [11]:
mlpc_params = {"alpha": [0.1, 0.01, 0.02, 0.005, 0.0001,0.00001],
              "hidden_layer_sizes": [(10,10,10),
                                     (100,100,100),
                                     (100,100),
                                     (3,5),
                                     (5, 3)],
              "solver" : ["lbfgs","adam","sgd"],
              "activation": ["relu","logistic"]}


In [12]:
mlpc = MLPClassifier()
mlpc_cv_model = GridSearchCV(mlpc, mlpc_params,
                         cv = 10,
                         n_jobs = -1,
                         verbose = 2)

mlpc_cv_model.fit(X_train_scaled, y_train)

Fitting 10 folds for each of 180 candidates, totalling 1800 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 16 concurrent workers.
[Parallel(n_jobs=-1)]: Done   9 tasks      | elapsed:   21.3s
[Parallel(n_jobs=-1)]: Done 130 tasks      | elapsed:   44.7s
[Parallel(n_jobs=-1)]: Done 333 tasks      | elapsed:  1.3min
[Parallel(n_jobs=-1)]: Done 616 tasks      | elapsed:  1.9min
[Parallel(n_jobs=-1)]: Done 981 tasks      | elapsed:  2.6min
[Parallel(n_jobs=-1)]: Done 1426 tasks      | elapsed:  3.3min
[Parallel(n_jobs=-1)]: Done 1800 out of 1800 | elapsed:  3.9min finished


GridSearchCV(cv=10, estimator=MLPClassifier(), n_jobs=-1,
             param_grid={'activation': ['relu', 'logistic'],
                         'alpha': [0.1, 0.01, 0.02, 0.005, 0.0001, 1e-05],
                         'hidden_layer_sizes': [(10, 10, 10), (100, 100, 100),
                                                (100, 100), (3, 5), (5, 3)],
                         'solver': ['lbfgs', 'adam', 'sgd']},
             verbose=2)

In [13]:
print("Best params: " + str(mlpc_cv_model.best_params_))

Best params: {'activation': 'relu', 'alpha': 0.0001, 'hidden_layer_sizes': (100, 100), 'solver': 'sgd'}


In [15]:
mlpc_tuned = MLPClassifier(activation = "logistic",
                           alpha = 0.1,
                           hidden_layer_sizes = (100, 100, 100),
                          solver = "adam")

In [16]:
mlpc_tuned.fit(X_train_scaled, y_train)

MLPClassifier(activation='logistic', alpha=0.1,
              hidden_layer_sizes=(100, 100, 100))

In [17]:
y_pred = mlpc_tuned.predict(X_test_scaled)
accuracy_score(y_test, y_pred)

0.7359307359307359