# Wielowarstwowa sieć neuronowa

(*Multilayer perceptron*, *feedforward neural network*)



**Uwaga:** "Input layer" pomimo tego, że ma w nazwie słowo "warstwa", to tak naprawdę to nie jest żadna warstwa sieci... To są po prostu dane wejściowe... Niestety przyjęło się literaturze nazywanie tego w ten sposób, co jest mylące :(


Sieci uczy sie metodą spadku gradientu (pewnymi wariantami tej metody). Uczenie wykorzystuje algorytm **propagacji wstecznej** (https://en.wikipedia.org/wiki/Backpropagation).

<br>

<br>

<br>

**Uwaga!** Sieci neuronowe absolutnie zawsze wymagają zestandaryzowanych danych! Niezależnie od tego czy wykorzystujemy regularyzację czy nie i niezależnie od typu sieci!

<br>

<br>

### Fakt matematyczny: jednowarstwową siecią możemy otrzymać dowolny kształt. 

Co z tego wynika? To, że (teoretycznie) zawsze wystarczy sieć jednowarstwowa (odpowiednio duża). W praktyce rzeczywiście z reguły wystarcza jedna warstwa, ale mimo wszystko zawsze warto sprawdzić czy 2 (lub 3) nie zadziałają przypadkiem lepiej. Przy czym jeżeli dla dwóch wartsw jest gorzej, to nie ma sensu sprawdzać dla większej ilości.

In [1]:
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import roc_auc_score, accuracy_score


from sklearn.linear_model import LogisticRegression 
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis, QuadraticDiscriminantAnalysis
from sympy.stats.rv import probability

# Zad
* Wczytaj zbiór danych - pima-indians-diabetes.data
* Podziel dane na train test
* Wykonaj uczenie modeli (dobierz najlepsze parametry)
    * LogisticRegression
    * LinearSVC
    * SVC
    * KNeighborsClassifier
    * DecisionTreeClassifier
    * RandomForestClassifier
    * BaggingClassifier
    * ExtraTreesClassifier
    * AdaBoostClassifier
    * GradientBoostingClassifier
    * VotingClassifier
    * xgboost.XGBClassifier
* Porównaj wyniki na zbiorze uczącym    

In [2]:
url = "https://raw.githubusercontent.com/jbrownlee/Datasets/master/pima-indians-diabetes.data.csv"
dataset = np.loadtxt(url, delimiter=",")

X = dataset[:,0:8]
Y = dataset[:,8]

print(X.shape)
print(np.mean(Y))

seed = 7
test_size = 0.33
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=test_size)

from sklearn.model_selection import StratifiedKFold

seed=123
kfold = StratifiedKFold(n_splits=5, random_state=seed, shuffle=True)

(768, 8)
0.3489583333333333


In [3]:
from xgboost import XGBClassifier
from sklearn.ensemble import BaggingClassifier, ExtraTreesClassifier, AdaBoostClassifier, GradientBoostingClassifier, \
    VotingClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.svm import LinearSVC
from sklearn.svm import SVC

pipe = Pipeline([('preprocessing', StandardScaler()), ('classifier', LinearSVC(C=1))])

param_grid = {
            'preprocessing': [StandardScaler(), None],
            'classifier__C': [0.001, 0.01, 0.1, 1, 10, 100]
}

grid_1 = GridSearchCV(pipe, param_grid, cv=kfold, return_train_score=True)

grid_1.fit(X_train, y_train)


In [4]:
# 3) SVC (kernel='rbf')
pipe_2 = Pipeline([
    ('scaler', StandardScaler()),
    ('clf', SVC(probability=True, random_state=seed))
])
param_grid_2 = {
    'clf__C': [0.1, 1, 10, 100],
    'clf__gamma': ['scale', 'auto', 0.001, 0.01, 0.1, 1]
}
grid_2 = GridSearchCV(pipe_2, param_grid_2, cv=kfold, return_train_score=True)
grid_2.fit(X_train, y_train)

In [5]:
# 3) LR

pipe_3 = Pipeline([
    ('scaler', StandardScaler()),           # Step 1: scale features
    ('logreg', LogisticRegression(max_iter=1000))  # Step 2: logistic regression model
])
param_grid_3 = {
    'logreg__penalty': ['l1', 'l2'],
    'logreg__solver': ['liblinear'],  # solver that supports l1 penalty
    'logreg__C': [0.01, 0.1, 1, 10],
}
grid_3 = GridSearchCV(pipe_3, param_grid_3, cv=5, scoring='accuracy')
grid_3.fit(X_train, y_train)



In [6]:
# 4) KNeighborsClassifier
pipe_4 = Pipeline([
    ('scaler', StandardScaler()),
    ('clf', KNeighborsClassifier())
])
param_grid_4 = {
    'clf__n_neighbors': [3,5,7,9],
    'clf__weights': ['uniform', 'distance'],
    'clf__p': [1, 2]  # p=1 Manhattan, p=2 Euclidean
}
grid_4 = GridSearchCV(pipe_4, param_grid_4, cv=kfold, return_train_score=True)
grid_4.fit(X_train, y_train)


In [7]:


# 5) DecisionTreeClassifier
pipe_5 = Pipeline([
    ('clf', DecisionTreeClassifier(random_state=seed))
])
param_grid_5 = {
    'clf__max_depth': [None, 3, 5, 10],
    'clf__min_samples_split': [2, 5, 10],
    'clf__min_samples_leaf': [1, 2, 4]
}
grid_5 = GridSearchCV(pipe_5, param_grid_5, cv=kfold, return_train_score=True)
grid_5.fit(X_train, y_train)

In [8]:
# 6) RandomForestClassifier
pipe_6 = Pipeline([
    ('clf', RandomForestClassifier(random_state=seed))
])
param_grid_6 = {
    'clf__n_estimators': [50, 100, 200],
    'clf__max_depth': [None, 5, 10],
    'clf__min_samples_split': [2, 5]
}
grid_6 = GridSearchCV(pipe_6, param_grid_6, cv=kfold, return_train_score=True)
grid_6.fit(X_train, y_train)

In [9]:
# 7) BaggingClassifier (z DecisionTree jako bazowym)
pipe_7 = Pipeline([
    ('clf', BaggingClassifier(random_state=seed))
])
param_grid_7 = {
    'clf__n_estimators': [10, 50, 100],
    'clf__max_samples': [0.5, 1.0],
    'clf__max_features': [0.5, 1.0]
}
grid_7 = GridSearchCV(pipe_7, param_grid_7, cv=kfold, return_train_score=True)
grid_7.fit(X_train, y_train)


In [10]:
# 8) ExtraTreesClassifier
pipe_8 = Pipeline([
    ('clf', ExtraTreesClassifier(random_state=seed))
])
param_grid_8 = {
    'clf__n_estimators': [50, 100, 200],
    'clf__max_depth': [None, 5, 10],
    'clf__min_samples_split': [2, 5]
}
grid_8 = GridSearchCV(pipe_8, param_grid_8, cv=kfold, return_train_score=True)
grid_8.fit(X_train, y_train)


In [11]:

# 9) AdaBoostClassifier
pipe_9 = Pipeline([
    ('clf', AdaBoostClassifier(random_state=seed))
])
param_grid_9 = {
    'clf__n_estimators': [50, 100, 200],
    'clf__learning_rate': [0.01, 0.1, 1]
}
grid_9 = GridSearchCV(pipe_9, param_grid_9, cv=kfold, return_train_score=True)
grid_9.fit(X_train, y_train)

In [12]:
# 10) GradientBoostingClassifier
pipe_10 = Pipeline([
    ('clf', GradientBoostingClassifier(random_state=seed))
])
param_grid_10 = {
    'clf__n_estimators': [50, 100, 200],
    'clf__learning_rate': [0.01, 0.1, 0.2],
    'clf__max_depth': [3, 5]
}
grid_10 = GridSearchCV(pipe_10, param_grid_10, cv=kfold, return_train_score=True)
grid_10.fit(X_train, y_train)


In [13]:

# 11) VotingClassifier (ensemble 3 modeli)
pipe_11 =Pipeline( [(
    'clf',
    VotingClassifier(estimators=[
        ('lr', LogisticRegression(max_iter=1000, random_state=seed)),
        ('rf', RandomForestClassifier(random_state=seed)),
        ('svc', SVC(probability=True, random_state=seed))
    ],
        voting='soft')
)])
param_grid_11 = {
    'clf__weights': [
        [1,1,1],
        [2,1,1],
        [1,2,1],
        [1,1,2],
        [3,1,1]
    ]
}
grid_11 = GridSearchCV(pipe_11, param_grid_11, cv=kfold, return_train_score=True)
grid_11.fit(X_train, y_train)

In [14]:
# 12) XGBClassifier
pipe_12 = Pipeline([
    ('clf', XGBClassifier(use_label_encoder=False, eval_metric='logloss', random_state=seed))
])
param_grid_12 = {
    'clf__n_estimators': [50, 100, 200],
    'clf__learning_rate': [0.01, 0.1, 0.2],
    'clf__max_depth': [3, 5, 7]
}
grid_12 = GridSearchCV(pipe_12, param_grid_12, cv=kfold, return_train_score=True)
grid_12.fit(X_train, y_train);


Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encode

In [15]:
from sklearn import  metrics


models = []
models.append(('SVM linear', grid_1.best_estimator_))
models.append(('SVM rbf', grid_2.best_estimator_))
models.append(('LR', grid_3.best_estimator_))
models.append(('KNN', grid_4.best_estimator_))
models.append(('DecisionTreeClassifier', grid_5.best_estimator_))
models.append(('BaggingClassifier', grid_6.best_estimator_))
models.append(('RandomForestClassifier', grid_7.best_estimator_))
models.append(('ExtraTreesClassifier', grid_8.best_estimator_))
models.append(('AdaBoostClassifier', grid_9.best_estimator_))
models.append(('GradientBoostingClassifier', grid_10.best_estimator_))
models.append(('VotingClassifier', grid_11.best_estimator_))
models.append(('XGBClassifier', grid_12.best_estimator_))


precision_score = []
recall_score = []
f1_score = []
accuracy_score = []
roc_auc_score = []
for name, model in models:
    print(name)
    print("precision_score: {}".format(metrics.precision_score(y_test , model.predict(X_test)) ))
    print("recall_score: {}".format( metrics.recall_score(y_test , model.predict(X_test)) ))
    print("f1_score: {}".format( metrics.f1_score(y_test , model.predict(X_test)) ))
    print("accuracy_score: {}".format( metrics.accuracy_score(y_test , model.predict(X_test)) ))
    
    if (name == 'SVM linear'):
        print("roc_auc_score: {}".format( metrics.roc_auc_score(y_test , model.decision_function(X_test)) ))            
    else:
        print("roc_auc_score: {}".format( metrics.roc_auc_score(y_test , model.predict_proba(X_test)[:,1]) ))
    
    precision_score.append(metrics.precision_score(y_test , model.predict(X_test)))
    recall_score.append(metrics.recall_score(y_test , model.predict(X_test)))
    f1_score.append( metrics.f1_score(y_test , model.predict(X_test)))
    accuracy_score.append(metrics.accuracy_score(y_test , model.predict(X_test)))
    if (name == 'SVM linear'):
        roc_auc_score.append(metrics.roc_auc_score(y_test , model.decision_function(X_test)))        
    else:    
        roc_auc_score.append(metrics.roc_auc_score(y_test , model.predict_proba(X_test)[:,1]))

SVM linear
precision_score: 0.7162162162162162
recall_score: 0.6625
f1_score: 0.6883116883116883
accuracy_score: 0.8110236220472441
roc_auc_score: 0.8487068965517242
SVM rbf
precision_score: 0.75
recall_score: 0.6375
f1_score: 0.6891891891891891
accuracy_score: 0.8188976377952756
roc_auc_score: 0.8508620689655173
LR
precision_score: 0.7066666666666667
recall_score: 0.6625
f1_score: 0.6838709677419355
accuracy_score: 0.8070866141732284
roc_auc_score: 0.8512212643678161
KNN
precision_score: 0.5647058823529412
recall_score: 0.6
f1_score: 0.5818181818181818
accuracy_score: 0.7283464566929134
roc_auc_score: 0.7819324712643678
DecisionTreeClassifier
precision_score: 0.6
recall_score: 0.75
f1_score: 0.6666666666666666
accuracy_score: 0.7637795275590551
roc_auc_score: 0.8032686781609195
BaggingClassifier
precision_score: 0.7051282051282052
recall_score: 0.6875
f1_score: 0.6962025316455697
accuracy_score: 0.8110236220472441
roc_auc_score: 0.8620689655172413
RandomForestClassifier
precision_scor

In [16]:
import pandas as pd
d = {'precision_score': precision_score, 
     'recall_score': recall_score, 
     'f1_score': f1_score,
     'accuracy_score' : accuracy_score,
     'roc_auc_score' : roc_auc_score
    }
df = pd.DataFrame(data=d)
df.insert(loc=0, column='Method', value=['SVM linear','SVM rbf','LR','KNN', 'DecisionTreeClassifier','BaggingClassifier','RandomForestClassifier','ExtraTreesClassifier', 'AdaBoostClassifier','GradientBoostingClassifier','XGBClassifier', 'voting'])
df

Unnamed: 0,Method,precision_score,recall_score,f1_score,accuracy_score,roc_auc_score
0,SVM linear,0.716216,0.6625,0.688312,0.811024,0.848707
1,SVM rbf,0.75,0.6375,0.689189,0.818898,0.850862
2,LR,0.706667,0.6625,0.683871,0.807087,0.851221
3,KNN,0.564706,0.6,0.581818,0.728346,0.781932
4,DecisionTreeClassifier,0.6,0.75,0.666667,0.76378,0.803269
5,BaggingClassifier,0.705128,0.6875,0.696203,0.811024,0.862069
6,RandomForestClassifier,0.654762,0.6875,0.670732,0.787402,0.837177
7,ExtraTreesClassifier,0.675,0.675,0.675,0.795276,0.854418
8,AdaBoostClassifier,0.716216,0.6625,0.688312,0.811024,0.856286
9,GradientBoostingClassifier,0.678571,0.7125,0.695122,0.80315,0.847917


# MLPClassifier

Dodajmy model sieci neuronowej

In [17]:
from sklearn.neural_network import MLPClassifier

In [18]:
model = MLPClassifier((20,10))
model.fit(X_train, y_train)

y_pred = model.predict_proba(X_test)[:,1]
predictions = y_pred.round()

accuracy = metrics.accuracy_score(y_test, predictions)
print("Accuracy: %.2f%%" % (accuracy * 100.0), "AUC: ", metrics.roc_auc_score(y_score=y_pred,y_true=y_test))

Accuracy: 72.44% AUC:  0.70308908045977




# Zad
Wykonaj Walidację krzyżową

In [19]:
pipe = Pipeline([('preprocessing', StandardScaler()), ('classifier', MLPClassifier())])

param_grid = {
            'preprocessing': [StandardScaler(), None],
            'classifier__hidden_layer_sizes': [(20,10)],
            'classifier__learning_rate_init': [0.001],#, 0.01, 0.1],
            'classifier__max_iter': [100],
            'classifier__batch_size': [8, 16,32],
}

grid_13 = GridSearchCV(pipe, param_grid, cv=kfold, return_train_score=True)

grid_13.fit(X_train, y_train)
grid_13.best_params_



{'classifier__batch_size': 8,
 'classifier__hidden_layer_sizes': (20, 10),
 'classifier__learning_rate_init': 0.001,
 'classifier__max_iter': 100,
 'preprocessing': StandardScaler()}

In [20]:
metrics.accuracy_score(y_test, grid_2.best_estimator_.predict(X_test))

0.8188976377952756

In [21]:
from sklearn import  metrics


models = []
models.append(('SVM linear', grid_1.best_estimator_))
models.append(('SVM rbf', grid_2.best_estimator_))
models.append(('LR', grid_3.best_estimator_))
models.append(('KNN', grid_4.best_estimator_))
models.append(('DecisionTreeClassifier', grid_5.best_estimator_))
models.append(('BaggingClassifier', grid_6.best_estimator_))
models.append(('RandomForestClassifier', grid_7.best_estimator_))
models.append(('ExtraTreesClassifier', grid_8.best_estimator_))
models.append(('AdaBoostClassifier', grid_9.best_estimator_))
models.append(('GradientBoostingClassifier', grid_10.best_estimator_))
models.append(('VotingClassifier', grid_11.best_estimator_))
models.append(('XGBClassifier', grid_12.best_estimator_))
models.append(('MLP', grid_13.best_estimator_))

precision_score = []
recall_score = []
f1_score = []
accuracy_score = []
roc_auc_score = []
for name, model in models:
    print(name)
    print("precision_score: {}".format(metrics.precision_score(y_test , model.predict(X_test)) ))
    print("recall_score: {}".format( metrics.recall_score(y_test , model.predict(X_test)) ))
    print("f1_score: {}".format( metrics.f1_score(y_test , model.predict(X_test)) ))
    print("accuracy_score: {}".format( metrics.accuracy_score(y_test , model.predict(X_test)) ))
    
    if (name == 'SVM linear'):
        print("roc_auc_score: {}".format( metrics.roc_auc_score(y_test , model.decision_function(X_test)) ))            
    else:
        print("roc_auc_score: {}".format( metrics.roc_auc_score(y_test , model.predict_proba(X_test)[:,1]) ))
    
    precision_score.append(metrics.precision_score(y_test , model.predict(X_test)))
    recall_score.append(metrics.recall_score(y_test , model.predict(X_test)))
    f1_score.append( metrics.f1_score(y_test , model.predict(X_test)))
    accuracy_score.append(metrics.accuracy_score(y_test , model.predict(X_test)))
    if (name == 'SVM linear'):
        roc_auc_score.append(metrics.roc_auc_score(y_test , model.decision_function(X_test)))        
    else:    
        roc_auc_score.append(metrics.roc_auc_score(y_test , model.predict_proba(X_test)[:,1]))

SVM linear
precision_score: 0.7162162162162162
recall_score: 0.6625
f1_score: 0.6883116883116883
accuracy_score: 0.8110236220472441
roc_auc_score: 0.8487068965517242
SVM rbf
precision_score: 0.75
recall_score: 0.6375
f1_score: 0.6891891891891891
accuracy_score: 0.8188976377952756
roc_auc_score: 0.8508620689655173
LR
precision_score: 0.7066666666666667
recall_score: 0.6625
f1_score: 0.6838709677419355
accuracy_score: 0.8070866141732284
roc_auc_score: 0.8512212643678161
KNN
precision_score: 0.5647058823529412
recall_score: 0.6
f1_score: 0.5818181818181818
accuracy_score: 0.7283464566929134
roc_auc_score: 0.7819324712643678
DecisionTreeClassifier
precision_score: 0.6
recall_score: 0.75
f1_score: 0.6666666666666666
accuracy_score: 0.7637795275590551
roc_auc_score: 0.8032686781609195
BaggingClassifier
precision_score: 0.7051282051282052
recall_score: 0.6875
f1_score: 0.6962025316455697
accuracy_score: 0.8110236220472441
roc_auc_score: 0.8620689655172413
RandomForestClassifier
precision_scor

In [22]:
import pandas as pd
d = {'precision_score': precision_score, 
     'recall_score': recall_score, 
     'f1_score': f1_score,
     'accuracy_score' : accuracy_score,
     'roc_auc_score' : roc_auc_score
    }
df = pd.DataFrame(data=d)
df.insert(loc=0, column='Method', value=['SVM linear','SVM rbf','LR','KNN', 'DecisionTreeClassifier','BaggingClassifier','RandomForestClassifier','ExtraTreesClassifier', 'AdaBoostClassifier','GradientBoostingClassifier','XGBClassifier', 'voting', 'MLP'])
df

Unnamed: 0,Method,precision_score,recall_score,f1_score,accuracy_score,roc_auc_score
0,SVM linear,0.716216,0.6625,0.688312,0.811024,0.848707
1,SVM rbf,0.75,0.6375,0.689189,0.818898,0.850862
2,LR,0.706667,0.6625,0.683871,0.807087,0.851221
3,KNN,0.564706,0.6,0.581818,0.728346,0.781932
4,DecisionTreeClassifier,0.6,0.75,0.666667,0.76378,0.803269
5,BaggingClassifier,0.705128,0.6875,0.696203,0.811024,0.862069
6,RandomForestClassifier,0.654762,0.6875,0.670732,0.787402,0.837177
7,ExtraTreesClassifier,0.675,0.675,0.675,0.795276,0.854418
8,AdaBoostClassifier,0.716216,0.6625,0.688312,0.811024,0.856286
9,GradientBoostingClassifier,0.678571,0.7125,0.695122,0.80315,0.847917


# Wczytaj dane treningowe i testowe

In [23]:
# Wczytaj dane treningowe i testowe

import pandas as pd
url_train = 'https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data'
url_test = 'https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.test'
train_set = pd.read_csv(url_train, sep=", ",header = None)
test_set = pd.read_csv(url_test, sep=", ",skiprows = 1, header = None) # Make sure to skip a row for the test set


col_labels = ['age', 'workclass', 'fnlwgt', 'education', 'education_num', 'marital_status', 'occupation', 
              'relationship', 'race', 'sex', 'capital_gain', 'capital_loss', 'hours_per_week', 'native_country',
             'wage_class']
train_set.columns = col_labels
test_set.columns = col_labels

train = train_set.replace('?', np.nan).dropna()
test = test_set.replace('?', np.nan).dropna()



dataset = pd.concat([train,test])

dataset['wage_class'] = dataset.wage_class.replace({'<=50K.': 0,'<=50K':0, '>50K.':1, '>50K':1})

dataset.drop(["fnlwgt"],axis=1,inplace=True)

dataset.drop(["education"],axis=1,inplace=True)

x = dataset.groupby('native_country')["wage_class"].mean()

d = dict(pd.cut(x[x.index!=" United-States"],5,labels=range(5)))

dataset['native_country'] = dataset['native_country'].replace(d)

dataset = pd.get_dummies(dataset,drop_first=True)

train = dataset.iloc[:train.shape[0]]
test = dataset.iloc[train.shape[0]:]

X_train = train.drop("wage_class",axis=1)
y_train = train.wage_class

X_test = test.drop("wage_class",axis=1)
y_test = test.wage_class



  train_set = pd.read_csv(url_train, sep=", ",header = None)
  test_set = pd.read_csv(url_test, sep=", ",skiprows = 1, header = None) # Make sure to skip a row for the test set
  dataset['wage_class'] = dataset.wage_class.replace({'<=50K.': 0,'<=50K':0, '>50K.':1, '>50K':1})
  dataset['native_country'] = dataset['native_country'].replace(d)


In [24]:
print(X_train.shape)
print(X_test.shape)

(30162, 41)
(15060, 41)


# Zad
Porównaj wyniki sieci na:
* oryginalnych danych 
* na wystandaryzowanych

In [None]:
pipe = Pipeline([('preprocessing', StandardScaler()), ('classifier', MLPClassifier())])

param_grid = {
            'preprocessing': [StandardScaler(), None],
            'classifier__hidden_layer_sizes': [(100,10)],
            'classifier__learning_rate_init': [0.001],#, 0.01, 0.1],
            'classifier__max_iter': [100],
            'classifier__batch_size': [8, 16,32],
}

grid_scaled = GridSearchCV(pipe, param_grid, cv=kfold, return_train_score=True)

grid_scaled.fit(X_train, y_train)


pipe = Pipeline([ ('classifier', MLPClassifier())])

param_grid = {
            'classifier__hidden_layer_sizes': [(100,10)],
            'classifier__learning_rate_init': [0.001],#, 0.01, 0.1],
            'classifier__max_iter': [100],
            'classifier__batch_size': [8, 16,32],
}

grid = GridSearchCV(pipe, param_grid, cv=kfold, return_train_score=True)

grid.fit(X_train, y_train)

models = [grid_scaled, grid]
for model in models:
    print("precision_score: {}".format(metrics.precision_score(y_test , model.predict(X_test)) ))
    print("recall_score: {}".format( metrics.recall_score(y_test , model.predict(X_test)) ))
    print("f1_score: {}".format( metrics.f1_score(y_test , model.predict(X_test)) ))
    print("accuracy_score: {}".format( metrics.accuracy_score(y_test , model.predict(X_test)) ))
