In [1]:
%run utils/preparation.ipynb

In [2]:
%run utils/exploration.ipynb

In [3]:
%run utils/MLP_utils.ipynb

In [4]:
import pandas as pd
fpath = "data.csv"
df = pd.read_csv(fpath, quotechar="'") # Load csv as pandas df
df.rename( columns={
        'tumor-size': 'tumor_size',
        'inv-nodes': 'inv_nodes',
        'node-caps' : 'node_caps',
        'deg-malig' : 'deg_malig',
        'breast-quad' : 'breast_quad'
    }, inplace=True)

df = remove_missing_values(df)
df_encoded, label_encoder = encode_df(df)
X, y = get_nn_inputs(df_encoded)

In [5]:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier

steps = [
    ('scaler', StandardScaler()),
    ('clf', MLPClassifier())
]

pipeline = Pipeline(steps)

In [17]:
import numpy as np
from itertools import product

max_layers = 5
max_neurons = 20
layer_space_list = []
for i in range(1, max_layers+1):
    i_layers = list(product(list(range(1, max_neurons+1)), repeat=i))
    layer_space_list.extend(i_layers) # all config: i layers
layer_space = tuple(layer_space_list)

param_grid = {
    'clf__hidden_layer_sizes' : layer_space, # (100,)
    'clf__max_iter' : (500, 800, 1000),
    'clf__activation' : ['identity', 'logistic', 'tanh', 'relu'], # 'relu'
    'clf__solver' : ['lbfgs', 'sgd', 'adam'], #'adam'
    'clf__alpha' : np.linspace(start=0.00001, stop=0.001, num=50), #0.0001
    'clf__learning_rate' : ['constant', 'invscaling', 'adaptive'], #'constant'
    'clf__learning_rate_init' : np.linspace(start=0.0001, stop=0.01, num=50), #0.001
    'clf__momentum' : np.linspace(start=0.1, stop=1, num=10) #0.9
}

In [18]:
from sklearn.model_selection import RandomizedSearchCV

random_search = RandomizedSearchCV(
    pipeline,
    param_distributions=param_grid,
    n_iter=50,
    n_jobs=8,
    refit=True,
    cv=10, # 10-fold
    verbose=1,
    random_state=None
)

random_search.fit(X, y)
print("best params:\n{}".format(random_search.best_params_))
print("best score :\n{}".format(random_search.best_score_))

Fitting 10 folds for each of 20 candidates, totalling 200 fits


[Parallel(n_jobs=8)]: Done 124 tasks      | elapsed:    4.6s
[Parallel(n_jobs=8)]: Done 185 out of 200 | elapsed:   10.4s remaining:    0.8s


best params:
{'clf__solver': 'adam', 'clf__momentum': 0.20000000000000001, 'clf__max_iter': 800, 'clf__learning_rate_init': 0.0039387755102040824, 'clf__learning_rate': 'constant', 'clf__hidden_layer_sizes': (17, 10, 10, 9, 5), 'clf__alpha': 0.00079795918367346944, 'clf__activation': 'identity'}
best score :
0.7473684210526316


[Parallel(n_jobs=8)]: Done 200 out of 200 | elapsed:   12.4s finished
