In [2]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split
from sklearn import preprocessing


In [3]:
# Load the breast-cancer dataset; the path is relative to this notebook.
csv_path = '../Datasets/breast-cancer-data.csv'
df = pd.read_csv(csv_path)
# Leave the frame as the last expression so the notebook renders it.
df

Unnamed: 0,mean radius,mean texture,mean perimeter,mean area,mean smoothness,mean compactness,mean concavity,mean concave points,mean symmetry,mean fractal dimension,...,worst texture,worst perimeter,worst area,worst smoothness,worst compactness,worst concavity,worst concave points,worst symmetry,worst fractal dimension,diagnosis
0,17.99,10.38,122.80,1001.0,0.11840,0.27760,0.30010,0.14710,0.2419,0.07871,...,17.33,184.60,2019.0,0.16220,0.66560,0.7119,0.2654,0.4601,0.11890,malignant
1,20.57,17.77,132.90,1326.0,0.08474,0.07864,0.08690,0.07017,0.1812,0.05667,...,23.41,158.80,1956.0,0.12380,0.18660,0.2416,0.1860,0.2750,0.08902,malignant
2,19.69,21.25,130.00,1203.0,0.10960,0.15990,0.19740,0.12790,0.2069,0.05999,...,25.53,152.50,1709.0,0.14440,0.42450,0.4504,0.2430,0.3613,0.08758,malignant
3,11.42,20.38,77.58,386.1,0.14250,0.28390,0.24140,0.10520,0.2597,0.09744,...,26.50,98.87,567.7,0.20980,0.86630,0.6869,0.2575,0.6638,0.17300,malignant
4,20.29,14.34,135.10,1297.0,0.10030,0.13280,0.19800,0.10430,0.1809,0.05883,...,16.67,152.20,1575.0,0.13740,0.20500,0.4000,0.1625,0.2364,0.07678,malignant
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
564,21.56,22.39,142.00,1479.0,0.11100,0.11590,0.24390,0.13890,0.1726,0.05623,...,26.40,166.10,2027.0,0.14100,0.21130,0.4107,0.2216,0.2060,0.07115,malignant
565,20.13,28.25,131.20,1261.0,0.09780,0.10340,0.14400,0.09791,0.1752,0.05533,...,38.25,155.00,1731.0,0.11660,0.19220,0.3215,0.1628,0.2572,0.06637,malignant
566,16.60,28.08,108.30,858.1,0.08455,0.10230,0.09251,0.05302,0.1590,0.05648,...,34.12,126.70,1124.0,0.11390,0.30940,0.3403,0.1418,0.2218,0.07820,malignant
567,20.60,29.33,140.10,1265.0,0.11780,0.27700,0.35140,0.15200,0.2397,0.07016,...,39.42,184.60,1821.0,0.16500,0.86810,0.9387,0.2650,0.4087,0.12400,malignant


The next step is feature engineering. The columns of this dataset have different scales of magnitude; hence, before constructing and training a neural network model, we normalize the dataset. For this, we use the MinMaxScaler API from sklearn, which rescales each column's values to the range 0 to 1, as discussed in the Logistic Regression section of this chapter (see Exercise 5.03, Logistic Regression – Multiclass Classifier).

In [19]:
# Features: every column from 'mean radius' through 'worst fractal dimension' (label slice, inclusive).
X_orig = df.loc[:, 'mean radius':'worst fractal dimension']
# Target: the 'diagnosis' column.
y_orig = df.loc[:, 'diagnosis']

In [20]:
# Rescale every feature column to the [0, 1] range with MinMaxScaler, because the
# raw columns have very different magnitudes.
# NOTE(review): the scaler is fitted on the full dataset, before the train/test
# split performed later in this notebook, so the held-out rows contribute to the
# min/max used for scaling. Consider fitting on the training rows only.
scaler = preprocessing.MinMaxScaler()
scaled = scaler.fit_transform(X_orig)
# Wrap the scaled values back into a DataFrame, restoring both the column labels
# and the original row index so that X keeps the same row labels as y_orig.
X = pd.DataFrame(scaled, columns=X_orig.columns, index=X_orig.index)
X

Unnamed: 0,mean radius,mean texture,mean perimeter,mean area,mean smoothness,mean compactness,mean concavity,mean concave points,mean symmetry,mean fractal dimension,...,worst radius,worst texture,worst perimeter,worst area,worst smoothness,worst compactness,worst concavity,worst concave points,worst symmetry,worst fractal dimension
0,0.521037,0.022658,0.545989,0.363733,0.593753,0.792037,0.703140,0.731113,0.686364,0.605518,...,0.620776,0.141525,0.668310,0.450698,0.601136,0.619292,0.568610,0.912027,0.598462,0.418864
1,0.643144,0.272574,0.615783,0.501591,0.289880,0.181768,0.203608,0.348757,0.379798,0.141323,...,0.606901,0.303571,0.539818,0.435214,0.347553,0.154563,0.192971,0.639175,0.233590,0.222878
2,0.601496,0.390260,0.595743,0.449417,0.514309,0.431017,0.462512,0.635686,0.509596,0.211247,...,0.556386,0.360075,0.508442,0.374508,0.483590,0.385375,0.359744,0.835052,0.403706,0.213433
3,0.210090,0.360839,0.233501,0.102906,0.811321,0.811361,0.565604,0.522863,0.776263,1.000000,...,0.248310,0.385928,0.241347,0.094008,0.915472,0.814012,0.548642,0.884880,1.000000,0.773711
4,0.629893,0.156578,0.630986,0.489290,0.430351,0.347893,0.463918,0.518390,0.378283,0.186816,...,0.519744,0.123934,0.506948,0.341575,0.437364,0.172415,0.319489,0.558419,0.157500,0.142595
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
564,0.690000,0.428813,0.678668,0.566490,0.526948,0.296055,0.571462,0.690358,0.336364,0.132056,...,0.623266,0.383262,0.576174,0.452664,0.461137,0.178527,0.328035,0.761512,0.097575,0.105667
565,0.622320,0.626987,0.604036,0.474019,0.407782,0.257714,0.337395,0.486630,0.349495,0.113100,...,0.560655,0.699094,0.520892,0.379915,0.300007,0.159997,0.256789,0.559450,0.198502,0.074315
566,0.455251,0.621238,0.445788,0.303118,0.288165,0.254340,0.216753,0.263519,0.267677,0.137321,...,0.393099,0.589019,0.379949,0.230731,0.282177,0.273705,0.271805,0.487285,0.128721,0.151909
567,0.644564,0.663510,0.665538,0.475716,0.588336,0.790197,0.823336,0.755467,0.675253,0.425442,...,0.633582,0.730277,0.668310,0.402035,0.619626,0.815758,0.749760,0.910653,0.497142,0.452315


Before we can construct the model, we must first convert the diagnosis values into labels that can be used within the model. Replace the benign diagnosis string with the value 0, and the malignant diagnosis string with the value 1.

In [22]:
# Encode the diagnosis strings as integer class labels: benign -> 0, malignant -> 1.
diag = {
    'benign': 0,
    'malignant': 1,
}
# Series.map produces an integer result directly when every value has a mapping.
# Series.replace on an object column depends on implicit downcasting, which newer
# pandas versions deprecate. Any label missing from `diag` becomes NaN here.
y = y_orig.map(diag)
y

0      1
1      1
2      1
3      1
4      1
      ..
564    1
565    1
566    1
567    1
568    0
Name: diagnosis, Length: 569, dtype: int64

Also, in order to impartially evaluate the model, we should split the dataset into a training set and a validation set.

In [23]:
# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.
split = train_test_split(X, y, test_size=0.2, random_state=123)
X_train, X_test, y_train, y_test = split
X_train

Unnamed: 0,mean radius,mean texture,mean perimeter,mean area,mean smoothness,mean compactness,mean concavity,mean concave points,mean symmetry,mean fractal dimension,...,worst radius,worst texture,worst perimeter,worst area,worst smoothness,worst compactness,worst concavity,worst concave points,worst symmetry,worst fractal dimension
190,0.342610,0.453500,0.349527,0.197837,0.495351,0.680694,0.464152,0.328926,0.668687,0.536226,...,0.277837,0.670576,0.278849,0.141860,0.542363,0.878433,0.677955,0.608935,0.709836,0.587433
134,0.542808,0.412580,0.528022,0.395122,0.376185,0.238360,0.270150,0.340308,0.319192,0.153960,...,0.519032,0.516258,0.474077,0.345262,0.497458,0.194245,0.316693,0.473883,0.304356,0.138135
386,0.247480,0.148123,0.241794,0.135101,0.256838,0.180510,0.160239,0.125944,0.295960,0.243892,...,0.184988,0.193763,0.185467,0.084718,0.207555,0.209380,0.245687,0.314089,0.219200,0.217762
118,0.416442,0.446398,0.427821,0.271092,0.567572,0.477946,0.499766,0.471123,0.523232,0.491786,...,0.436144,0.492537,0.397878,0.267106,0.755002,0.451349,0.587540,0.698969,0.336882,0.460186
316,0.246060,0.147785,0.231221,0.134846,0.223075,0.039077,0.026312,0.025104,0.309596,0.137532,...,0.175027,0.118603,0.155336,0.080589,0.191045,0.025254,0.032875,0.063643,0.143505,0.034960
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
98,0.218609,0.105851,0.211112,0.114146,0.335831,0.171370,0.098313,0.166501,0.282828,0.334035,...,0.182497,0.136994,0.162110,0.080441,0.475005,0.153108,0.153514,0.290344,0.237926,0.213302
322,0.278243,0.122083,0.269712,0.153256,0.548614,0.211521,0.089035,0.168986,0.243939,0.311710,...,0.217360,0.241471,0.211116,0.101824,0.551608,0.189976,0.143051,0.396907,0.161049,0.199987
382,0.239907,0.439973,0.241587,0.129077,0.150943,0.269677,0.186106,0.148012,0.072222,0.350253,...,0.165066,0.444829,0.184023,0.074518,0.111074,0.285347,0.232588,0.375258,0.123398,0.252197
365,0.636992,0.408184,0.622003,0.487593,0.350907,0.287467,0.229592,0.386928,0.281818,0.118155,...,0.582711,0.382463,0.551771,0.391958,0.406326,0.204044,0.215815,0.606529,0.205795,0.080742


Create the model using the normalized dataset and the assigned diagnosis labels.

In [26]:
# Baseline network: one hidden layer of 100 units trained with plain SGD.
sgd_params = dict(
    hidden_layer_sizes=(100,),
    solver='sgd',
    learning_rate_init=0.01,
    max_iter=1000,
    random_state=1,  # fixed seed for reproducible weight initialisation
)
model1 = MLPClassifier(**sgd_params)
# fit() returns the estimator, so the cell still displays the fitted model.
model1.fit(X_train, y_train)

MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
              beta_2=0.999, early_stopping=False, epsilon=1e-08,
              hidden_layer_sizes=(100,), learning_rate='constant',
              learning_rate_init=0.01, max_fun=15000, max_iter=1000,
              momentum=0.9, n_iter_no_change=10, nesterovs_momentum=True,
              power_t=0.5, random_state=1, shuffle=True, solver='sgd',
              tol=0.0001, validation_fraction=0.1, verbose=False,
              warm_start=False)

Compute the accuracy of the model against the validation set. The output will be similar to the following:

In [27]:
# Mean accuracy of the SGD model on the held-out split.
model1.score(X_test, y_test)

0.9824561403508771

In [35]:
# Same hyperparameters as the first model, but optimised with Adam instead of SGD.
adam_params = dict(
    hidden_layer_sizes=(100,),  # adding a layer or more neurons brought no improvement
    solver='adam',
    learning_rate_init=0.01,    # lowering this to 0.001 does not change the score
    max_iter=1000,              # raising this to 5000 does not change the score
    random_state=1,
)
model2 = MLPClassifier(**adam_params)
model2.fit(X_train, y_train)
# Mean accuracy on the held-out split, shown as the cell output.
model2.score(X_test, y_test)

0.9912280701754386