### Load Dataset

In [1]:
from sklearn.neural_network import MLPClassifier
from sklearn import datasets

In [2]:
iris = datasets.load_iris()

In [3]:
inputs = iris.data
inputs

array([[5.1, 3.5, 1.4, 0.2],
       [4.9, 3. , 1.4, 0.2],
       [4.7, 3.2, 1.3, 0.2],
       [4.6, 3.1, 1.5, 0.2],
       [5. , 3.6, 1.4, 0.2],
       [5.4, 3.9, 1.7, 0.4],
       [4.6, 3.4, 1.4, 0.3],
       [5. , 3.4, 1.5, 0.2],
       [4.4, 2.9, 1.4, 0.2],
       [4.9, 3.1, 1.5, 0.1],
       [5.4, 3.7, 1.5, 0.2],
       [4.8, 3.4, 1.6, 0.2],
       [4.8, 3. , 1.4, 0.1],
       [4.3, 3. , 1.1, 0.1],
       [5.8, 4. , 1.2, 0.2],
       [5.7, 4.4, 1.5, 0.4],
       [5.4, 3.9, 1.3, 0.4],
       [5.1, 3.5, 1.4, 0.3],
       [5.7, 3.8, 1.7, 0.3],
       [5.1, 3.8, 1.5, 0.3],
       [5.4, 3.4, 1.7, 0.2],
       [5.1, 3.7, 1.5, 0.4],
       [4.6, 3.6, 1. , 0.2],
       [5.1, 3.3, 1.7, 0.5],
       [4.8, 3.4, 1.9, 0.2],
       [5. , 3. , 1.6, 0.2],
       [5. , 3.4, 1.6, 0.4],
       [5.2, 3.5, 1.5, 0.2],
       [5.2, 3.4, 1.4, 0.2],
       [4.7, 3.2, 1.6, 0.2],
       [4.8, 3.1, 1.6, 0.2],
       [5.4, 3.4, 1.5, 0.4],
       [5.2, 4.1, 1.5, 0.1],
       [5.5, 4.2, 1.4, 0.2],
       [4.9, 3

In [4]:
iris.feature_names

['sepal length (cm)',
 'sepal width (cm)',
 'petal length (cm)',
 'petal width (cm)']

In [5]:
outputs = iris.target
outputs

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])

In [6]:
iris.target_names

array(['setosa', 'versicolor', 'virginica'], dtype='<U10')

In [7]:
inputs.shape

(150, 4)

In [8]:
outputs.shape

(150,)

### Train and test split

In [9]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(inputs, outputs, test_size = 0.2)

In [10]:
X_train.shape

(120, 4)

In [11]:
y_train.shape

(120,)

In [12]:
X_test.shape

(30, 4)

In [13]:
y_test.shape

(30,)

In [14]:
X_train[0:5]

array([[6. , 2.9, 4.5, 1.5],
       [6.9, 3.2, 5.7, 2.3],
       [4.7, 3.2, 1.6, 0.2],
       [4.9, 3. , 1.4, 0.2],
       [4.4, 2.9, 1.4, 0.2]])

In [15]:
X_test[0:5]

array([[7.7, 3.8, 6.7, 2.2],
       [6. , 3. , 4.8, 1.8],
       [5.7, 2.5, 5. , 2. ],
       [5.6, 2.8, 4.9, 2. ],
       [7. , 3.2, 4.7, 1.4]])

In [16]:
y_train[0:5]

array([1, 2, 0, 0, 0])

In [17]:
y_test[0:5]

array([2, 2, 2, 2, 1])

### Neural network

In [18]:
network = MLPClassifier(max_iter=2000, 
                        verbose=True,
                        tol=0.0000100,
                        activation = 'logistic',
                        solver = 'adam',
                        learning_rate = 'constant',
                        learning_rate_init = 0.001,
                        batch_size = 32,
                        hidden_layer_sizes = (4, 5)
                        #early_stopping = True,
                        #n_iter_no_change = 50
                        )
network.fit(X_train, y_train)

Iteration 1, loss = 1.12595940
Iteration 2, loss = 1.12370528
Iteration 3, loss = 1.12051047
Iteration 4, loss = 1.11857104
Iteration 5, loss = 1.11611228
Iteration 6, loss = 1.11398576
Iteration 7, loss = 1.11195080
Iteration 8, loss = 1.11004278
Iteration 9, loss = 1.10820365
Iteration 10, loss = 1.10718009
Iteration 11, loss = 1.10556496
Iteration 12, loss = 1.10392213
Iteration 13, loss = 1.10242171
Iteration 14, loss = 1.10146929
Iteration 15, loss = 1.10029493
Iteration 16, loss = 1.09893306
Iteration 17, loss = 1.09764346
Iteration 18, loss = 1.09671184
Iteration 19, loss = 1.09546680
Iteration 20, loss = 1.09446012
Iteration 21, loss = 1.09350297
Iteration 22, loss = 1.09216495
Iteration 23, loss = 1.09118382
Iteration 24, loss = 1.08989581
Iteration 25, loss = 1.08887061
Iteration 26, loss = 1.08769545
Iteration 27, loss = 1.08650612
Iteration 28, loss = 1.08522388
Iteration 29, loss = 1.08421368
Iteration 30, loss = 1.08289802
Iteration 31, loss = 1.08150408
Iteration 32, los

Iteration 314, loss = 0.45289166
Iteration 315, loss = 0.45196550
Iteration 316, loss = 0.45089629
Iteration 317, loss = 0.44982030
Iteration 318, loss = 0.44883456
Iteration 319, loss = 0.44782740
Iteration 320, loss = 0.44676106
Iteration 321, loss = 0.44571024
Iteration 322, loss = 0.44476100
Iteration 323, loss = 0.44377334
Iteration 324, loss = 0.44268175
Iteration 325, loss = 0.44169047
Iteration 326, loss = 0.44067909
Iteration 327, loss = 0.43952462
Iteration 328, loss = 0.43848321
Iteration 329, loss = 0.43747758
Iteration 330, loss = 0.43643500
Iteration 331, loss = 0.43542233
Iteration 332, loss = 0.43438388
Iteration 333, loss = 0.43326033
Iteration 334, loss = 0.43238325
Iteration 335, loss = 0.43131441
Iteration 336, loss = 0.43017246
Iteration 337, loss = 0.42913659
Iteration 338, loss = 0.42806864
Iteration 339, loss = 0.42698825
Iteration 340, loss = 0.42597990
Iteration 341, loss = 0.42490074
Iteration 342, loss = 0.42381868
Iteration 343, loss = 0.42273584
Iteration 

Iteration 623, loss = 0.19241290
Iteration 624, loss = 0.19200198
Iteration 625, loss = 0.19146604
Iteration 626, loss = 0.19095144
Iteration 627, loss = 0.19043159
Iteration 628, loss = 0.18996961
Iteration 629, loss = 0.18938193
Iteration 630, loss = 0.18893422
Iteration 631, loss = 0.18841889
Iteration 632, loss = 0.18794328
Iteration 633, loss = 0.18752608
Iteration 634, loss = 0.18716611
Iteration 635, loss = 0.18656496
Iteration 636, loss = 0.18614962
Iteration 637, loss = 0.18567445
Iteration 638, loss = 0.18512104
Iteration 639, loss = 0.18466553
Iteration 640, loss = 0.18425103
Iteration 641, loss = 0.18367629
Iteration 642, loss = 0.18344272
Iteration 643, loss = 0.18291536
Iteration 644, loss = 0.18236774
Iteration 645, loss = 0.18187138
Iteration 646, loss = 0.18148453
Iteration 647, loss = 0.18101457
Iteration 648, loss = 0.18051974
Iteration 649, loss = 0.17996221
Iteration 650, loss = 0.17996138
Iteration 651, loss = 0.17919625
Iteration 652, loss = 0.17870884
Iteration 

Iteration 891, loss = 0.10918677
Iteration 892, loss = 0.10903432
Iteration 893, loss = 0.10877242
Iteration 894, loss = 0.10864198
Iteration 895, loss = 0.10837089
Iteration 896, loss = 0.10823269
Iteration 897, loss = 0.10865633
Iteration 898, loss = 0.10801326
Iteration 899, loss = 0.10777638
Iteration 900, loss = 0.10763353
Iteration 901, loss = 0.10735056
Iteration 902, loss = 0.10719436
Iteration 903, loss = 0.10708233
Iteration 904, loss = 0.10686871
Iteration 905, loss = 0.10670920
Iteration 906, loss = 0.10650636
Iteration 907, loss = 0.10640900
Iteration 908, loss = 0.10623253
Iteration 909, loss = 0.10604918
Iteration 910, loss = 0.10596350
Iteration 911, loss = 0.10575405
Iteration 912, loss = 0.10551206
Iteration 913, loss = 0.10539478
Iteration 914, loss = 0.10522638
Iteration 915, loss = 0.10526706
Iteration 916, loss = 0.10492371
Iteration 917, loss = 0.10473690
Iteration 918, loss = 0.10458491
Iteration 919, loss = 0.10447312
Iteration 920, loss = 0.10429158
Iteration 

Iteration 1146, loss = 0.07894332
Iteration 1147, loss = 0.07876574
Iteration 1148, loss = 0.07873649
Iteration 1149, loss = 0.07873285
Iteration 1150, loss = 0.07856642
Iteration 1151, loss = 0.07881113
Iteration 1152, loss = 0.07840540
Iteration 1153, loss = 0.07862465
Iteration 1154, loss = 0.07843400
Iteration 1155, loss = 0.07829447
Iteration 1156, loss = 0.07808916
Iteration 1157, loss = 0.07809580
Iteration 1158, loss = 0.07807794
Iteration 1159, loss = 0.07804488
Iteration 1160, loss = 0.07783941
Iteration 1161, loss = 0.07775410
Iteration 1162, loss = 0.07779183
Iteration 1163, loss = 0.07756284
Iteration 1164, loss = 0.07751284
Iteration 1165, loss = 0.07764117
Iteration 1166, loss = 0.07766703
Iteration 1167, loss = 0.07731136
Iteration 1168, loss = 0.07722613
Iteration 1169, loss = 0.07726003
Iteration 1170, loss = 0.07729932
Iteration 1171, loss = 0.07698577
Iteration 1172, loss = 0.07688738
Iteration 1173, loss = 0.07699781
Iteration 1174, loss = 0.07701405
Iteration 1175

Iteration 1412, loss = 0.06477749
Iteration 1413, loss = 0.06463064
Iteration 1414, loss = 0.06490169
Iteration 1415, loss = 0.06510286
Iteration 1416, loss = 0.06452466
Iteration 1417, loss = 0.06450548
Iteration 1418, loss = 0.06457789
Iteration 1419, loss = 0.06443001
Iteration 1420, loss = 0.06477155
Iteration 1421, loss = 0.06452597
Iteration 1422, loss = 0.06443168
Iteration 1423, loss = 0.06430106
Iteration 1424, loss = 0.06438864
Iteration 1425, loss = 0.06427121
Iteration 1426, loss = 0.06417878
Iteration 1427, loss = 0.06415819
Iteration 1428, loss = 0.06417541
Iteration 1429, loss = 0.06408707
Iteration 1430, loss = 0.06435413
Iteration 1431, loss = 0.06410644
Iteration 1432, loss = 0.06404982
Iteration 1433, loss = 0.06401034
Iteration 1434, loss = 0.06387444
Iteration 1435, loss = 0.06385004
Iteration 1436, loss = 0.06414788
Iteration 1437, loss = 0.06400256
Iteration 1438, loss = 0.06374875
Iteration 1439, loss = 0.06377529
Iteration 1440, loss = 0.06373176
Iteration 1441

Iteration 1682, loss = 0.05772074
Iteration 1683, loss = 0.05748223
Iteration 1684, loss = 0.05755700
Iteration 1685, loss = 0.05753544
Iteration 1686, loss = 0.05736525
Iteration 1687, loss = 0.05733912
Iteration 1688, loss = 0.05733483
Iteration 1689, loss = 0.05737749
Iteration 1690, loss = 0.05733297
Iteration 1691, loss = 0.05743767
Iteration 1692, loss = 0.05737329
Iteration 1693, loss = 0.05727263
Iteration 1694, loss = 0.05738112
Iteration 1695, loss = 0.05730767
Iteration 1696, loss = 0.05729897
Iteration 1697, loss = 0.05711713
Iteration 1698, loss = 0.05719657
Iteration 1699, loss = 0.05724001
Iteration 1700, loss = 0.05739701
Iteration 1701, loss = 0.05736496
Iteration 1702, loss = 0.05743030
Iteration 1703, loss = 0.05736812
Iteration 1704, loss = 0.05704042
Iteration 1705, loss = 0.05706169
Iteration 1706, loss = 0.05725240
Iteration 1707, loss = 0.05707233
Iteration 1708, loss = 0.05715123
Iteration 1709, loss = 0.05693256
Iteration 1710, loss = 0.05694783
Iteration 1711

MLPClassifier(activation='logistic', batch_size=32, hidden_layer_sizes=(4, 5),
              max_iter=2000, tol=1e-05, verbose=True)

In [19]:
network.classes_

array([0, 1, 2])

In [20]:
network.coefs_

[array([[-0.2067025 ,  0.85069607,  0.70678564, -0.01455573],
        [-1.58580011,  0.79965021,  1.18887755,  1.40705856],
        [ 1.75204891, -1.33085221, -1.46841985, -0.82722656],
        [ 2.29503516, -1.95719892, -1.7749755 , -1.39732739]]),
 array([[-2.01432853,  1.81744635,  4.82574234, -1.75503355,  4.40891161],
        [ 2.88231248, -2.78555495, -2.05174366,  3.37255836, -1.60032461],
        [ 3.23986827, -3.15676337, -0.8055287 ,  3.51916203, -1.0568313 ],
        [ 2.61937091, -2.95740236, -2.81780937,  2.62887805, -2.77175459]]),
 array([[ 2.9714095 ,  0.92581596, -3.31194755],
        [-2.49806109, -1.73667153,  3.05446772],
        [-4.16116834,  1.11856629,  1.00657736],
        [ 2.36450024,  1.20292976, -3.25632948],
        [-4.01231563,  1.14789322,  1.79234226]])]

In [21]:
network.intercepts_

[array([-0.45180665,  1.485364  ,  1.7633625 ,  1.18091781]),
 array([-0.8618882 ,  0.93613559,  1.88401258, -1.2420346 ,  1.84571613]),
 array([ 1.24969784, -0.80569618,  0.21660829])]

In [22]:
network.n_layers_

4

In [23]:
network.n_outputs_

3

In [24]:
network.out_activation_

'softmax'

### Neural network evaluation

In [25]:
X_test.shape

(30, 4)

In [26]:
predictions = network.predict(X_test)
predictions

array([2, 2, 2, 2, 1, 2, 2, 0, 2, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0,
       0, 1, 1, 1, 1, 1, 2, 2])

In [27]:
y_test

array([2, 2, 2, 2, 1, 2, 2, 0, 2, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0,
       0, 1, 1, 1, 1, 1, 2, 1])

In [28]:
from sklearn.metrics import accuracy_score, confusion_matrix
accuracy_score(y_test, predictions)

0.9666666666666667

In [29]:
cm = confusion_matrix(y_test, predictions)
cm

array([[10,  0,  0],
       [ 0, 11,  1],
       [ 0,  0,  8]], dtype=int64)

### Neural network classification

In [32]:
X_test[0], y_test[0]

(array([7.7, 3.8, 6.7, 2.2]), 2)

In [33]:
X_test[0].shape

(4,)

In [34]:
new = X_test[0].reshape(1, -1)
new.shape

(1, 4)

In [35]:
network.predict(new)

array([2])

In [36]:
iris.target_names[network.predict(new)[0]]

'virginica'