In [1]:
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.manifold import TSNE
from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
from report import Report
from sklearn.neural_network import MLPClassifier

In [2]:
# Download the 'mnist_784' dataset (version 1) from OpenML and unpack it
# straight into the feature matrix X and the label vector y.
X, y = fetch_openml(name='mnist_784', version=1, return_X_y=True)

In [3]:
# Split without shuffling so the row order of the downloaded dataset decides
# which samples land in the 60,000-row training part and the 10,000-row
# test part.
split_parts = train_test_split(
    X, y,
    shuffle=False,
    train_size=60000,
    test_size=10000,
)
X_train, X_test, y_train, y_test = split_parts

# Sanity check: (n_samples, n_features) of the training part.
print(X_train.shape)

(60000, 784)


In [4]:
# Scale every feature by its maximum over the TRAINING set.
# Features whose training maximum is 0 get a divisor of 1 so the division
# below never divides by zero.
X_train_normalizer = X_train.max(axis=0)
X_train_normalizer[X_train_normalizer == 0] = 1

# The test set must be scaled with the statistics learned from the training
# set. Dividing it by its own per-feature maxima (as this cell did before)
# puts train and test features on different scales and lets test-set
# statistics influence preprocessing. The name is kept so any later code that
# refers to X_test_normalizer still works.
X_test_normalizer = X_train_normalizer

X_train_encoded = X_train / X_train_normalizer
X_test_encoded = X_test / X_test_normalizer

# NOTE(review): the model cells in this notebook call fit / predict_proba on
# the raw X_train / X_test, so X_train_encoded / X_test_encoded are currently
# never used. Confirm whether training on the scaled features was intended.

In [5]:
# One-hot encode the labels: row k of the identity matrix is the indicator
# vector for class k, so indexing it with the integer labels yields one
# indicator row per sample.
classes = 10
one_hot_rows = np.eye(classes)
y_train_encoded = one_hot_rows[y_train.astype(int)]
y_test_encoded = one_hot_rows[y_test.astype(int)]

In [6]:
# Baseline network: logistic activations, plain SGD with a small momentum
# term and a fixed seed so runs are repeatable. verbose=True makes fit()
# print the loss after every iteration.
baseline_mlp_params = {
    'hidden_layer_sizes': 28,
    'activation': 'logistic',
    'solver': 'sgd',
    'learning_rate_init': 0.1,
    'momentum': 0.1,
    'max_iter': 100,
    'random_state': 123,
    'verbose': True,
}
model = MLPClassifier(**baseline_mlp_params)

In [7]:
# Train on the one-hot targets.
# NOTE(review): this fits on the raw X_train, not on X_train_encoded computed
# in the normalisation cell; confirm which input was intended (the predict
# cells would have to change together with this one).
model.fit(X=X_train, y=y_train_encoded)

Iteration 1, loss = 2.29350776
Iteration 2, loss = 1.55759886
Iteration 3, loss = 1.33713655
Iteration 4, loss = 1.22567012
Iteration 5, loss = 1.28045206
Iteration 6, loss = 1.28156916
Iteration 7, loss = 1.12445355
Iteration 8, loss = 1.09730823
Iteration 9, loss = 1.11028361
Iteration 10, loss = 1.04571306
Iteration 11, loss = 1.06865743
Iteration 12, loss = 0.99488684
Iteration 13, loss = 0.99746207
Iteration 14, loss = 0.98375742
Iteration 15, loss = 1.00370731
Iteration 16, loss = 0.97148752
Iteration 17, loss = 1.00505064
Iteration 18, loss = 1.00348687
Iteration 19, loss = 0.92924669
Iteration 20, loss = 0.89325510
Iteration 21, loss = 0.88885215
Iteration 22, loss = 0.92148068
Iteration 23, loss = 0.97507587
Iteration 24, loss = 0.92669127
Iteration 25, loss = 0.90138509
Iteration 26, loss = 0.87283312
Iteration 27, loss = 0.85121440
Iteration 28, loss = 0.83106134
Iteration 29, loss = 0.84012850
Iteration 30, loss = 0.86440830
Iteration 31, loss = 0.85148965
Iteration 32, los

MLPClassifier(activation='logistic', hidden_layer_sizes=28,
              learning_rate_init=0.1, max_iter=100, momentum=0.1,
              random_state=123, solver='sgd', verbose=True)

In [8]:
# Per-class scores for every training sample (same raw features used in fit).
pred_train = model.predict_proba(X=X_train)

In [9]:
# Turn the per-class scores into one predicted class per sample (index of the
# highest score) and compare with the integer training labels.
# Assumes Report takes (predicted, actual) in that order; confirm in report.py.
train_predicted_labels = np.array(pred_train).argmax(axis=1)
train_actual_labels = y_train.astype(int)
report_train = Report(train_predicted_labels, train_actual_labels)

In [10]:
# Print each training-set metric on its own line, in a fixed order.
# NOTE(review): the recorded per-class recall/precision (~0.43-0.49) and F1
# (~0.22-0.25) do not agree with the recorded confusion matrix for the same
# report (class 0: 5568 on the diagonal against row/column totals of
# 6088/5923, i.e. ratios of about 0.91-0.94). Verify the formulas in Report.
train_metric_rows = (
    ('Accuracy:', report_train.accuracy),
    ('Recall:', report_train.recall),
    ('Precision:', report_train.precision),
    ('F1:', report_train.f1),
)
for caption, compute_metric in train_metric_rows:
    print(caption, compute_metric())

Accuracy: 0.8708833333333333
Recall: {0: 0.4845531285353755, 1: 0.4903620833018369, 2: 0.46071687183200577, 3: 0.4531263937204531, 4: 0.4694878314565928, 5: 0.43460575719649563, 6: 0.4848089144250022, 7: 0.47317524386141946, 8: 0.4391833604907505, 9: 0.4523108083225925}
Precision: {0: 0.4776938915579959, 1: 0.482233125185846, 2: 0.4526858769121309, 3: 0.45757521167357235, 4: 0.4691470054446461, 5: 0.4553600699377117, 6: 0.4618893588786597, 7: 0.4668934616661135, 8: 0.4608267122598813, 9: 0.46454236006051436}
F1: {0: 0.24054953125675033, 1: 0.24313181664855144, 2: 0.2283330342723847, 3: 0.22766996817998475, 4: 0.23465867828612927, 5: 0.22237045733496985, 6: 0.23653584777437986, 7: 0.2350066822586034, 8: 0.22487239890066746, 9: 0.22917249743446214}


In [11]:
# Show the training confusion matrix as a table labelled with the class ids.
#
# list.sort() sorts in place and returns None, so the previous
# `list(set(y_test)).sort()` always passed index=None / columns=None and the
# labels were pandas' defaults rather than the intended class ids. np.unique
# returns the distinct labels already sorted. The labels are taken from
# y_train (this report was built from y_train) and cast to int, the same
# label type that was handed to Report.
train_class_labels = np.unique(y_train.astype(int))
pd.DataFrame(
    report_train.confusion(),
    index=train_class_labels,
    columns=train_class_labels
)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,5568,2,55,45,14,179,38,67,68,52
1,1,6487,46,45,36,62,16,88,160,24
2,135,51,5090,314,32,124,88,167,130,23
3,28,44,139,5080,8,357,11,60,179,116
4,25,10,85,7,5170,62,98,68,43,282
5,55,37,10,211,6,4167,74,3,399,22
6,71,8,289,26,157,205,5569,7,143,13
7,12,24,91,163,34,54,0,5627,41,379
8,24,74,134,180,34,130,22,56,4582,125
9,4,5,19,60,351,81,2,122,106,4913


In [12]:
# Per-class scores for every held-out sample (raw features, as in fit).
pred_test = model.predict_proba(X=X_test)

In [13]:
# Turn the per-class scores into one predicted class per sample (index of the
# highest score) and compare with the integer test labels.
# Assumes Report takes (predicted, actual) in that order; confirm in report.py.
test_predicted_labels = np.array(pred_test).argmax(axis=1)
test_actual_labels = y_test.astype(int)
report_test = Report(test_predicted_labels, test_actual_labels)

In [14]:
# Print each test-set metric on its own line, in a fixed order.
# NOTE(review): the recorded per-class recall/precision/F1 do not agree with
# the recorded confusion matrix for the same report (class 0: 935 on the
# diagonal against row/column totals of 1034/980, i.e. ratios of about
# 0.90-0.95, while ~0.48 is reported). Verify the formulas in Report.
test_metric_rows = (
    ('Accuracy:', report_test.accuracy),
    ('Recall:', report_test.recall),
    ('Precision:', report_test.precision),
    ('F1:', report_test.f1),
)
for caption, compute_metric in test_metric_rows:
    print(caption, compute_metric())

Accuracy: 0.8745
Recall: {0: 0.48825065274151436, 1: 0.49443207126948774, 2: 0.4685890834191555, 3: 0.45902517407605786, 4: 0.47004856988667026, 5: 0.4322087842138765, 6: 0.4790647090810223, 7: 0.47037609479649667, 8: 0.44183381088825213, 9: 0.44772851669403396}
Precision: {0: 0.4748603351955307, 1: 0.488556338028169, 2: 0.46381243628950053, 3: 0.4544008483563097, 4: 0.4655264564404062, 5: 0.45357381429525717, 6: 0.4666313559322034, 7: 0.4670076726342711, 8: 0.45702430349733253, 9: 0.4653014789533561}
F1: {0: 0.24073120494335737, 1: 0.2457383218950631, 2: 0.23309426229508196, 3: 0.22835065281108444, 4: 0.23388829215896886, 5: 0.2213168187744459, 6: 0.23638314998658438, 7: 0.23434291581108832, 8: 0.2246503496503496, 9: 0.2281729428172943}


In [15]:
# Show the test confusion matrix as a table labelled with the class ids.
#
# list.sort() sorts in place and returns None, so the previous
# `list(set(y_test)).sort()` always passed index=None / columns=None and the
# labels were pandas' defaults rather than the intended class ids. np.unique
# returns the distinct labels already sorted; they are cast to int, the same
# label type that was handed to Report.
test_class_labels = np.unique(y_test.astype(int))
pd.DataFrame(
    report_test.confusion(),
    index=test_class_labels,
    columns=test_class_labels
)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,935,0,11,7,3,26,14,9,16,13
1,0,1110,7,1,3,3,2,14,17,5
2,15,3,910,41,2,16,16,35,11,3
3,3,4,21,857,1,79,2,15,29,18
4,1,0,9,0,871,12,27,5,10,65
5,8,3,0,34,1,679,10,2,75,6
6,14,3,29,2,31,23,881,1,22,1
7,2,0,12,30,6,9,0,913,9,61
8,1,11,31,35,4,32,5,7,771,19
9,1,1,2,3,60,13,1,27,14,818


In [51]:
# Second experiment: same network and seed as the baseline, but with the
# 'adaptive' learning-rate schedule and a larger iteration budget (500).
# This rebinds `model`, replacing the previously trained estimator.
adaptive_mlp_params = {
    'hidden_layer_sizes': 28,
    'activation': 'logistic',
    'solver': 'sgd',
    'learning_rate': 'adaptive',
    'learning_rate_init': 0.1,
    'momentum': 0.1,
    'max_iter': 500,
    'random_state': 123,
    'verbose': True,
}
model = MLPClassifier(**adaptive_mlp_params)

In [52]:
# Train the adaptive-learning-rate model on the one-hot targets.
# NOTE(review): this fits on the raw X_train, not on X_train_encoded computed
# in the normalisation cell; confirm which input was intended (the predict
# cells would have to change together with this one).
model.fit(X=X_train, y=y_train_encoded)

Iteration 1, loss = 2.29350776
Iteration 2, loss = 1.55759886
Iteration 3, loss = 1.33713655
Iteration 4, loss = 1.22567012
Iteration 5, loss = 1.28045206
Iteration 6, loss = 1.28156916
Iteration 7, loss = 1.12445355
Iteration 8, loss = 1.09730823
Iteration 9, loss = 1.11028361
Iteration 10, loss = 1.04571306
Iteration 11, loss = 1.06865743
Iteration 12, loss = 0.99488684
Iteration 13, loss = 0.99746207
Iteration 14, loss = 0.98375742
Iteration 15, loss = 1.00370731
Iteration 16, loss = 0.97148752
Iteration 17, loss = 1.00505064
Iteration 18, loss = 1.00348687
Iteration 19, loss = 0.92924669
Iteration 20, loss = 0.89325510
Iteration 21, loss = 0.88885215
Iteration 22, loss = 0.92148068
Iteration 23, loss = 0.97507587
Iteration 24, loss = 0.92669127
Iteration 25, loss = 0.90138509
Iteration 26, loss = 0.87283312
Iteration 27, loss = 0.85121440
Iteration 28, loss = 0.83106134
Iteration 29, loss = 0.84012850
Iteration 30, loss = 0.86440830
Iteration 31, loss = 0.85148965
Iteration 32, los

Iteration 246, loss = 0.59874195
Iteration 247, loss = 0.59702968
Iteration 248, loss = 0.59549742
Iteration 249, loss = 0.59486623
Iteration 250, loss = 0.59527015
Iteration 251, loss = 0.59454938
Iteration 252, loss = 0.59242551
Iteration 253, loss = 0.59380225
Iteration 254, loss = 0.59255425
Iteration 255, loss = 0.59449970
Iteration 256, loss = 0.59440370
Iteration 257, loss = 0.59396826
Iteration 258, loss = 0.59190325
Iteration 259, loss = 0.59341610
Iteration 260, loss = 0.59468715
Iteration 261, loss = 0.59399022
Iteration 262, loss = 0.59201186
Iteration 263, loss = 0.59017027
Iteration 264, loss = 0.59004800
Iteration 265, loss = 0.58979097
Iteration 266, loss = 0.59248743
Iteration 267, loss = 0.59368650
Iteration 268, loss = 0.59133850
Iteration 269, loss = 0.59226874
Iteration 270, loss = 0.59391651
Iteration 271, loss = 0.59486211
Iteration 272, loss = 0.59351835
Iteration 273, loss = 0.59108869
Iteration 274, loss = 0.59115965
Iteration 275, loss = 0.59105530
Iteration 

MLPClassifier(activation='logistic', hidden_layer_sizes=28,
              learning_rate='adaptive', learning_rate_init=0.1, max_iter=500,
              momentum=0.1, random_state=123, solver='sgd', verbose=True)

In [53]:
# Per-class scores of the retrained model for every training sample.
# Overwrites the pred_train produced by the first experiment.
pred_train = model.predict_proba(X=X_train)

In [54]:
# Rebuild the training report for the retrained model: highest-scoring class
# per sample versus the integer training labels.
# Assumes Report takes (predicted, actual) in that order; confirm in report.py.
train_predicted_labels = np.array(pred_train).argmax(axis=1)
train_actual_labels = y_train.astype(int)
report_train = Report(train_predicted_labels, train_actual_labels)

In [55]:
# Print each training-set metric of the retrained model, in a fixed order.
# NOTE(review): the recorded per-class recall/precision/F1 do not agree with
# the recorded confusion matrix for the same report (class 0: 5754 on the
# diagonal against row/column totals of 6135/5923, i.e. ratios of about
# 0.94-0.97, while ~0.49 is reported). Verify the formulas in Report.
train_metric_rows = (
    ('Accuracy:', report_train.accuracy),
    ('Recall:', report_train.recall),
    ('Precision:', report_train.precision),
    ('F1:', report_train.f1),
)
for caption, compute_metric in train_metric_rows:
    print(caption, compute_metric())

Accuracy: 0.9071666666666667
Recall: {0: 0.49276355228226426, 1: 0.4912082107010792, 2: 0.4713867447431461, 3: 0.4658011675524963, 4: 0.4851956291857596, 5: 0.4556682397831108, 6: 0.4859723790497698, 7: 0.48021239525429354, 8: 0.4631124977060011, 9: 0.4583447145588637}
Precision: {0: 0.48397678526368915, 1: 0.4912082107010792, 2: 0.4730656219392752, 3: 0.4652336611260987, 4: 0.4660572202471644, 5: 0.4739425587467363, 6: 0.47881899871630296, 7: 0.4758303189740217, 8: 0.46649413069599777, 9: 0.4789268385500904}
F1: {0: 0.24416532292285498, 1: 0.2456041053505396, 2: 0.2361123455692827, 3: 0.23275862068965517, 4: 0.23771695017701408, 5: 0.2323128903450394, 6: 0.24118458487800673, 7: 0.23900565718297068, 8: 0.23239858175622785, 9: 0.23420489438913186}


In [56]:
# Show the retrained model's training confusion matrix as a table labelled
# with the class ids.
#
# list.sort() sorts in place and returns None, so the previous
# `list(set(y_test)).sort()` always passed index=None / columns=None and the
# labels were pandas' defaults rather than the intended class ids. np.unique
# returns the distinct labels already sorted. The labels are taken from
# y_train (this report was built from y_train) and cast to int, the same
# label type that was handed to Report.
train_class_labels = np.unique(y_train.astype(int))
pd.DataFrame(
    report_train.confusion(),
    index=train_class_labels,
    columns=train_class_labels
)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,5754,2,61,30,18,87,61,24,57,41
1,0,6509,22,18,22,19,6,36,95,15
2,20,46,5313,251,12,40,46,102,62,26
3,15,36,97,5346,2,267,4,68,201,109
4,21,13,113,13,5506,64,111,98,54,315
5,33,34,15,184,7,4538,52,18,129,27
6,35,5,130,32,50,149,5595,5,87,2
7,10,26,78,100,22,42,0,5788,48,262
8,32,66,121,111,30,176,41,30,5047,118
9,3,5,8,46,173,39,2,96,71,5034


In [57]:
# Per-class scores of the retrained model for every held-out sample.
# Overwrites the pred_test produced by the first experiment.
pred_test = model.predict_proba(X=X_test)

In [58]:
# Rebuild the test report for the retrained model: highest-scoring class per
# sample versus the integer test labels.
# Assumes Report takes (predicted, actual) in that order; confirm in report.py.
test_predicted_labels = np.array(pred_test).argmax(axis=1)
test_actual_labels = y_test.astype(int)
report_test = Report(test_predicted_labels, test_actual_labels)

In [59]:
# Print each test-set metric of the retrained model, in a fixed order.
# NOTE(review): the recorded per-class recall/precision/F1 do not agree with
# the recorded confusion matrix for the same report (class 0: 958 on the
# diagonal against row/column totals of 1043/980, i.e. ratios of about
# 0.92-0.98, while ~0.49 is reported). Verify the formulas in Report.
test_metric_rows = (
    ('Accuracy:', report_test.accuracy),
    ('Recall:', report_test.recall),
    ('Precision:', report_test.precision),
    ('F1:', report_test.f1),
)
for caption, compute_metric in test_metric_rows:
    print(caption, compute_metric())

Accuracy: 0.899
Recall: {0: 0.49432404540763675, 1: 0.49352967425256583, 2: 0.4702258726899384, 3: 0.4647588765235824, 4: 0.4855945521215296, 5: 0.44561839651957735, 6: 0.4799131378935939, 7: 0.4728205128205128, 8: 0.46068660022148394, 9: 0.4575268817204301}
Precision: {0: 0.47876061969015493, 1: 0.4917741218319253, 2: 0.47658688865764826, 3: 0.4565330557001562, 4: 0.46466165413533833, 5: 0.4720210664911126, 6: 0.4793926247288503, 7: 0.47354904982023627, 8: 0.45941468801766977, 9: 0.4778214486243683}
F1: {0: 0.2432089362782432, 1: 0.24632516703786192, 2: 0.2366925064599483, 3: 0.2303046218487395, 4: 0.23744877049180327, 5: 0.22921994884910488, 6: 0.23982637004883342, 7: 0.23659225044906335, 8: 0.2300248824993088, 9: 0.23372699807745123}


In [60]:
# Show the retrained model's test confusion matrix as a table labelled with
# the class ids.
#
# list.sort() sorts in place and returns None, so the previous
# `list(set(y_test)).sort()` always passed index=None / columns=None and the
# labels were pandas' defaults rather than the intended class ids. np.unique
# returns the distinct labels already sorted; they are cast to int, the same
# label type that was handed to Report.
test_class_labels = np.unique(y_test.astype(int))
pd.DataFrame(
    report_test.confusion(),
    index=test_class_labels,
    columns=test_class_labels
)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,958,0,12,11,3,15,18,8,11,7
1,0,1106,6,1,3,1,1,9,14,2
2,2,4,916,36,2,5,8,29,4,0
3,1,7,24,877,0,72,2,15,33,13
4,0,0,16,4,927,8,27,10,15,61
5,5,5,0,28,1,717,9,1,27,9
6,7,3,16,2,9,18,884,1,18,2
7,4,1,12,21,4,13,1,922,10,37
8,3,9,28,24,5,36,8,7,832,27
9,0,0,2,6,28,7,0,26,10,851
