In [9]:
import warnings

import numpy as np
import sklearn
from sklearn.neural_network import MLPClassifier
from sklearn.exceptions import ConvergenceWarning

import matplotlib.pyplot as plt

In [2]:
# vanilla gradient descent

#### Load the MNIST-784 data

In [2]:
from sklearn.datasets import fetch_openml

# Fetch MNIST (OpenML dataset 'mnist_784', version 1) as a (features, labels) pair.
# as_frame=False pins the result to NumPy arrays: newer scikit-learn releases
# otherwise return pandas objects, and the positional slicing used for the
# train/test split should not depend on which container type comes back.
X, y = fetch_openml('mnist_784', version=1, return_X_y=True, as_frame=False)

In [3]:
# Scale the features by 1/255 into a separate name so that re-running this cell
# never rescales already-scaled data (the original reassigned X in place).
# NOTE: X itself now stays unscaled; use X_scaled / X_train / X_test downstream.
# Presumably the raw values are 8-bit pixel intensities (0-255) — confirm.
N_TRAIN = 60000  # rows [0, N_TRAIN) are training data, the remainder is test data
X_scaled = X / 255.
X_train, X_test = X_scaled[:N_TRAIN], X_scaled[N_TRAIN:]
y_train, y_test = y[:N_TRAIN], y[N_TRAIN:]

MLPClassifier with 'sgd'

In [4]:
# SGD-trained network with a single hidden layer of 50 units.
# max_iter=10 keeps the run short; verbose=10 prints the loss every iteration.
mlp = MLPClassifier(
    solver='sgd',
    hidden_layer_sizes=(50,),
    learning_rate_init=0.1,
    alpha=0.0001,
    max_iter=10,
    random_state=1,
    verbose=10,
)

In [5]:
# Fit the SGD model. With max_iter=10 the optimizer is stopped early on purpose,
# so sklearn's ConvergenceWarning is silenced for the duration of this block only.
with warnings.catch_warnings():
    warnings.filterwarnings(
        action="ignore",
        category=ConvergenceWarning,
        module="sklearn",
    )
    mlp.fit(X_train, y_train)

Iteration 1, loss = 0.32009978
Iteration 2, loss = 0.15347534
Iteration 3, loss = 0.11544755
Iteration 4, loss = 0.09279764
Iteration 5, loss = 0.07889367
Iteration 6, loss = 0.07170497
Iteration 7, loss = 0.06282111
Iteration 8, loss = 0.05530788
Iteration 9, loss = 0.04960484
Iteration 10, loss = 0.04645355


MLPClassifier with 'adam'

In [6]:
# Same architecture and hyperparameters as the SGD model; only the solver differs.
# NOTE(review): learning_rate_init=0.1 is shared with the SGD run. The recorded
# Adam loss hovers around 0.4-0.5 instead of decreasing, so this step size may be
# too large for Adam (scikit-learn's default is 0.001) — confirm this is intended.
mlp_adam = MLPClassifier(
    solver='adam',
    hidden_layer_sizes=(50,),
    learning_rate_init=0.1,
    alpha=0.0001,
    max_iter=10,
    random_state=1,
    verbose=10,
)

In [7]:
# Fit the Adam model. max_iter=10 stops training early by design, so sklearn's
# ConvergenceWarning is suppressed inside this block only.
with warnings.catch_warnings():
    warnings.filterwarnings(
        action="ignore",
        category=ConvergenceWarning,
        module="sklearn",
    )
    mlp_adam.fit(X_train, y_train)

Iteration 1, loss = 0.60642772
Iteration 2, loss = 0.41250209
Iteration 3, loss = 0.41018109
Iteration 4, loss = 0.43526364
Iteration 5, loss = 0.44345262
Iteration 6, loss = 0.45637747
Iteration 7, loss = 0.43507076
Iteration 8, loss = 0.40767598
Iteration 9, loss = 0.42664405
Iteration 10, loss = 0.46174098


Scores after 10 iterations (max_iter=10)

In [11]:
# Report the SGD model's score() on the training and the held-out test split,
# one line per split.
print(
    "Training set score: %f" % mlp.score(X_train, y_train),
    "Test set score: %f" % mlp.score(X_test, y_test),
    sep="\n",
)

Training set score: 0.986800
Test set score: 0.970000


In [14]:
# Report the Adam model's score() on the training and the held-out test split,
# one line per split.
print(
    "Training set score: %f" % mlp_adam.score(X_train, y_train),
    "Test set score: %f" % mlp_adam.score(X_test, y_test),
    sep="\n",
)

Training set score: 0.874800
Test set score: 0.867000


In [20]:
# SGD model with the same settings as the 10-iteration run, but with the
# iteration cap raised to 1000 so training can continue much longer.
mlp_sgd_1000 = MLPClassifier(
    solver='sgd',
    hidden_layer_sizes=(50,),
    learning_rate_init=0.1,
    alpha=0.0001,
    max_iter=1000,
    random_state=1,
    verbose=10,
)

In [21]:
# Adam model with the same settings as the 10-iteration run, but with the
# iteration cap raised to 1000.
# NOTE(review): learning_rate_init=0.1 matches the SGD run; the recorded Adam
# loss does not decrease at this step size (scikit-learn's default is 0.001) —
# confirm this is intended for the comparison.
mlp_adam_1000 = MLPClassifier(
    solver='adam',
    hidden_layer_sizes=(50,),
    learning_rate_init=0.1,
    alpha=0.0001,
    max_iter=1000,
    random_state=1,
    verbose=10,
)

In [25]:
# Fit the long-running SGD model; any ConvergenceWarning raised from sklearn is
# ignored within this block only.
with warnings.catch_warnings():
    warnings.filterwarnings(
        action="ignore",
        category=ConvergenceWarning,
        module="sklearn",
    )
    mlp_sgd_1000.fit(X_train, y_train)

Iteration 1, loss = 0.32009978
Iteration 2, loss = 0.15347534
Iteration 3, loss = 0.11544755
Iteration 4, loss = 0.09279764
Iteration 5, loss = 0.07889367
Iteration 6, loss = 0.07170497
Iteration 7, loss = 0.06282111
Iteration 8, loss = 0.05530788
Iteration 9, loss = 0.04960484
Iteration 10, loss = 0.04645355
Iteration 11, loss = 0.04082169
Iteration 12, loss = 0.03828222
Iteration 13, loss = 0.03557957
Iteration 14, loss = 0.03054891
Iteration 15, loss = 0.02924761
Iteration 16, loss = 0.02610471
Iteration 17, loss = 0.02363894
Iteration 18, loss = 0.02208186
Iteration 19, loss = 0.01932900
Iteration 20, loss = 0.01830387
Iteration 21, loss = 0.01639227
Iteration 22, loss = 0.01392950
Iteration 23, loss = 0.01270193
Iteration 24, loss = 0.01234102
Iteration 25, loss = 0.01081313
Iteration 26, loss = 0.01028644
Iteration 27, loss = 0.00896707
Iteration 28, loss = 0.00744908
Iteration 29, loss = 0.00707946
Iteration 30, loss = 0.00573869
Iteration 31, loss = 0.00499554
Iteration 32, los

In [26]:
# Fit the long-running Adam model; any ConvergenceWarning raised from sklearn is
# ignored within this block only.
with warnings.catch_warnings():
    warnings.filterwarnings(
        action="ignore",
        category=ConvergenceWarning,
        module="sklearn",
    )
    mlp_adam_1000.fit(X_train, y_train)

Iteration 1, loss = 0.60642772
Iteration 2, loss = 0.41250209
Iteration 3, loss = 0.41018109
Iteration 4, loss = 0.43526364
Iteration 5, loss = 0.44345262
Iteration 6, loss = 0.45637747
Iteration 7, loss = 0.43507076
Iteration 8, loss = 0.40767598
Iteration 9, loss = 0.42664405
Iteration 10, loss = 0.46174098
Iteration 11, loss = 0.49915406
Iteration 12, loss = 0.52411150
Iteration 13, loss = 0.47356631
Iteration 14, loss = 0.48478371
Iteration 15, loss = 0.46341648
Iteration 16, loss = 0.47058690
Iteration 17, loss = 0.47210828
Iteration 18, loss = 0.52342594
Iteration 19, loss = 0.56260587
Training loss did not improve more than tol=0.000100 for 10 consecutive epochs. Stopping.


In [29]:
# Side-by-side score() report for the two max_iter=1000 models:
# SGD first, then Adam, each on the training and the test split.
print(
    "SGD Training set score: %f" % mlp_sgd_1000.score(X_train, y_train),
    "SGD Test set score: %f" % mlp_sgd_1000.score(X_test, y_test),
    "Adam Training set score: %f" % mlp_adam_1000.score(X_train, y_train),
    "Adam Test set score: %f" % mlp_adam_1000.score(X_test, y_test),
    sep="\n",
)

SGD Training set score: 1.000000
SGD Test set score: 0.973100
Adam Training set score: 0.876067
Adam Test set score: 0.875200
