# MNIST NN Experiments

In [None]:
import numpy as np
import time
from sklearn.neural_network import MLPClassifier

def run(x_train, y_train, x_test, y_test, clf):
    """Fit clf on the training data and evaluate it on the test data.

    Returns a list: [test score, final training loss, number of
    parameters (weights plus biases), training time in seconds].
    """
    start = time.time()
    clf.fit(x_train, y_train)
    elapsed = time.time() - start
    final_loss = clf.loss_

    #  Parameter count: every weight matrix entry plus every bias value
    n_weights = sum(w.shape[0] * w.shape[1] for w in clf.coefs_)
    n_biases = sum(b.shape[0] for b in clf.intercepts_)

    return [clf.score(x_test, y_test), final_loss, n_weights + n_biases, elapsed]

def nn(layers, act):
    """Create an untrained MLPClassifier that uses the SGD solver.

    layers is the tuple of hidden layer sizes and act is the name of
    the activation function; all other settings are fixed.
    """
    options = {
        "solver": "sgd",
        "verbose": False,
        "tol": 1e-8,
        "nesterovs_momentum": False,
        "early_stopping": False,
        "learning_rate_init": 0.001,
        "momentum": 0.9,
        "max_iter": 200,
        "hidden_layer_sizes": layers,
        "activation": act,
    }
    return MLPClassifier(**options)

def main():
    """Compare architectures and activations on a small MNIST subset.

    Every (activation, architecture) pair is trained ten times on the
    first 1,000 training samples.  The mean and standard error of the
    test score and of the final training loss are printed, together
    with the parameter count and the mean training time.
    """
    #  MNIST as vectors, pixel values divided by 256
    x_train = np.load("../data/mnist/mnist_train_vectors.npy").astype("float64")/256.0
    y_train = np.load("../data/mnist/mnist_train_labels.npy")
    x_test = np.load("../data/mnist/mnist_test_vectors.npy").astype("float64")/256.0
    y_test = np.load("../data/mnist/mnist_test_labels.npy")

    #  Keep only the first N samples of each set
    N = 1000
    x_train, y_train = x_train[:N], y_train[:N]
    x_test, y_test = x_test[:N], y_test[:N]

    layers = [
        (1,), (500,), (800,), (1000,), (2000,), (3000,),
        (1000,500), (3000,1500),
        (2,2,2), (1000,500,250), (2000,1000,500),
    ]
    trials = 10
    report = "    layers: %14s, score= %0.4f +/- %0.4f, loss = %0.4f +/- %0.4f (params = %6d, time = %0.2f s)"

    for act in ["relu", "logistic", "tanh"]:
        print("%s:" % act)
        for layer in layers:
            #  each entry is [score, loss, params, time]
            outcomes = [run(x_train, y_train, x_test, y_test, nn(layer,act))
                        for _ in range(trials)]
            scores = np.array([o[0] for o in outcomes])
            losses = np.array([o[1] for o in outcomes])
            times = np.array([o[3] for o in outcomes])
            params = outcomes[-1][2]
            root_n = np.sqrt(scores.shape[0])
            print(report % (str(layer), scores.mean(), scores.std()/root_n,
                            losses.mean(), losses.std()/root_n, params, times.mean()))

main()

# ReLU

In [None]:
#
#  file:  mnist_nn_experiments_relu.py
#
#  Reduced MNIST + NN for Chapter 6.
#
#  RTK, 13-Oct-2018
#  Last update:  30-Dec-2018
#
###############################################################

import numpy as np
import time
from sklearn.neural_network import MLPClassifier 

def nparams(x_train, y_train, clf):
    """Return the number of parameters (weights plus biases) of clf.

    The weight and bias arrays are read from the fitted classifier, so
    clf is fit with max_iter set to 1 first.  The caller's max_iter is
    put back afterwards so that a later fit call on the same object is
    not silently limited to a single iteration.
    """
    saved_max_iter = clf.max_iter
    clf.max_iter = 1
    try:
        clf.fit(x_train, y_train)
    finally:
        clf.max_iter = saved_max_iter

    n_weights = sum(w.shape[0]*w.shape[1] for w in clf.coefs_)
    n_biases = sum(b.shape[0] for b in clf.intercepts_)
    return n_weights + n_biases


def run(x_train, y_train, x_test, y_test, clf):
    """Train clf, then score it on the test set.

    Returns [test score, final training loss, parameter count,
    training time in seconds].
    """

    t0 = time.time()
    clf.fit(x_train, y_train)
    train_time = time.time() - t0
    loss = clf.loss_

    #  Total parameters = all weight matrix entries + all bias values
    params = sum(w.shape[0]*w.shape[1] for w in clf.coefs_)
    params += sum(b.shape[0] for b in clf.intercepts_)

    test_score = clf.score(x_test, y_test)
    return [test_score, loss, params, train_time]


def nn(layers, act):
    """Return a new, untrained MLPClassifier.

    layers sets the hidden layer sizes and act the activation
    function; the remaining SGD settings are fixed.
    """

    settings = dict(
        solver="sgd",
        verbose=False,
        tol=1e-8,
        nesterovs_momentum=False,
        early_stopping=False,
        learning_rate_init=0.001,
        momentum=0.9,
        max_iter=200,
        hidden_layer_sizes=layers,
        activation=act,
    )
    return MLPClassifier(**settings)


def main():
    """Train ReLU networks of different shapes on an MNIST subset.

    Each architecture is trained five times; the mean and standard
    error of the test score and final training loss are printed with
    the parameter count and the mean training time.
    """

    #  Vector MNIST versions scaled [0,1)
    x_train = np.load("../data/mnist/mnist_train_vectors.npy").astype("float64")/256.0
    y_train = np.load("../data/mnist/mnist_train_labels.npy")
    x_test = np.load("../data/mnist/mnist_test_vectors.npy").astype("float64")/256.0
    y_test = np.load("../data/mnist/mnist_test_labels.npy")

    #  Keep at most the first N samples of each set
    N = 20000
    x_train, y_train = x_train[:N], y_train[:N]
    x_test, y_test = x_test[:N], y_test[:N]

    #  chosen so # params approx same across respective number of layers
    layers = [
        (1000,), (2000,), (4000,), (8000,),
        (700,350), (1150,575), (1850,925), (2850,1425),
        (660, 330, 165), (1080,540,270), (1714,857,429), (2620,1310,655),
    ]

    #  This assignment replaces the full list above, so only these
    #  two architectures are actually trained
    layers = [(8000,),(2850,1425)]

    trials = 5
    report = "layers: %14s, score= %0.4f +/- %0.4f, loss = %0.4f +/- %0.4f (params = %6d, time = %0.2f s)"

    for layer in layers:
        #  each entry is [score, loss, params, time]
        outcomes = [run(x_train, y_train, x_test, y_test, nn(layer,"relu"))
                    for _ in range(trials)]
        scores = np.array([o[0] for o in outcomes])
        losses = np.array([o[1] for o in outcomes])
        times = np.array([o[3] for o in outcomes])
        params = outcomes[-1][2]
        root_n = np.sqrt(scores.shape[0])
        print(report % (str(layer), scores.mean(), scores.std()/root_n,
                        losses.mean(), losses.std()/root_n, params, times.mean()))


main()



# Retrain

In [None]:
#
#  file:  mnist_nn_experiments_retrain.py
#
#  Reduced MNIST + NN for Chapter 6.
#
#  RTK, 15-Oct-2018
#  Last update:  15-Oct-2018
#
###############################################################

import numpy as np
import time
from sklearn.neural_network import MLPClassifier 


def run(x_train, y_train, x_test, y_test, clf):
    """Fit clf and return [test score, final training loss, training time (s)]"""

    began = time.time()
    clf.fit(x_train, y_train)
    elapsed = time.time() - began

    final_loss = clf.loss_
    test_score = clf.score(x_test, y_test)
    return [test_score, final_loss, elapsed]


def nn():
    """Return a new, untrained MLPClassifier with fixed settings"""

    config = {
        "solver": "sgd",
        "verbose": False,
        "tol": 1e-8,
        "nesterovs_momentum": False,
        "early_stopping": False,
        "learning_rate_init": 0.001,
        "momentum": 0.9,
        "max_iter": 50,
        "hidden_layer_sizes": (1000,500),
        "activation": "relu",
        "batch_size": 64,
    }
    return MLPClassifier(**config)


def main():
    """Train the same network repeatedly on the MNIST vector data.

    A fresh network is trained M times on the same samples; the score
    and loss of every run are printed, followed by the minimum,
    maximum, mean and standard error over all runs.
    """

    #  Vector MNIST versions scaled [0,1)
    x_train = np.load("../data/mnist/mnist_train_vectors.npy").astype("float64")/256.0
    y_train = np.load("../data/mnist/mnist_train_labels.npy")
    x_test = np.load("../data/mnist/mnist_test_vectors.npy").astype("float64")/256.0
    y_test = np.load("../data/mnist/mnist_test_labels.npy")

    #  keep at most the first N samples of each set
    N = 20000
    x, y = x_train[:N], y_train[:N]
    xt, yt = x_test[:N], y_test[:N]

    M = 20
    scores = np.zeros(M)
    losses = np.zeros(M)
    for i in range(M):
        score, loss, _ = run(x, y, xt, yt, nn())
        print("%03i: score = %0.5f, loss = %0.5f" % (i,score,loss))
        scores[i] = score
        losses[i] = loss

    print()
    summaries = (
        ("Scores:  min, max, mean+/-SE: %0.5f, %0.5f, %0.5f +/- %0.5f", scores),
        ("Loss  :  min, max, mean+/-SE: %0.5f, %0.5f, %0.5f +/- %0.5f", losses),
    )
    for template, values in summaries:
        std_err = values.std()/np.sqrt(values.shape[0])
        print(template % (values.min(), values.max(), values.mean(), std_err))
    print()


main()



# Batch Size

In [None]:
#
#  file:  mnist_nn_experiments_batch_size.py
#
#  RTK, 14-Oct-2018
#  Last update:  07-Jan-2019
#
###############################################################

import numpy as np
import time
from sklearn.neural_network import MLPClassifier 


def run(x_train, y_train, x_test, y_test, clf):
    """Train clf, then score it on the test set.

    Returns [test score, final training loss, parameter count,
    training time in seconds, clf.n_iter_].
    """

    t_start = time.time()
    clf.fit(x_train, y_train)
    duration = time.time() - t_start
    loss = clf.loss_

    #  count every weight matrix entry and every bias value
    weight_total = sum(w.shape[0]*w.shape[1] for w in clf.coefs_)
    bias_total = sum(b.shape[0] for b in clf.intercepts_)
    params = weight_total + bias_total

    return [clf.score(x_test, y_test), loss, params, duration, clf.n_iter_]


def nn(bz,epochs):
    """Return a new, untrained MLPClassifier.

    bz is passed as batch_size and epochs as max_iter; the remaining
    settings are fixed.
    """

    config = dict(
        solver="sgd",
        verbose=False,
        tol=1e-8,
        nesterovs_momentum=False,
        early_stopping=False,
        learning_rate_init=0.001,
        momentum=0.9,
        max_iter=epochs,
        hidden_layer_sizes=(1000,500),
        activation="relu",
        batch_size=bz,
    )
    return MLPClassifier(**config)


def main():
    """Measure MNIST test score as a function of minibatch size.

    For every batch size the number of epochs is chosen so that each
    training run sees about M minibatches.  Five networks are trained
    per batch size and the mean and standard error of the test score
    and final training loss are printed.
    """

    #  Vector MNIST versions scaled [0,1)
    x_train = np.load("../data/mnist/mnist_train_vectors.npy").astype("float64")/256.0
    y_train = np.load("../data/mnist/mnist_train_labels.npy")
    x_test = np.load("../data/mnist/mnist_test_vectors.npy").astype("float64")/256.0
    y_test = np.load("../data/mnist/mnist_test_labels.npy")

    #  training set samples
    N = 16384
    x = x_train[:N]
    y = y_train[:N]

    batch_sizes = [16384,8192,4096,2048,1024,512,256,128,64,32,16,8,4,2]
    M = 8192  # set epochs so minibatches is constant

    for bz in batch_sizes:
        print("batch size = %4d:" % bz)
        #  An epoch holds N/bz minibatches, so M*bz/N epochs give M
        #  minibatches; always train for at least one epoch.
        #  (alternative kept from the original: a fixed epochs = 100)
        epochs = max(1, (M*bz) // N)
        scores = []
        loss = []
        for _ in range(5):
            #  parameter count and training time are not reported here
            s,l,_params,_elapsed,m = run(x, y, x_test, y_test, nn(bz,epochs))
            scores.append(s)
            loss.append(l)
            print("    score = %0.5f, loss = %0.5f, epochs = %d, actual = %d" % (s,l,epochs,m))
        scores = np.array(scores)
        loss = np.array(loss)
        sm = scores.mean()
        se = scores.std() / np.sqrt(scores.shape[0])
        lm = loss.mean()
        le = loss.std() / np.sqrt(loss.shape[0])
        print("    final score = %0.5f +/- %0.5f, loss = %0.5f +/- %0.5f, epochs = %d" % (sm,se,lm,le,epochs))


main()



In [None]:
import numpy as np
import matplotlib.pylab as plt

def main():
    """Plot mean test score against minibatch size with error bars.

    Two hard-coded result sets are drawn and the figure is written to
    mnist_nn_experiments_batch_size_plot.png before being shown.
    """
    bz = np.array([2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384])

    #  results for epochs == 100 (scores, then error bar sizes)
    sc = np.array([0.97174,0.96976,0.96876,0.96734,0.96596,0.96332,0.95684,0.94564,0.93312,0.91788,0.90046,0.87318,0.82926,0.76104])
    ec = np.array([0.00006,0.00034,0.00007,0.00012,0.00011,0.00040,0.00042,0.00040,0.00038,0.00040,0.00078,0.00086,0.00203,0.00591])

    #  results for minibatches = 8192 (scores, then error bar sizes)
    sc0= np.array([0.93658,0.94556,0.94856,0.94916,0.95012,0.94946,0.95068,0.95038,0.95112,0.95030,0.95066,0.95028,0.94992,0.94994])
    ec0= np.array([0.00214,0.00078,0.00070,0.00115,0.00025,0.00028,0.00053,0.00041,0.00045,0.00023,0.00032,0.00058,0.00044,0.00022])

    #  one curve per result set, told apart by marker shape
    for means, errs, mark in ((sc, ec, 'o'), (sc0, ec0, 's')):
        plt.errorbar(bz,means,errs,marker=mark,color='k', fillstyle='none')

    plt.xlabel("Minibatch Size")
    plt.ylabel("Mean Score")
    plt.tight_layout(pad=0, w_pad=0, h_pad=0)
    plt.savefig("mnist_nn_experiments_batch_size_plot.png", format="png", dpi=300)
    plt.show()


main()



# Base Learning Rate

In [None]:
#
#  file:  mnist_nn_experiments_base_lr.py
#
#  Reduced MNIST + NN for Chapter 6.
#
#  RTK, 15-Oct-2018
#  Last update:  15-Oct-2018
#
###############################################################

import numpy as np
import time
from sklearn.neural_network import MLPClassifier 


def run(x_train, y_train, x_test, y_test, clf):
    """Train clf and return [test score, final training loss, training time (s)]"""

    t0 = time.time()
    clf.fit(x_train, y_train)
    t1 = time.time()

    loss = clf.loss_
    score = clf.score(x_test, y_test)
    return [score, loss, t1 - t0]


def nn(base_lr, epochs):
    """Return a new, untrained MLPClassifier.

    base_lr is passed as learning_rate_init and epochs as max_iter;
    the remaining settings are fixed.
    """

    config = {
        "solver": "sgd",
        "verbose": False,
        "tol": 1e-8,
        "nesterovs_momentum": False,
        "early_stopping": False,
        "learning_rate_init": base_lr,
        "momentum": 0.9,
        "max_iter": epochs,
        "hidden_layer_sizes": (1000,500),
        "activation": "relu",
        "learning_rate": "constant",
        "batch_size": 64,
    }
    return MLPClassifier(**config)


def main():
    """Explore the base learning rate on the MNIST vector data.

    Every learning rate is first trained for a fixed 50 epochs, then
    again with a per-rate epoch count, printing score and loss each
    time.
    """

    #  Vector MNIST versions scaled [0,1)
    x_train = np.load("../data/mnist/mnist_train_vectors.npy").astype("float64")/256.0
    y_train = np.load("../data/mnist/mnist_train_labels.npy")
    x_test = np.load("../data/mnist/mnist_test_vectors.npy").astype("float64")/256.0
    y_test = np.load("../data/mnist/mnist_test_labels.npy")

    #  keep at most the first N samples of each set
    N = 20000
    x, y = x_train[:N], y_train[:N]
    xt, yt = x_test[:N], y_test[:N]

    base_lr = [0.2,0.1,0.05,0.01,0.005,0.001,0.0005,0.0001]

    #  Part 1: the same number of epochs for every learning rate
    fixed_epochs = 50
    for rate in base_lr:
        score, loss, _ = run(x, y, xt, yt, nn(rate,fixed_epochs))
        print("base_lr = %0.5f, score = %0.5f, loss = %0.5f, epochs = %d" % (rate,score,loss,fixed_epochs))
    print()

    #  Part 2: choose epochs so base_lr * epochs is about 1.5
    epochs = [8, 15, 30, 150, 300, 1500, 3000, 15000]

    for rate, n_epochs in zip(base_lr, epochs):
        score, loss, elapsed = run(x, y, xt, yt, nn(rate, n_epochs))
        print("base_lr = %0.5f, score = %0.5f, loss = %0.5f, epochs = %d, time = %0.3f" % (rate,score,loss,n_epochs,elapsed))
    print()

main()



In [None]:
import numpy as np
import matplotlib.pylab as plt

def main():
    """Plot test score against learning rate on a logarithmic x axis.

    Two hard-coded result sets are drawn and the figure is written to
    mnist_nn_experiments_base_lr_plot.png before being shown.
    """
    # score by base_lr, fixed epochs
    sc0 = np.array([0.91870,0.95070,0.96050,0.97120,0.97260,0.97540,0.97630,0.94800])
    lr = np.array([0.00010,0.00050,0.00100,0.00500,0.01000,0.05000,0.10000,0.20000])

    # score by base_lr, lr * epochs = 1.5
    sc1 = np.array([0.96990,0.97030,0.97060,0.97240,0.97310,0.97590,0.97340,0.95550])

    plt.semilogx(lr,sc0,marker='o',color='k', fillstyle='none')
    plt.semilogx(lr,sc1,marker='s',color='k', fillstyle='none')
    #  raw string: "\e" is not a valid escape sequence in a normal
    #  string literal; the label text itself is unchanged
    plt.xlabel(r"Learning rate ($\eta$)")
    plt.ylabel("Test Score")
    plt.tight_layout(pad=0, w_pad=0, h_pad=0)
    plt.savefig("mnist_nn_experiments_base_lr_plot.png", format="png", dpi=300)
    plt.show()

main()


#base_lr = 0.20000, score = 0.94800, loss = 0.09340, epochs = 50
#base_lr = 0.10000, score = 0.97630, loss = 0.00207, epochs = 50
#base_lr = 0.05000, score = 0.97540, loss = 0.00162, epochs = 50
#base_lr = 0.01000, score = 0.97260, loss = 0.00229, epochs = 50
#base_lr = 0.00500, score = 0.97120, loss = 0.00413, epochs = 50
#base_lr = 0.00100, score = 0.96050, loss = 0.06542, epochs = 50
#base_lr = 0.00050, score = 0.95070, loss = 0.13361, epochs = 50
#base_lr = 0.00010, score = 0.91870, loss = 0.29111, epochs = 50
#
#base_lr = 0.20000, score = 0.95550, loss = 0.07414, epochs = 8, time = 71.445
#base_lr = 0.10000, score = 0.97340, loss = 0.00946, epochs = 15, time = 132.122
#base_lr = 0.05000, score = 0.97590, loss = 0.00168, epochs = 30, time = 268.132
#base_lr = 0.01000, score = 0.97310, loss = 0.00163, epochs = 150, time = 1385.001
#base_lr = 0.00500, score = 0.97240, loss = 0.00163, epochs = 300, time = 2741.335
#base_lr = 0.00100, score = 0.97060, loss = 0.00163, epochs = 1500, time = 13232.182
#base_lr = 0.00050, score = 0.97030, loss = 0.00163, epochs = 3000, time = 26478.357
#base_lr = 0.00010, score = 0.96990, loss = 0.00162, epochs = 15000, time = 135642.231




# Training-Set Size (Samples)

In [None]:
#
#  file:  mnist_nn_experiments_samples.py
#
#  Reduced MNIST + NN for Chapter 6.
#
#  RTK, 15-Oct-2018
#  Last update:  15-Oct-2018
#
###############################################################

import numpy as np
import time
from sklearn.neural_network import MLPClassifier 


def run(x_train, y_train, x_test, y_test, clf):
    """Fit clf, then return [test score, final training loss, fit time (s)]"""

    started = time.time()
    clf.fit(x_train, y_train)
    fit_time = time.time() - started

    train_loss = clf.loss_
    return [clf.score(x_test, y_test), train_loss, fit_time]


def nn(epochs):
    """Return a new, untrained MLPClassifier with max_iter set to epochs"""

    config = dict(
        solver="sgd",
        verbose=False,
        tol=1e-8,
        nesterovs_momentum=False,
        early_stopping=False,
        learning_rate_init=0.05,
        momentum=0.9,
        max_iter=epochs,
        hidden_layer_sizes=(1000,500),
        activation="relu",
        learning_rate="constant",
        batch_size=100,
    )
    return MLPClassifier(**config)


def main():
    """Measure MNIST test score as a function of training set size.

    For every size n, M networks are trained, each on a fresh random
    selection of n training samples, and the mean and standard error
    of the test score are printed.
    """

    #  Vector MNIST versions scaled [0,1)
    x_train = np.load("../data/mnist/mnist_train_vectors.npy").astype("float64")/256.0
    y_train = np.load("../data/mnist/mnist_train_labels.npy")
    x_test = np.load("../data/mnist/mnist_test_vectors.npy").astype("float64")/256.0
    y_test = np.load("../data/mnist/mnist_test_labels.npy")

    #  training set samples
    N = [100, 200, 300, 400, 500, 600,
        700, 800, 900, 1000, 1500, 2000, 2500, 3000, 3500, 4000,
        4500, 5000, 7500, 10000, 15000, 20000, 25000, 30000]
    M = 5

    n_available = y_train.shape[0]

    for n in N:
        scores = np.zeros(M)
        print("samples = %5d" % n)
        #  epochs to take 1,000 SGD steps: with the minibatch size of 100
        #  set in nn() an epoch is n/100 steps.  Integer arithmetic avoids
        #  float truncation; always train for at least one epoch.
        epochs = max(1, 100000 // n)
        for i in range(M):
            #  Draw n distinct samples at random.  Only the selected rows
            #  are copied; the full training set is left untouched.
            idx = np.random.permutation(n_available)[:n]
            x = x_train[idx]
            y = y_train[idx]
            s,l,e = run(x, y, x_test, y_test, nn(epochs))
            scores[i] = s
            print("    score = %0.5f, loss = %0.5f, epochs = %d, training time = %0.3f" % (s,l,epochs,e))
        print("    mean score = %0.5f +/- %0.5f" % (scores.mean(), scores.std()/np.sqrt(M)))
        print()
    print()


main()



In [None]:
import matplotlib.pylab as plt

def main():
    """Plot mean test score against the number of training samples.

    The values are hard-coded; the figure is written to
    mnist_nn_experiments_samples_plot.png before being shown.
    """
    #  training set sizes
    samples = [100, 200, 300, 400, 500, 600,700, 800, 900, 1000,
        1500, 2000, 2500, 3000, 3500, 4000, 4500, 5000, 7500,
        10000, 15000, 20000, 25000, 30000]
    #  mean test score for each size, in the same order
    mean_scores = [0.75330, 0.81608, 0.85652, 0.86402, 0.87372, 0.87764,
        0.88382, 0.89334, 0.89644, 0.90168, 0.91856, 0.92490,
        0.92966, 0.93690, 0.94190, 0.94424, 0.94964, 0.95032,
        0.95862, 0.96526, 0.96664, 0.96944, 0.96980, 0.96926]

    line_style = dict(marker="o", color="k", fillstyle='none')
    plt.plot(samples, mean_scores, **line_style)
    plt.xlabel("Number of training samples")
    plt.ylabel("Mean test score")
    plt.tight_layout(pad=0, w_pad=0, h_pad=0)
    plt.savefig("mnist_nn_experiments_samples_plot.png", dpi=300)
    plt.show()


main()



# L2 Regularization

In [None]:
#
#  file:  mnist_nn_experiments_L2.py
#
#  RTK, 19-Oct-2018
#  Last update:  20-Oct-2018
#
###############################################################

import os
import time
import numpy as np
import matplotlib.pylab as plt
from sklearn.neural_network import MLPClassifier


def epoch(x_train, y_train, x_test, y_test, clf):
    """Run one fit call and measure the resulting errors.

    Returns [training loss, training error, validation error] and
    sets clf.warm_start to True before returning.
    """

    clf.fit(x_train, y_train)
    results = [
        clf.loss_,
        1.0 - clf.score(x_train, y_train),
        1.0 - clf.score(x_test, y_test),
    ]
    clf.warm_start = True
    return results


def run(x_train, y_train, x_test, y_test, clf, max_iter):
    """Train for max_iter epochs, recording results after each one.

    Returns [training losses, training errors, validation errors,
    mean weight value]; the first three are lists with one entry per
    epoch and the last is the mean over all entries of clf.coefs_.
    """

    clf.max_iter = 1  # one epoch at a time
    history = []
    for i in range(max_iter):
        tl, terr, verr = epoch(x_train, y_train, x_test, y_test, clf)
        history.append((tl, terr, verr))
        print("    %4d: val_err = %0.5f" % (i, verr))

    train_loss = [h[0] for h in history]
    train_err = [h[1] for h in history]
    val_err = [h[2] for h in history]

    #  mean of all weight values across every layer
    total = sum((w.sum() for w in clf.coefs_), 0.0)
    count = sum(w.size for w in clf.coefs_)
    wavg = total / count

    return [train_loss, train_err, val_err, wavg]


def main():
    """Track MNIST validation error per epoch for several L2 values.

    One network is trained per alpha value on the first 3,000 training
    samples.  The per-epoch training loss, training error and
    validation error plus the final mean weight value are saved as
    .npy files in a freshly created mnist_nn_experiments_L2 directory,
    together with a plot of the validation error curves.
    """

    #  Local import: only this function needs shutil
    import shutil

    #  Start from an empty output directory, without going through a shell
    outdir = "mnist_nn_experiments_L2"
    shutil.rmtree(outdir, ignore_errors=True)
    os.makedirs(outdir, exist_ok=True)

    #  Vector MNIST versions scaled [0,1)
    x_train = np.load("../data/mnist/mnist_train_vectors.npy").astype("float64")/256.0
    y_train = np.load("../data/mnist/mnist_train_labels.npy")
    x_test = np.load("../data/mnist/mnist_test_vectors.npy").astype("float64")/256.0
    y_test = np.load("../data/mnist/mnist_test_labels.npy")

    #  Reduce the size of the train dataset
    x_train = x_train[:3000]
    y_train = y_train[:3000]

    #  L2 values
    colors= ['k','r','b','g','c']
    alpha = [0.0,0.1,0.2,0.3,0.4]
    epochs = 10000

    for k,a in enumerate(alpha):
        nn = MLPClassifier(solver="sgd", verbose=False, tol=0,
                nesterovs_momentum=False,
                early_stopping=False,
                learning_rate_init=0.01,
                momentum=0.0,
                hidden_layer_sizes=(100,50),
                activation="relu",
                alpha=a,
                learning_rate="constant",
                batch_size=64,
                max_iter=1)
        tt = "alpha = %0.6f" % a
        print(tt)
        train_loss, train_err, val_err, wavg = run(x_train, y_train, x_test, y_test, nn, epochs)
        print("    final: train error: %0.5f, val error: %0.5f, mean weight value = %0.8f"  % \
            (train_err[-1], val_err[-1], wavg))
        print()
        #  the first curve (alpha = 0.0) is drawn with a thicker line
        if (k==0):
            plt.plot(val_err, color=colors[k], linewidth=3)
        else:
            plt.plot(val_err, color=colors[k])
        np.save("mnist_nn_experiments_L2/train_error_%0.6f.npy" % a, train_err)
        np.save("mnist_nn_experiments_L2/train_loss_%0.6f.npy" % a, train_loss)
        np.save("mnist_nn_experiments_L2/val_error_%0.6f.npy" % a, val_err)
        np.save("mnist_nn_experiments_L2/mean_weight_%0.6f.npy" % a, np.array(wavg))
    plt.ylim((0.03,0.17))
    plt.xlabel("Epochs", fontsize=16)
    plt.ylabel("Error", fontsize=16)
    plt.tight_layout(pad=0, w_pad=0, h_pad=0)
    pname = "mnist_nn_experiments_L2/mnist_nn_experiments_L2_plot.png"
    plt.savefig(pname, dpi=300)
    plt.close()


if (__name__ == "__main__"):
    main()



In [None]:
import numpy as np
import matplotlib.pylab as plt

def main():
    """Plot the saved validation error curves, one per L2 value.

    The figure is written to mnist_nn_experiments_L2_plot.png before
    being shown.
    """
    #  (file holding the validation errors, line color)
    sources = [
        ("mnist_nn_experiments_L2/val_error_0.000000.npy", "k"),
        ("mnist_nn_experiments_L2/val_error_0.100000.npy", "r"),
        ("mnist_nn_experiments_L2/val_error_0.200000.npy", "g"),
        ("mnist_nn_experiments_L2/val_error_0.300000.npy", "b"),
        ("mnist_nn_experiments_L2/val_error_0.400000.npy", "c"),
    ]

    #  load every file before drawing anything
    curves = [np.load(path) for path, _ in sources]

    for values, (_, color) in zip(curves, sources):
        plt.plot(values, color=color, linewidth=1, linestyle="-")
    plt.xlabel("Epochs")
    plt.ylabel("Validation Error")
    plt.ylim((0.05,0.1))
    plt.tight_layout(pad=0, w_pad=0, h_pad=0)
    plt.savefig("mnist_nn_experiments_L2_plot.png", dpi=300)
    plt.show()


main()



# Momentum

In [None]:
#
#  file:  mnist_nn_experiments_momentum.py
#
#  RTK, 19-Oct-2018
#  Last update:  03-Feb-2019
#
###############################################################

import os
import time
import numpy as np
import matplotlib.pylab as plt
from sklearn.neural_network import MLPClassifier


def epoch(x_train, y_train, x_test, y_test, clf):
    """Fit clf once and report how it performs afterwards.

    Returns [training loss, training error, validation error];
    clf.warm_start is set to True on the way out.
    """

    clf.fit(x_train, y_train)
    loss_now = clf.loss_
    train_accuracy = clf.score(x_train, y_train)
    val_accuracy = clf.score(x_test, y_test)
    clf.warm_start = True
    return [loss_now, 1.0 - train_accuracy, 1.0 - val_accuracy]


def run(x_train, y_train, x_test, y_test, clf, max_iter):
    """Train for max_iter epochs, recording results after each one.

    Returns [training losses, training errors, validation errors],
    each a list with one entry per epoch.
    """

    clf.max_iter = 1  # one epoch at a time
    history = []
    for i in range(max_iter):
        tl, terr, verr = epoch(x_train, y_train, x_test, y_test, clf)
        history.append((tl, terr, verr))
        print("    %4d: val_err = %0.5f" % (i, verr))

    #  split the per-epoch records into one list per quantity
    train_loss = [record[0] for record in history]
    train_err = [record[1] for record in history]
    val_err = [record[2] for record in history]
    return [train_loss, train_err, val_err]


def main():
    """Track MNIST validation error per epoch for several momentum values.

    One network is trained per momentum value on the first 3,000
    training samples.  The per-epoch training loss, training error and
    validation error are saved as .npy files in a freshly created
    mnist_nn_experiments_momentum directory, together with a plot of
    the validation error curves.
    """

    #  Local import: only this function needs shutil
    import shutil

    #  Start from an empty output directory, without going through a shell.
    #  NOTE(review): the 30k momentum experiment saves into this same
    #  directory, so its results are removed here as well.
    outdir = "mnist_nn_experiments_momentum"
    shutil.rmtree(outdir, ignore_errors=True)
    os.makedirs(outdir, exist_ok=True)

    #  Vector MNIST versions scaled [0,1)
    x_train = np.load("../data/mnist/mnist_train_vectors.npy").astype("float64")/256.0
    y_train = np.load("../data/mnist/mnist_train_labels.npy")
    x_test = np.load("../data/mnist/mnist_test_vectors.npy").astype("float64")/256.0
    y_test = np.load("../data/mnist/mnist_test_labels.npy")

    #  Reduce the size of the train dataset
    x_train = x_train[:3000]
    y_train = y_train[:3000]

    #  momentum values
    colors= ['k','r','b','g','c','m']
    momentum = [0.0,0.3,0.5,0.7,0.9,0.99]
    epochs = 10000

    for k,m in enumerate(momentum):
        nn = MLPClassifier(solver="sgd", verbose=False, tol=0,
                nesterovs_momentum=False,
                early_stopping=False,
                learning_rate_init=0.01,
                momentum=m,
                hidden_layer_sizes=(100,50),
                activation="relu",
                alpha=0.0001,
                learning_rate="constant",
                batch_size=64,
                max_iter=1)
        #  two decimals, so that 0.99 is not shown rounded to 1.0
        print("momentum = %0.2f" % m)
        train_loss, train_err, val_err = run(x_train, y_train, x_test, y_test, nn, epochs)
        print("    final: train error: %0.5f, val error: %0.5f"  % \
            (train_err[-1], val_err[-1]))
        print()
        #  the first curve (momentum = 0.0) is drawn with a thicker line
        if (k==0):
            plt.plot(val_err, color=colors[k], linewidth=3)
        else:
            plt.plot(val_err, color=colors[k])
        np.save("mnist_nn_experiments_momentum/train_error_%0.2f.npy" % m, train_err)
        np.save("mnist_nn_experiments_momentum/train_loss_%0.2f.npy" % m, train_loss)
        np.save("mnist_nn_experiments_momentum/val_error_%0.2f.npy" % m, val_err)
    plt.xlabel("Epochs")
    plt.ylabel("Error")
    plt.tight_layout()
    pname = "mnist_nn_experiments_momentum/mnist_nn_experiments_momentum_plot.png"
    plt.savefig(pname, format="png", dpi=600)
    plt.close()


if (__name__ == "__main__"):
    main()



In [None]:
import numpy as np
import matplotlib.pylab as plt

def main():
    """Plot the saved test error curves, one per momentum value.

    The figure is written as both PDF and PNG before being shown.
    """
    #  (file holding the per-epoch errors, line color)
    sources = [
        ("mnist_nn_experiments_momentum/val_error_0.00.npy", "k"),
        ("mnist_nn_experiments_momentum/val_error_0.30.npy", "r"),
        ("mnist_nn_experiments_momentum/val_error_0.50.npy", "g"),
        ("mnist_nn_experiments_momentum/val_error_0.70.npy", "b"),
        ("mnist_nn_experiments_momentum/val_error_0.90.npy", "c"),
        ("mnist_nn_experiments_momentum/val_error_0.99.npy", "m"),
    ]

    #  load every file before drawing anything
    curves = [np.load(path) for path, _ in sources]

    for values, (_, color) in zip(curves, sources):
        plt.plot(values, color=color, linewidth=1, linestyle="-")
    plt.xlabel("Epochs")
    plt.ylabel("Test Error")
    plt.ylim((0.05,0.1))
    plt.tight_layout()
    #  savefig selects the output type with "format"; "type" is not one of
    #  its keyword arguments
    plt.savefig("mnist_nn_experiments_momentum_plot.pdf", format="pdf", dpi=600)
    plt.savefig("mnist_nn_experiments_momentum_plot.png", format="png", dpi=600)
    plt.show()


main()



## 30k Momentum

In [None]:
#
#  file:  mnist_nn_experiments_30k_momentum.py
#
#  RTK, 19-Oct-2018
#  Last update:  17-Dec-2019
#
###############################################################

import os
import time
import numpy as np
import matplotlib.pylab as plt
from sklearn.neural_network import MLPClassifier


def epoch(x_train, y_train, x_test, y_test, clf):
    """Call clf.fit once, then measure loss and error rates.

    Returns [training loss, training error, validation error].
    clf.warm_start is True when the function returns.
    """

    clf.fit(x_train, y_train)
    measured = [clf.loss_]
    #  error = 1 - score, first on the training set, then on the test set
    for features, labels in ((x_train, y_train), (x_test, y_test)):
        measured.append(1.0 - clf.score(features, labels))
    clf.warm_start = True
    return measured


def run(x_train, y_train, x_test, y_test, clf, max_iter):
    """Train clf one epoch at a time for max_iter epochs.

    Returns [training losses, training errors, validation errors],
    each a list with one entry per epoch.
    """

    losses, train_errors, val_errors = [], [], []

    clf.max_iter = 1  # one epoch at a time
    for step in range(max_iter):
        tl, terr, verr = epoch(x_train, y_train, x_test, y_test, clf)
        losses.append(tl)
        train_errors.append(terr)
        val_errors.append(verr)
        print("    %4d: val_err = %0.5f" % (step, verr))

    return [losses, train_errors, val_errors]


def main():
    """Track MNIST validation error per epoch for several momentum values.

    One network is trained per momentum value on the first 30,000
    training samples.  The per-epoch training loss, training error and
    validation error are saved as .npy files tagged "30k" in the
    mnist_nn_experiments_momentum directory, together with a plot of
    the validation error curves.
    """

    #  The output directory is shared with the 3,000-sample momentum
    #  experiment, whose untagged files are read by the plotting script.
    #  Create it if needed, but never delete what is already there.
    os.makedirs("mnist_nn_experiments_momentum", exist_ok=True)

    #  Vector MNIST versions scaled [0,1)
    x_train = np.load("../data/mnist/mnist_train_vectors.npy").astype("float64")/256.0
    y_train = np.load("../data/mnist/mnist_train_labels.npy")
    x_test = np.load("../data/mnist/mnist_test_vectors.npy").astype("float64")/256.0
    y_test = np.load("../data/mnist/mnist_test_labels.npy")

    #  Reduce the size of the train dataset
    x_train = x_train[:30000]
    y_train = y_train[:30000]

    #  momentum values
    colors= ['k','r','b','g','c','m']
    momentum = [0.0,0.3,0.5,0.7,0.9,0.99]
    epochs = 100

    for k,m in enumerate(momentum):
        nn = MLPClassifier(solver="sgd", verbose=False, tol=0,
                nesterovs_momentum=False,
                early_stopping=False,
                learning_rate_init=0.01,
                momentum=m,
                hidden_layer_sizes=(100,50),
                activation="relu",
                alpha=0.0001,
                learning_rate="constant",
                batch_size=64,
                max_iter=1)
        #  two decimals, so that 0.99 is not shown rounded to 1.0
        print("momentum = %0.2f" % m)
        train_loss, train_err, val_err = run(x_train, y_train, x_test, y_test, nn, epochs)
        print("    final: train error: %0.5f, val error: %0.5f"  % \
            (train_err[-1], val_err[-1]))
        print()
        #  the first curve (momentum = 0.0) is drawn with a thicker line
        if (k==0):
            plt.plot(val_err, color=colors[k], linewidth=3)
        else:
            plt.plot(val_err, color=colors[k])
        np.save("mnist_nn_experiments_momentum/train_error_30k_%0.2f.npy" % m, train_err)
        np.save("mnist_nn_experiments_momentum/train_loss_30k_%0.2f.npy" % m, train_loss)
        np.save("mnist_nn_experiments_momentum/val_error_30k_%0.2f.npy" % m, val_err)
    plt.xlabel("Epochs")
    plt.ylabel("Error")
    plt.tight_layout()
    pname = "mnist_nn_experiments_momentum/mnist_nn_experiments_30k_momentum_plot.png"
    plt.savefig(pname, format="png", dpi=600)
    plt.close()


if (__name__ == "__main__"):
    main()



# Weight Initialization

In [None]:
#
#  file:  mnist_nn_experiments_init.py
#
#  RTK, 20-Oct-2018
#  Last update:  27-Nov-2022
#
###############################################################

import os
import numpy as np
import time
import matplotlib.pylab as plt
from sklearn.neural_network import MLPClassifier 

#
#  Possible weight init methods
#
class Classifier(MLPClassifier):
    """Subclass MLPClassifier to use custom weight initialization

    Assign the init_scheme attribute on an instance before calling fit:

        0 - the initialization inherited from MLPClassifier (Glorot)
        1 - small uniformly distributed weights in [-0.005, 0.005)
        2 - small Gaussian weights, scaled by 0.005
        3 - He initialization for relu: Gaussian scaled by sqrt(2/fan_in)
        4 - alternate Xavier: Gaussian scaled by sqrt(1/fan_in)

    Schemes 1-4 start with all biases at zero.
    """

    #  Used when the caller never assigns init_scheme
    init_scheme = 0

    def _init_coef(self, fan_in, fan_out, dtype):
        """Return (weights, biases) for one layer.

        weights has shape (fan_in, fan_out) and biases has fan_out
        entries for schemes 1-4; scheme 0 returns whatever the parent
        class produces.  Raises ValueError for an unknown init_scheme.

        NOTE(review): this overrides a private MLPClassifier method, so
        the signature must match the installed scikit-learn version.
        """

        scheme = self.init_scheme
        shape = (fan_in, fan_out)

        if (scheme == 0):
            #  Glorot initialization
            weights, biases = super()._init_coef(fan_in, fan_out, dtype)
        elif (scheme == 1):
            #  small uniformly distributed weights
            weights = 0.01*(np.random.random(shape)-0.5)
            biases = np.zeros(fan_out)
        elif (scheme == 2):
            #  small Gaussian weights
            weights = 0.005*(np.random.normal(size=shape))
            biases = np.zeros(fan_out)
        elif (scheme == 3):
            #  He initialization for relu
            weights = np.random.normal(size=shape)*np.sqrt(2.0/fan_in)
            biases = np.zeros(fan_out)
        elif (scheme == 4):
            #  Alternate Xavier
            weights = np.random.normal(size=shape)*np.sqrt(1.0/fan_in)
            biases = np.zeros(fan_out)
        else:
            #  fail with a clear message instead of an unbound local below
            raise ValueError("unknown init_scheme: %r" % (scheme,))

        return weights.astype(dtype, copy=False), biases.astype(dtype, copy=False)


def run(x_train, y_train, x_test, y_test, clf, epochs):
    """Train clf one epoch at a time and record the test error.

    Returns a list with the test error (1 - score) measured after
    each of the epochs fit calls.
    """

    clf.max_iter = 1
    errors = []
    for _ in range(epochs):
        clf.fit(x_train, y_train)
        err = 1.0 - clf.score(x_test, y_test)
        print("    test error %0.5f" % err)
        errors.append(err)
        clf.warm_start = True
    return errors


def main():
    """Compare weight initialization schemes on an MNIST subset.

    Every init scheme is trained several times on the first 6,000
    training samples.  The per-epoch test errors of all runs are saved
    to mnist_nn_experiments_init_results.npy as an array indexed by
    (training, init scheme, epoch).
    """

    #  Local import: only this function needs shutil
    import shutil

    #  Start from an empty output directory, without going through a shell.
    #  NOTE(review): nothing below writes into outdir; the results file is
    #  saved in the current directory, where the plotting script loads it.
    outdir = "mnist_nn_experiments_init"
    shutil.rmtree(outdir, ignore_errors=True)
    os.makedirs(outdir, exist_ok=True)

    #  Vector MNIST versions scaled [0,1)
    x_train = np.load("../data/mnist/mnist_train_vectors.npy").astype("float64")/256.0
    y_train = np.load("../data/mnist/mnist_train_labels.npy")
    x_test = np.load("../data/mnist/mnist_test_vectors.npy").astype("float64")/256.0
    y_test = np.load("../data/mnist/mnist_test_labels.npy")

    #  Reduce the size of the train dataset
    x_train = x_train[:6000]
    y_train = y_train[:6000]
    epochs = 4000
    init_types = 5
    trainings = 10

    test_err = np.zeros((trainings, init_types, epochs))

    for i in range(trainings):
        for k in range(init_types):
            print("Training %d, init scheme %d:" % (i,k))
            nn = Classifier(solver="sgd", verbose=False, tol=0,
                   nesterovs_momentum=False, early_stopping=False, learning_rate_init=0.01,
                   momentum=0.9, hidden_layer_sizes=(100,50), activation="relu",
                   alpha=0.2, learning_rate="constant", batch_size=64, max_iter=1)
            #  select the weight initialization used by Classifier
            nn.init_scheme = k
            test_err[i,k,:] = run(x_train, y_train, x_test, y_test, nn, epochs)

    np.save("mnist_nn_experiments_init_results.npy", test_err)


if (__name__ == "__main__"):
    main()



In [None]:
import numpy as np
import matplotlib.pylab as plt

def smooth(x, window_len=11, window='hanning'):
    """Smooth a 1-D signal by convolving it with a normalized window.

    The signal is extended at both ends with mirrored copies of itself
    (``window_len - 1`` samples per side) and then convolved in 'valid'
    mode.  Note that the result therefore has ``x.size + window_len - 1``
    samples, not ``x.size``.

    Args:
        x: 1-D NumPy array to smooth.
        window_len: window size in samples.  Values below 3 return ``x``
            itself, unchanged.
        window: 'flat' for a moving average, or the name of a NumPy window
            function: 'hanning', 'hamming', 'bartlett', 'blackman'.

    Returns:
        The smoothed signal as a 1-D array (or ``x`` when ``window_len < 3``).

    Raises:
        ValueError: if ``x`` is not 1-D, is shorter than ``window_len``, or
            ``window`` is not one of the supported names.
    """
    if x.ndim != 1:
        raise ValueError("smooth only accepts 1 dimension arrays.")

    if x.size < window_len:
        raise ValueError("Input vector needs to be bigger than window size.")

    if window_len < 3:
        return x

    if window not in ('flat', 'hanning', 'hamming', 'bartlett', 'blackman'):
        raise ValueError("Window is one of 'flat', 'hanning', 'hamming', 'bartlett', 'blackman'")

    # Pad with reflections of the signal so the window never runs off an edge.
    s = np.r_[x[window_len-1:0:-1], x, x[-2:-window_len-1:-1]]
    if window == 'flat':  # moving average
        w = np.ones(window_len, 'd')
    else:
        # Look the window function up by name; the whitelist above
        # guarantees the attribute exists, so no eval() is needed.
        w = getattr(np, window)(window_len)
    return np.convolve(w/w.sum(), s, mode='valid')


def main():
    """Plot the mean test error per initialization scheme.

    Loads ``mnist_nn_experiments_init_results.npy`` (written by the
    experiment cell with shape ``(trainings, init_types, epochs)``),
    averages over the first axis, smooths each of the five curves with a
    53-sample moving average and writes the figure as PNG and EPS.
    """
    results = np.load("mnist_nn_experiments_init_results.npy")

    # One (color, linewidth) pair per init scheme; scheme 0 is drawn thicker.
    styles = [("k", 2), ("r", 1), ("g", 1), ("b", 1), ("c", 1)]
    means = [results[:, k, :].mean(axis=0) for k in range(len(styles))]

    for curve, (color, width) in zip(means, styles):
        plt.plot(smooth(curve, 53, "flat"), color=color, linewidth=width, linestyle="-")

    plt.xlabel("Epochs")
    plt.ylabel("Test Error")
    # Zoom in on the region where the curves differ.
    plt.ylim((0.04,0.055))
    plt.xlim((75,4000))
    plt.tight_layout(pad=0, w_pad=0, h_pad=0)
    for name in ("mnist_nn_experiments_init_plot.png",
                 "mnist_nn_experiments_init_plot.eps"):
        plt.savefig(name, dpi=300)
    plt.close()


# Generate the plot as soon as this cell/script runs.
main()



# Feature Ordering (Scrambled)

In [None]:
#
#  file:  mnist_nn_experiments_scrambled.py
#
#  Reduced MNIST + NN for Chapter 6.
#
#  RTK, 22-Oct-2018
#  Last update:  22-Oct-2018
#
###############################################################

import os
import numpy as np
import time
from sklearn.neural_network import MLPClassifier 

def epoch(x_train, y_train, x_test, y_test, clf):
    """Fit ``clf`` once and report the metrics for that step.

    After the fit, ``clf.warm_start`` is set to True -- presumably so the
    next ``fit`` call continues from the current weights (confirm against
    the classifier in use).

    Returns:
        ``[train_loss, train_err, val_err]`` where ``train_loss`` is
        ``clf.loss_`` and the two errors are ``1 - clf.score(...)`` on the
        training and test data respectively.
    """
    clf.fit(x_train, y_train)
    metrics = [
        clf.loss_,
        1.0 - clf.score(x_train, y_train),
        1.0 - clf.score(x_test, y_test),
    ]
    clf.warm_start = True
    return metrics


def run(x_train, y_train, x_test, y_test, clf, max_iter):
    """Train ``clf`` for ``max_iter`` single-epoch steps and collect metrics.

    ``clf.max_iter`` is forced to 1 and ``epoch`` is called ``max_iter``
    times; the validation error is printed after each step.

    Returns:
        ``[train_loss, train_err, val_err, wavg]`` -- the first three are
        lists with one entry per step, and ``wavg`` is the mean over every
        element of the arrays in ``clf.coefs_`` after the last step.
    """
    train_loss = []
    train_err = []
    val_err = []

    clf.max_iter = 1  # one epoch at a time
    for i in range(max_iter):
        tl, terr, verr = epoch(x_train, y_train, x_test, y_test, clf)
        train_loss.append(tl)
        train_err.append(terr)
        val_err.append(verr)
        # print() call: the Python 2 print statement is a SyntaxError on
        # Python 3, which the rest of this notebook uses.
        print("    %4d: val_err = %0.5f" % (i, verr))

    # Mean weight value across all layers (only clf.coefs_ is read here).
    wavg = 0.0
    n = 0
    for w in clf.coefs_:
        wavg += w.sum()
        n += w.size
    wavg /= n

    return [train_loss, train_err, val_err, wavg]


def main():
    """Train the same network on regular and scrambled MNIST vectors.

    Two runs are performed with identically configured ``MLPClassifier``
    instances: one on the regular vectors and one on the vectors loaded
    from the ``*_scrambled_vectors.npy`` files.  Each run trains for
    ``epochs`` single-epoch steps on the first 6000 training samples, prints
    the final errors and mean weight, and saves the per-epoch training
    error, training loss, validation error and the mean weight as ``.npy``
    files in ``mnist_nn_experiments_scrambled/`` (the scrambled run's files
    carry a ``_scrambled`` suffix).
    """
    import shutil  # local import: only needed to reset the output directory

    outdir = "mnist_nn_experiments_scrambled"
    # Start from an empty output directory.  Portable replacement for
    # shelling out to ``rm -rf`` / ``mkdir``; a missing directory is not an
    # error, matching ``rm -rf``.
    shutil.rmtree(outdir, ignore_errors=True)
    os.makedirs(outdir, exist_ok=True)

    #  Vector MNIST versions scaled [0,1)
    x_train = np.load("../data/mnist/mnist_train_vectors.npy").astype("float64")/256.0
    xstrain = np.load("../data/mnist/mnist_train_scrambled_vectors.npy").astype("float64")/256.0
    y_train = np.load("../data/mnist/mnist_train_labels.npy")
    x_test = np.load("../data/mnist/mnist_test_vectors.npy").astype("float64")/256.0
    xstest = np.load("../data/mnist/mnist_test_scrambled_vectors.npy").astype("float64")/256.0
    y_test = np.load("../data/mnist/mnist_test_labels.npy")

    #  Reduce the size of the train dataset
    x_train = x_train[:6000]
    y_train = y_train[:6000]
    xstrain = xstrain[:6000]
    epochs = 6000

    # (label printed, filename suffix, training vectors, test vectors)
    experiments = [
        ("Unscrambled", "", x_train, x_test),
        ("Scrambled", "_scrambled", xstrain, xstest),
    ]

    for label, suffix, xtr, xte in experiments:
        print(label)
        # A new, identically configured network for each run.
        nn = MLPClassifier(solver="sgd", verbose=False, tol=0,
               nesterovs_momentum=False, early_stopping=False, learning_rate_init=0.01,
               momentum=0.9, hidden_layer_sizes=(100,50), activation="relu",
               alpha=0.2, learning_rate="constant", batch_size=64, max_iter=1)
        train_loss, train_err, val_err, wavg = run(xtr, y_train, xte, y_test, nn, epochs)
        print("    final: train error: %0.5f, val error: %0.5f, mean weight value = %0.8f" %
              (train_err[-1], val_err[-1], wavg))
        print()
        np.save("%s/train_error%s.npy" % (outdir, suffix), train_err)
        np.save("%s/train_loss%s.npy" % (outdir, suffix), train_loss)
        np.save("%s/val_error%s.npy" % (outdir, suffix), val_err)
        np.save("%s/mean_weight%s.npy" % (outdir, suffix), np.array(wavg))


# Start the experiment only when this file is executed directly,
# not when it is imported.
if __name__ == "__main__":
    main()



In [None]:
import numpy as np
import matplotlib.pylab as plt
from scipy.stats import ttest_ind

def smooth(x, window_len=11, window='hanning'):
    """Smooth a 1-D signal by convolving it with a normalized window.

    The signal is extended at both ends with mirrored copies of itself
    (``window_len - 1`` samples per side) and then convolved in 'valid'
    mode.  Note that the result therefore has ``x.size + window_len - 1``
    samples, not ``x.size``.

    Args:
        x: 1-D NumPy array to smooth.
        window_len: window size in samples.  Values below 3 return ``x``
            itself, unchanged.
        window: 'flat' for a moving average, or the name of a NumPy window
            function: 'hanning', 'hamming', 'bartlett', 'blackman'.

    Returns:
        The smoothed signal as a 1-D array (or ``x`` when ``window_len < 3``).

    Raises:
        ValueError: if ``x`` is not 1-D, is shorter than ``window_len``, or
            ``window`` is not one of the supported names.
    """
    # ``raise ValueError(...)`` call form: the Python 2 ``raise X, "msg"``
    # statement is a SyntaxError on Python 3.
    if x.ndim != 1:
        raise ValueError("smooth only accepts 1 dimension arrays.")

    if x.size < window_len:
        raise ValueError("Input vector needs to be bigger than window size.")

    if window_len < 3:
        return x

    if window not in ('flat', 'hanning', 'hamming', 'bartlett', 'blackman'):
        raise ValueError("Window is one of 'flat', 'hanning', 'hamming', 'bartlett', 'blackman'")

    # Pad with reflections of the signal so the window never runs off an edge.
    s = np.r_[x[window_len-1:0:-1], x, x[-2:-window_len-1:-1]]
    if window == 'flat':  # moving average
        w = np.ones(window_len, 'd')
    else:
        # Look the window function up by name; the whitelist above
        # guarantees the attribute exists, so no eval() is needed.
        w = getattr(np, window)(window_len)
    return np.convolve(w/w.sum(), s, mode='valid')


def main():
    """Compare validation error of the unscrambled and scrambled runs.

    Loads ``val_error.npy`` and ``val_error_scrambled.npy`` from the ten
    directories ``mnist_nn_experiments_scrambled_run0`` ... ``run9``, prints
    an independent two-sample t-test on the final-epoch errors, then plots
    the smoothed (53-sample moving average) mean curves and saves the
    figure as PNG and PDF before showing it.
    """
    run_dirs = ["mnist_nn_experiments_scrambled_run%d" % i for i in range(10)]

    # One row per run; columns are the per-epoch validation errors.
    c = np.array([np.load(r + "/val_error.npy") for r in run_dirs])
    d = np.array([np.load(r + "/val_error_scrambled.npy") for r in run_dirs])

    # t-test on the last recorded error of each run.
    print(ttest_ind(c[:,-1], d[:,-1]))

    plt.plot(smooth(c.mean(axis=0),53,"flat"), color="k", linewidth=1, linestyle="-")
    plt.plot(smooth(d.mean(axis=0),53,"flat"), color="r", linewidth=1, linestyle="-")
    plt.xlabel("Epochs", fontsize=16)
    plt.ylabel("Test Error", fontsize=16)
    plt.ylim((0.04,0.055))
    plt.xlim((75,4000))
    plt.tight_layout()
    # ``format`` is the savefig keyword for the output type; ``type`` is not
    # a savefig parameter.
    plt.savefig("mnist_nn_experiments_scrambled_plot.png", format="png", dpi=600)
    plt.savefig("mnist_nn_experiments_scrambled_plot.pdf", format="pdf", dpi=600)
    plt.show()


# Run the comparison and plotting as soon as this cell/script runs.
main()

