In [1]:
# Data
import numpy as np
from tensorflow.examples.tutorials.mnist import input_data
import impl.layer as l

# Load MNIST with labels as a 1-D vector (one_hot=False), then unpack the three
# predefined splits. Generator expressions keep the loop variable out of the
# notebook namespace.
mnist = input_data.read_data_sets('data/MNIST_data/', one_hot=False)

X_train, X_val, X_test = (part.images for part in (mnist.train, mnist.validation, mnist.test))
y_train, y_val, y_test = (part.labels for part in (mnist.train, mnist.validation, mnist.test))

# Cell output: label-vector shapes of the test, validation and training splits
y_test.shape, y_val.shape, y_train.shape

Extracting data/MNIST_data/train-images-idx3-ubyte.gz
Extracting data/MNIST_data/train-labels-idx1-ubyte.gz
Extracting data/MNIST_data/t10k-images-idx3-ubyte.gz
Extracting data/MNIST_data/t10k-labels-idx1-ubyte.gz


((10000,), (5000,), (55000,))

In [6]:
# Pre-processing
def prepro(X_train, X_val, X_test):
    """Centre the three data splits using the training-set mean.

    A single scalar mean is computed over every element of ``X_train`` and
    subtracted from each split, so validation and test data are shifted by
    the training statistic only. No rescaling is applied.

    Args:
        X_train: training inputs; the mean is taken over all of its elements.
        X_val: validation inputs.
        X_test: test inputs.

    Returns:
        Tuple ``(X_train, X_val, X_test)`` of the centred splits.
    """
    train_mean = np.mean(X_train)
    return tuple(split - train_mean for split in (X_train, X_val, X_test))

# Centre every split with the training-set mean; the names are rebound to the
# centred arrays, so re-running this cell subtracts the (new) mean again.
X_train, X_val, X_test = prepro(X_train, X_val, X_test)
# Cell output: shapes of the centred splits
X_train.shape, X_val.shape, X_test.shape

((55000, 784), (5000, 784), (10000, 784))

In [None]:
# Model
import impl.layer as l
from impl.loss import * # import all! 
from sklearn.utils import shuffle as skshuffle

class FFNN:
    """Fully-connected classifier trained with fixed random feedback weights.

    Forward path (see ``train_forward``):
    input FC -> sigmoid -> ``L`` x (FC -> sigmoid) -> output FC (logits).

    Backward path (see ``train_backward``): the error is propagated through
    the fixed random matrices in ``W_fixed`` instead of the transposed
    forward weights (feedback alignment).

    Attributes:
        L: number of hidden (H x H) layers.
        losses: dict with per-iteration lists under the keys ``'train'``,
            ``'valid'`` and ``'valid_acc'``.
        model: learnable parameters, ``[input_dict, [hidden_dict, ...], output_dict]``
            where every dict holds ``'W'`` and ``'b'``.
        W_fixed: fixed feedback matrices laid out like ``model``.
    """

    def __init__(self, D, C, H, L):
        """Allocate the learnable parameters and the fixed feedback weights.

        All weight matrices are drawn from U[0, 1); biases start at zero.

        Args:
            D: number of input features.
            C: number of output classes.
            H: number of units in every hidden layer.
            L: number of hidden (H x H) layers.
        """
        self.L = L # number of hidden layers
        self.losses = {'train':[], 'valid':[], 'valid_acc':[]}
        self.model = [] # feedforward parameters: learnable
        self.W_fixed = [] # feedback weights: fixed

        # Input layer
        m = dict(W=np.random.uniform(size=(D, H), low=0.0, high=1.0),
                 b=np.zeros((1, H)))
        self.model.append(m) # self.model[0]
        W = np.random.uniform(size=(D, H), low=0.0, high=1.0)
        self.W_fixed.append(W) # self.W_fixed[0]

        # Hidden layers
        m_L = []
        for _ in range(L):
            m = dict(W=np.random.uniform(size=(H, H), low=0.0, high=1.0),
                     b=np.zeros((1, H)))
            m_L.append(m)
        self.model.append(m_L) # self.model[1][layer]
        W_L = []
        for _ in range(L):
            W = np.random.uniform(size=(H, H), low=0.0, high=1.0)
            W_L.append(W)
        self.W_fixed.append(W_L) # self.W_fixed[1][layer]

        # Output layer
        m = dict(W=np.random.uniform(size=(H, C), low=0.0, high=1.0),
                 b=np.zeros((1, C)))
        self.model.append(m) # self.model[2]
        W = np.random.uniform(size=(H, C), low=0.0, high=1.0)
        self.W_fixed.append(W) # self.W_fixed[2]

    def fc_forward(self, X, W, b):
        """Affine layer ``X @ W + b``.

        Returns:
            ``(out, cache)`` where ``cache = (W, X)`` is consumed by ``fc_backward``.
        """
        out = (X @ W) + b
        cache = (W, X)
        return out, cache

    def fc_backward(self, dout, cache, W_fixed):
        """Backward step of an affine layer using a fixed feedback matrix.

        Args:
            dout: gradient arriving at the layer output.
            cache: ``(W, X)`` tuple produced by ``fc_forward``.
            W_fixed: feedback matrix used in place of the forward ``W`` when
                propagating the error to the layer input.

        Returns:
            ``(dX, dW, db)``; ``db`` has shape ``(1, n)``.
        """
        W, X = cache
        dW = X.T @ dout
        db = np.sum(dout, axis=0).reshape(1, -1) # db_1xn
        # dX = dout @ W.T # Backprop
        dX = dout @ W_fixed.T # fb alignment/ Bioprop

        return dX, dW, db

    def train_forward(self, X):
        """Run the forward pass and collect the per-layer caches.

        Returns:
            ``(y, caches)`` where ``y`` holds the output-layer logits and
            ``caches = [input_cache, [hidden_cache, ...], output_cache]``.
        """
        caches = []

        # Input layer
        y, fc_cache = self.fc_forward(X=X, W=self.model[0]['W'], b=self.model[0]['b'])
        y = l.sigmoid(X=y)
        X = y.copy() # pass the previous output to the next layer as the input
        caches.append(fc_cache) # caches[0]

        # Hidden layers
        fc_caches = []
        for layer in range(self.L):
            y, fc_cache = self.fc_forward(X=X, W=self.model[1][layer]['W'], b=self.model[1][layer]['b'])
            y = l.sigmoid(X=y)
            X = y.copy() # pass to next layer
            fc_caches.append(fc_cache)
        caches.append(fc_caches) # caches[1]

        # Output layer (no activation: logits are returned)
        y, fc_cache = self.fc_forward(X=X, W=self.model[2]['W'], b=self.model[2]['b'])
        caches.append(fc_cache) # caches[2]

        return y, caches

    def loss_function(self, y, y_train):
        """Return ``(loss, dy)`` for logits ``y`` against labels ``y_train``.

        Delegates to ``cross_entropy`` / ``dcross_entropy`` from ``impl.loss``;
        presumably these apply softmax internally (per the original author's
        note) -- confirm against ``impl.loss``.
        """
        loss = cross_entropy(y, y_train)
        dy = dcross_entropy(y, y_train)

        return loss, dy

    def train_backward(self, dy, caches):
        """Propagate ``dy`` from the output layer back to the input layer.

        Returns:
            ``(dX, grads)`` where ``grads = [output_grads, [hidden_grads, ...], input_grads]``.
            Each entry is a ``(dW, db)`` tuple and the hidden list is in
            reversed layer order (last hidden layer first).
        """
        grads = []

        # NOTE(review): the feedback signal below is squashed with the forward
        # sigmoid instead of being scaled by the sigmoid derivative. This looks
        # like a deliberate part of the feedback-alignment experiment -- confirm.

        # Output layer: first layer in feedback vs last layer in feedforward
        fc_cache = caches[2]
        dX, dW, db = self.fc_backward(dout=dy, cache=fc_cache, W_fixed=self.W_fixed[2])
        dX = l.sigmoid(X=dX)
        dy = dX.copy() # pass it to the previous layer
        grads.append((dW, db)) # grads[0] vs model[2]

        # Hidden layers: middle layers in feedback vs middle layers in feedforward
        fc_caches = caches[1]
        grads_L = []
        for layer in reversed(range(self.L)):
            dX, dW, db = self.fc_backward(dout=dy, cache=fc_caches[layer], W_fixed=self.W_fixed[1][layer])
            dX = l.sigmoid(X=dX)
            dy = dX.copy()
            grads_L.append((dW, db))
        grads.append(grads_L) # grads[1][k] vs model[1][L-1-k]

        # Input layer: last layer in feedback vs first layer in feedforward
        fc_cache = caches[0]
        dX, dW, db = self.fc_backward(dout=dy, cache=fc_cache, W_fixed=self.W_fixed[0])
        grads.append((dW, db)) # grads[2] vs model[0]

        return dX, grads

    def test(self, X):
        """Predict class indices for ``X``.

        Returns:
            ``(y_pred, y_logit)``: arg-max class per row and the raw logits.
        """
        y_logit, _ = self.train_forward(X)

        y_prob = l.softmax(y_logit) # probabilities, used only for the arg-max
        y_pred = np.argmax(y_prob, axis=1)

        return y_pred, y_logit

    def get_minibatch(self, X, y, minibatch_size, shuffle):
        """Split ``(X, y)`` into consecutive minibatches.

        Args:
            X, y: inputs and labels with matching first dimension.
            minibatch_size: rows per minibatch; the last one may be smaller.
            shuffle: if true, shuffle ``X`` and ``y`` jointly before splitting.

        Returns:
            List of ``(X_mini, y_mini)`` tuples.
        """
        minibatches = []

        if shuffle:
            X, y = skshuffle(X, y)

        for i in range(0, X.shape[0], minibatch_size):
            X_mini = X[i:i + minibatch_size]
            y_mini = y[i:i + minibatch_size]
            minibatches.append((X_mini, y_mini))

        return minibatches

    def sgd(self, train_set, val_set, alpha, mb_size, n_iter, print_after, test_set=None):
        """Train with plain SGD, validating after every update.

        Each iteration draws one random minibatch, performs a single
        parameter update and appends the train loss, validation loss and
        validation accuracy to ``self.losses``. After the last iteration the
        model is evaluated once on the test data and the result is printed.

        Args:
            train_set: ``(X_train, y_train)`` tuple.
            val_set: ``(X_val, y_val)`` tuple.
            alpha: learning rate.
            mb_size: minibatch size.
            n_iter: number of iterations (single-minibatch updates).
            print_after: print the metrics every ``print_after`` iterations.
            test_set: optional ``(X_test, y_test)`` tuple for the final
                evaluation. When omitted, the module-level ``X_test`` and
                ``y_test`` are used, as earlier versions of this method did.
        """
        X_train, y_train = train_set
        X_val, y_val = val_set

        for it in range(1, n_iter + 1):

            # Pick one random minibatch for this update
            minibatches = self.get_minibatch(X_train, y_train, mb_size, shuffle=True)
            idx = np.random.randint(0, len(minibatches))
            X_mini, y_mini = minibatches[idx]

            # Train the model
            y, caches = self.train_forward(X_mini)
            loss, dy = self.loss_function(y, y_mini)
            _, grads = self.train_backward(dy, caches)
            self.losses['train'].append(loss)

            # Update the input layer
            dW, db = grads[2] # last layer for feedback vs first layer for feedforward
            self.model[0]['W'] -= alpha * dW
            self.model[0]['b'] -= alpha * db

            # Update the hidden layers (grads[1] is stored in reversed layer order)
            for layer in range(self.L):
                dW, db = grads[1][(self.L-1)-layer]
                self.model[1][layer]['W'] -= alpha * dW
                self.model[1][layer]['b'] -= alpha * db

            # Update the output layer
            dW, db = grads[0] # first layer for feedback vs last layer for feedforward
            self.model[2]['W'] -= alpha * dW
            self.model[2]['b'] -= alpha * db

            # Validate the updated model
            y_pred, y_logit = self.test(X_val)
            valid_loss, _ = self.loss_function(y_logit, y_val)
            self.losses['valid'].append(valid_loss)
            valid_acc = np.mean(y_pred == y_val)
            self.losses['valid_acc'].append(valid_acc)

            # Print the model info: loss & accuracy
            if it % print_after == 0:
                print('Iter-{} train loss: {:.4f} valid loss: {:.4f}, valid accuracy: {:.4f}'.format(
                    it, loss, valid_loss, valid_acc))

        # Test the final model
        if test_set is None:
            # Backward compatibility: older callers relied on notebook-level globals.
            test_set = (X_test, y_test)
        X_eval, y_eval = test_set
        # Evaluate this instance rather than whatever object the global `nn` is bound to.
        y_pred, y_logit = self.test(X_eval)
        loss, _ = self.loss_function(y_logit, y_eval)
        acc = np.mean(y_pred == y_eval) # scalar, so the printed std is always 0
        print('Last iteration - Test accuracy mean: {:.4f}, std: {:.4f}, loss: {:.4f}'.format(
            acc.mean(), acc.std(), loss))

In [None]:
# Hyper-parameters
SEED = 0 # seed for numpy's global RNG (weight init and minibatch index draws use np.random)
np.random.seed(SEED) # makes re-running this cell reproduce the same training run
n_iter = 100000 # number of SGD iterations (one minibatch update each), not epochs
alpha = 1e-3 # learning rate
mb_size = 64 # minibatch size
print_after = 10 # print train/valid metrics every `print_after` iterations
num_hidden_units = 32 # number of units in each hidden layer
num_input_units = X_train.shape[1] # input dimensionality (784 per the shapes shown above)
num_output_units = y_train.max() + 1 # number of classes, assuming labels are 0..C-1
num_layers = 1 # number of hidden (H x H) layers

# Build the network and train it
nn = FFNN(C=num_output_units, D=num_input_units, H=num_hidden_units, L=num_layers)

nn.sgd(train_set=(X_train, y_train), val_set=(X_val, y_val), mb_size=mb_size, alpha=alpha, 
           n_iter=n_iter, print_after=print_after)

Iter-10 train loss: 3.1113 valid loss: 2.9985, valid accuracy: 0.1126
Iter-20 train loss: 2.8898 valid loss: 2.9648, valid accuracy: 0.1126
Iter-30 train loss: 2.8516 valid loss: 2.9351, valid accuracy: 0.1126
Iter-40 train loss: 2.7498 valid loss: 2.9058, valid accuracy: 0.1126
Iter-50 train loss: 2.8745 valid loss: 2.8788, valid accuracy: 0.1126
Iter-60 train loss: 2.9816 valid loss: 2.8523, valid accuracy: 0.1126
Iter-70 train loss: 2.7728 valid loss: 2.8294, valid accuracy: 0.1126
Iter-80 train loss: 2.6650 valid loss: 2.8079, valid accuracy: 0.1126
Iter-90 train loss: 2.9295 valid loss: 2.7859, valid accuracy: 0.1126
Iter-100 train loss: 2.7619 valid loss: 2.7659, valid accuracy: 0.1126
Iter-110 train loss: 2.6737 valid loss: 2.7471, valid accuracy: 0.1126
Iter-120 train loss: 2.6064 valid loss: 2.7295, valid accuracy: 0.1126
Iter-130 train loss: 2.5126 valid loss: 2.7137, valid accuracy: 0.1126
Iter-140 train loss: 2.5897 valid loss: 2.6991, valid accuracy: 0.1126
Iter-150 train 

Iter-1170 train loss: 2.2953 valid loss: 2.3079, valid accuracy: 0.0970
Iter-1180 train loss: 2.3242 valid loss: 2.3076, valid accuracy: 0.0970
Iter-1190 train loss: 2.3258 valid loss: 2.3075, valid accuracy: 0.0970
Iter-1200 train loss: 2.3313 valid loss: 2.3073, valid accuracy: 0.0970
Iter-1210 train loss: 2.2922 valid loss: 2.3070, valid accuracy: 0.0970
Iter-1220 train loss: 2.2891 valid loss: 2.3065, valid accuracy: 0.0970
Iter-1230 train loss: 2.3125 valid loss: 2.3063, valid accuracy: 0.0970
Iter-1240 train loss: 2.3211 valid loss: 2.3061, valid accuracy: 0.0970
Iter-1250 train loss: 2.2927 valid loss: 2.3057, valid accuracy: 0.0970
Iter-1260 train loss: 2.3122 valid loss: 2.3055, valid accuracy: 0.0972
Iter-1270 train loss: 2.3160 valid loss: 2.3053, valid accuracy: 0.0974
Iter-1280 train loss: 2.2918 valid loss: 2.3050, valid accuracy: 0.0974
Iter-1290 train loss: 2.3084 valid loss: 2.3048, valid accuracy: 0.0974
Iter-1300 train loss: 2.3153 valid loss: 2.3045, valid accuracy:

Iter-2310 train loss: 2.2894 valid loss: 2.2969, valid accuracy: 0.1080
Iter-2320 train loss: 2.2829 valid loss: 2.2968, valid accuracy: 0.1070
Iter-2330 train loss: 2.2963 valid loss: 2.2968, valid accuracy: 0.1078
Iter-2340 train loss: 2.3049 valid loss: 2.2967, valid accuracy: 0.1074
Iter-2350 train loss: 2.3003 valid loss: 2.2966, valid accuracy: 0.1084
Iter-2360 train loss: 2.2857 valid loss: 2.2966, valid accuracy: 0.1072
Iter-2370 train loss: 2.2846 valid loss: 2.2965, valid accuracy: 0.1062
Iter-2380 train loss: 2.3027 valid loss: 2.2965, valid accuracy: 0.1060
Iter-2390 train loss: 2.2906 valid loss: 2.2965, valid accuracy: 0.1058
Iter-2400 train loss: 2.3112 valid loss: 2.2965, valid accuracy: 0.1056
Iter-2410 train loss: 2.2971 valid loss: 2.2964, valid accuracy: 0.1058
Iter-2420 train loss: 2.3048 valid loss: 2.2964, valid accuracy: 0.1066
Iter-2430 train loss: 2.3125 valid loss: 2.2963, valid accuracy: 0.1060
Iter-2440 train loss: 2.2739 valid loss: 2.2963, valid accuracy:

Iter-3450 train loss: 2.3177 valid loss: 2.2930, valid accuracy: 0.1108
Iter-3460 train loss: 2.2840 valid loss: 2.2931, valid accuracy: 0.1112
Iter-3470 train loss: 2.3071 valid loss: 2.2930, valid accuracy: 0.1104
Iter-3480 train loss: 2.3004 valid loss: 2.2929, valid accuracy: 0.1110
Iter-3490 train loss: 2.3052 valid loss: 2.2929, valid accuracy: 0.1106
Iter-3500 train loss: 2.2801 valid loss: 2.2928, valid accuracy: 0.1112
Iter-3510 train loss: 2.2976 valid loss: 2.2928, valid accuracy: 0.1120
Iter-3520 train loss: 2.2714 valid loss: 2.2928, valid accuracy: 0.1122
Iter-3530 train loss: 2.3028 valid loss: 2.2926, valid accuracy: 0.1114
Iter-3540 train loss: 2.2957 valid loss: 2.2927, valid accuracy: 0.1110
Iter-3550 train loss: 2.2896 valid loss: 2.2927, valid accuracy: 0.1114
Iter-3560 train loss: 2.2953 valid loss: 2.2926, valid accuracy: 0.1122
Iter-3570 train loss: 2.2900 valid loss: 2.2925, valid accuracy: 0.1118
Iter-3580 train loss: 2.2931 valid loss: 2.2924, valid accuracy:

Iter-4590 train loss: 2.2970 valid loss: 2.2893, valid accuracy: 0.1158
Iter-4600 train loss: 2.3304 valid loss: 2.2894, valid accuracy: 0.1154
Iter-4610 train loss: 2.2878 valid loss: 2.2893, valid accuracy: 0.1152
Iter-4620 train loss: 2.2880 valid loss: 2.2894, valid accuracy: 0.1154
Iter-4630 train loss: 2.2863 valid loss: 2.2893, valid accuracy: 0.1152
Iter-4640 train loss: 2.3026 valid loss: 2.2892, valid accuracy: 0.1148
Iter-4650 train loss: 2.2925 valid loss: 2.2891, valid accuracy: 0.1150
Iter-4660 train loss: 2.2661 valid loss: 2.2892, valid accuracy: 0.1150
Iter-4670 train loss: 2.2829 valid loss: 2.2892, valid accuracy: 0.1148
Iter-4680 train loss: 2.2814 valid loss: 2.2892, valid accuracy: 0.1140
Iter-4690 train loss: 2.2947 valid loss: 2.2892, valid accuracy: 0.1140
Iter-4700 train loss: 2.3294 valid loss: 2.2891, valid accuracy: 0.1140
Iter-4710 train loss: 2.3020 valid loss: 2.2890, valid accuracy: 0.1142
Iter-4720 train loss: 2.2883 valid loss: 2.2889, valid accuracy:

Iter-5730 train loss: 2.2721 valid loss: 2.2853, valid accuracy: 0.1178
Iter-5740 train loss: 2.2784 valid loss: 2.2854, valid accuracy: 0.1176
Iter-5750 train loss: 2.2945 valid loss: 2.2852, valid accuracy: 0.1176
Iter-5760 train loss: 2.3003 valid loss: 2.2852, valid accuracy: 0.1182
Iter-5770 train loss: 2.2660 valid loss: 2.2851, valid accuracy: 0.1180
Iter-5780 train loss: 2.2953 valid loss: 2.2851, valid accuracy: 0.1184
Iter-5790 train loss: 2.3007 valid loss: 2.2850, valid accuracy: 0.1188
Iter-5800 train loss: 2.2729 valid loss: 2.2849, valid accuracy: 0.1184
Iter-5810 train loss: 2.2731 valid loss: 2.2848, valid accuracy: 0.1184
Iter-5820 train loss: 2.2707 valid loss: 2.2848, valid accuracy: 0.1182
Iter-5830 train loss: 2.2882 valid loss: 2.2847, valid accuracy: 0.1178
Iter-5840 train loss: 2.2974 valid loss: 2.2847, valid accuracy: 0.1180
Iter-5850 train loss: 2.2859 valid loss: 2.2847, valid accuracy: 0.1170
Iter-5860 train loss: 2.2800 valid loss: 2.2846, valid accuracy:

Iter-6870 train loss: 2.2703 valid loss: 2.2805, valid accuracy: 0.1192
Iter-6880 train loss: 2.2914 valid loss: 2.2804, valid accuracy: 0.1198
Iter-6890 train loss: 2.2882 valid loss: 2.2803, valid accuracy: 0.1208
Iter-6900 train loss: 2.2819 valid loss: 2.2803, valid accuracy: 0.1186
Iter-6910 train loss: 2.2600 valid loss: 2.2803, valid accuracy: 0.1188
Iter-6920 train loss: 2.2714 valid loss: 2.2803, valid accuracy: 0.1196
Iter-6930 train loss: 2.2997 valid loss: 2.2802, valid accuracy: 0.1194
Iter-6940 train loss: 2.2766 valid loss: 2.2802, valid accuracy: 0.1188
Iter-6950 train loss: 2.3051 valid loss: 2.2802, valid accuracy: 0.1178
Iter-6960 train loss: 2.2934 valid loss: 2.2802, valid accuracy: 0.1182
Iter-6970 train loss: 2.2676 valid loss: 2.2801, valid accuracy: 0.1176
Iter-6980 train loss: 2.2553 valid loss: 2.2801, valid accuracy: 0.1176
Iter-6990 train loss: 2.3079 valid loss: 2.2801, valid accuracy: 0.1172
Iter-7000 train loss: 2.3009 valid loss: 2.2801, valid accuracy:

Iter-8010 train loss: 2.2822 valid loss: 2.2765, valid accuracy: 0.1416
Iter-8020 train loss: 2.2795 valid loss: 2.2765, valid accuracy: 0.1422
Iter-8030 train loss: 2.2630 valid loss: 2.2764, valid accuracy: 0.1420
Iter-8040 train loss: 2.2747 valid loss: 2.2765, valid accuracy: 0.1204
Iter-8050 train loss: 2.2875 valid loss: 2.2764, valid accuracy: 0.1200
Iter-8060 train loss: 2.2710 valid loss: 2.2763, valid accuracy: 0.1208
Iter-8070 train loss: 2.2600 valid loss: 2.2763, valid accuracy: 0.1210
Iter-8080 train loss: 2.2602 valid loss: 2.2763, valid accuracy: 0.1212
Iter-8090 train loss: 2.2605 valid loss: 2.2763, valid accuracy: 0.1206
Iter-8100 train loss: 2.2702 valid loss: 2.2763, valid accuracy: 0.1212
Iter-8110 train loss: 2.2577 valid loss: 2.2762, valid accuracy: 0.1218
Iter-8120 train loss: 2.2625 valid loss: 2.2762, valid accuracy: 0.1220
Iter-8130 train loss: 2.2713 valid loss: 2.2762, valid accuracy: 0.1218
Iter-8140 train loss: 2.2800 valid loss: 2.2762, valid accuracy:

Iter-9150 train loss: 2.2750 valid loss: 2.2727, valid accuracy: 0.1434
Iter-9160 train loss: 2.2720 valid loss: 2.2726, valid accuracy: 0.1440
Iter-9170 train loss: 2.2673 valid loss: 2.2726, valid accuracy: 0.1436
Iter-9180 train loss: 2.2935 valid loss: 2.2725, valid accuracy: 0.1438
Iter-9190 train loss: 2.2739 valid loss: 2.2725, valid accuracy: 0.1442
Iter-9200 train loss: 2.2492 valid loss: 2.2725, valid accuracy: 0.1250
Iter-9210 train loss: 2.2751 valid loss: 2.2725, valid accuracy: 0.1254
Iter-9220 train loss: 2.2796 valid loss: 2.2724, valid accuracy: 0.1256
Iter-9230 train loss: 2.2826 valid loss: 2.2724, valid accuracy: 0.1234
Iter-9240 train loss: 2.2994 valid loss: 2.2723, valid accuracy: 0.1242
Iter-9250 train loss: 2.2885 valid loss: 2.2723, valid accuracy: 0.1244
Iter-9260 train loss: 2.2833 valid loss: 2.2722, valid accuracy: 0.1266
Iter-9270 train loss: 2.2616 valid loss: 2.2721, valid accuracy: 0.1432
Iter-9280 train loss: 2.2963 valid loss: 2.2721, valid accuracy:

Iter-10290 train loss: 2.2804 valid loss: 2.2687, valid accuracy: 0.1500
Iter-10300 train loss: 2.2744 valid loss: 2.2687, valid accuracy: 0.1502
Iter-10310 train loss: 2.2687 valid loss: 2.2687, valid accuracy: 0.1502
Iter-10320 train loss: 2.2535 valid loss: 2.2687, valid accuracy: 0.1500
Iter-10330 train loss: 2.2939 valid loss: 2.2687, valid accuracy: 0.1508
Iter-10340 train loss: 2.2721 valid loss: 2.2687, valid accuracy: 0.1502
Iter-10350 train loss: 2.2689 valid loss: 2.2685, valid accuracy: 0.1502
Iter-10360 train loss: 2.2717 valid loss: 2.2685, valid accuracy: 0.1498
Iter-10370 train loss: 2.2602 valid loss: 2.2684, valid accuracy: 0.1500
Iter-10380 train loss: 2.2846 valid loss: 2.2683, valid accuracy: 0.1496
Iter-10390 train loss: 2.2857 valid loss: 2.2682, valid accuracy: 0.1496
Iter-10400 train loss: 2.2745 valid loss: 2.2682, valid accuracy: 0.1494
Iter-10410 train loss: 2.2759 valid loss: 2.2682, valid accuracy: 0.1492
Iter-10420 train loss: 2.2762 valid loss: 2.2681, v

Iter-11420 train loss: 2.2699 valid loss: 2.2642, valid accuracy: 0.1508
Iter-11430 train loss: 2.2833 valid loss: 2.2642, valid accuracy: 0.1506
Iter-11440 train loss: 2.2403 valid loss: 2.2642, valid accuracy: 0.1500
Iter-11450 train loss: 2.2976 valid loss: 2.2641, valid accuracy: 0.1504
Iter-11460 train loss: 2.2622 valid loss: 2.2641, valid accuracy: 0.1500
Iter-11470 train loss: 2.2830 valid loss: 2.2640, valid accuracy: 0.1502
Iter-11480 train loss: 2.2930 valid loss: 2.2639, valid accuracy: 0.1506
Iter-11490 train loss: 2.2690 valid loss: 2.2639, valid accuracy: 0.1502
Iter-11500 train loss: 2.2445 valid loss: 2.2639, valid accuracy: 0.1504
Iter-11510 train loss: 2.2682 valid loss: 2.2638, valid accuracy: 0.1506
Iter-11520 train loss: 2.2937 valid loss: 2.2637, valid accuracy: 0.1518
Iter-11530 train loss: 2.2838 valid loss: 2.2636, valid accuracy: 0.1518
Iter-11540 train loss: 2.2485 valid loss: 2.2635, valid accuracy: 0.1516
Iter-11550 train loss: 2.2426 valid loss: 2.2635, v

Iter-12550 train loss: 2.2569 valid loss: 2.2599, valid accuracy: 0.1524
Iter-12560 train loss: 2.2623 valid loss: 2.2598, valid accuracy: 0.1528
Iter-12570 train loss: 2.2701 valid loss: 2.2597, valid accuracy: 0.1534
Iter-12580 train loss: 2.2524 valid loss: 2.2597, valid accuracy: 0.1526
Iter-12590 train loss: 2.2343 valid loss: 2.2597, valid accuracy: 0.1524
Iter-12600 train loss: 2.2835 valid loss: 2.2598, valid accuracy: 0.1524
Iter-12610 train loss: 2.2789 valid loss: 2.2597, valid accuracy: 0.1520
Iter-12620 train loss: 2.2488 valid loss: 2.2598, valid accuracy: 0.1522
Iter-12630 train loss: 2.2388 valid loss: 2.2597, valid accuracy: 0.1526
Iter-12640 train loss: 2.2849 valid loss: 2.2597, valid accuracy: 0.1520
Iter-12650 train loss: 2.2526 valid loss: 2.2597, valid accuracy: 0.1520
Iter-12660 train loss: 2.2176 valid loss: 2.2598, valid accuracy: 0.1516
Iter-12670 train loss: 2.2576 valid loss: 2.2597, valid accuracy: 0.1512
Iter-12680 train loss: 2.2807 valid loss: 2.2597, v

Iter-13680 train loss: 2.2389 valid loss: 2.2559, valid accuracy: 0.1438
Iter-13690 train loss: 2.2674 valid loss: 2.2558, valid accuracy: 0.1444
Iter-13700 train loss: 2.2483 valid loss: 2.2558, valid accuracy: 0.1452
Iter-13710 train loss: 2.2796 valid loss: 2.2557, valid accuracy: 0.1452
Iter-13720 train loss: 2.2598 valid loss: 2.2557, valid accuracy: 0.1450
Iter-13730 train loss: 2.2675 valid loss: 2.2557, valid accuracy: 0.1446
Iter-13740 train loss: 2.2843 valid loss: 2.2558, valid accuracy: 0.1444
Iter-13750 train loss: 2.2736 valid loss: 2.2558, valid accuracy: 0.1454
Iter-13760 train loss: 2.2531 valid loss: 2.2556, valid accuracy: 0.1448
Iter-13770 train loss: 2.2546 valid loss: 2.2555, valid accuracy: 0.1450
Iter-13780 train loss: 2.2627 valid loss: 2.2555, valid accuracy: 0.1458
Iter-13790 train loss: 2.2746 valid loss: 2.2555, valid accuracy: 0.1462
Iter-13800 train loss: 2.2476 valid loss: 2.2556, valid accuracy: 0.1470
Iter-13810 train loss: 2.2390 valid loss: 2.2555, v

Iter-14810 train loss: 2.2602 valid loss: 2.2519, valid accuracy: 0.1432
Iter-14820 train loss: 2.2671 valid loss: 2.2519, valid accuracy: 0.1422
Iter-14830 train loss: 2.2540 valid loss: 2.2518, valid accuracy: 0.1430
Iter-14840 train loss: 2.2347 valid loss: 2.2518, valid accuracy: 0.1426
Iter-14850 train loss: 2.2633 valid loss: 2.2519, valid accuracy: 0.1454
Iter-14860 train loss: 2.2604 valid loss: 2.2519, valid accuracy: 0.1444
Iter-14870 train loss: 2.2671 valid loss: 2.2518, valid accuracy: 0.1452
Iter-14880 train loss: 2.2479 valid loss: 2.2518, valid accuracy: 0.1448
Iter-14890 train loss: 2.2426 valid loss: 2.2517, valid accuracy: 0.1434
Iter-14900 train loss: 2.2508 valid loss: 2.2517, valid accuracy: 0.1434
Iter-14910 train loss: 2.2558 valid loss: 2.2516, valid accuracy: 0.1406
Iter-14920 train loss: 2.2417 valid loss: 2.2516, valid accuracy: 0.1410
Iter-14930 train loss: 2.2455 valid loss: 2.2516, valid accuracy: 0.1412
Iter-14940 train loss: 2.2575 valid loss: 2.2515, v

Iter-15940 train loss: 2.2415 valid loss: 2.2481, valid accuracy: 0.1868
Iter-15950 train loss: 2.2349 valid loss: 2.2480, valid accuracy: 0.1838
Iter-15960 train loss: 2.2497 valid loss: 2.2478, valid accuracy: 0.1780
Iter-15970 train loss: 2.2297 valid loss: 2.2477, valid accuracy: 0.1774
Iter-15980 train loss: 2.2370 valid loss: 2.2477, valid accuracy: 0.1758
Iter-15990 train loss: 2.2434 valid loss: 2.2477, valid accuracy: 0.1792
Iter-16000 train loss: 2.2548 valid loss: 2.2477, valid accuracy: 0.1792
Iter-16010 train loss: 2.2312 valid loss: 2.2476, valid accuracy: 0.1772
Iter-16020 train loss: 2.2639 valid loss: 2.2475, valid accuracy: 0.1736
Iter-16030 train loss: 2.2613 valid loss: 2.2475, valid accuracy: 0.1704
Iter-16040 train loss: 2.2497 valid loss: 2.2474, valid accuracy: 0.1708
Iter-16050 train loss: 2.2518 valid loss: 2.2474, valid accuracy: 0.1692
Iter-16060 train loss: 2.2691 valid loss: 2.2474, valid accuracy: 0.1650
Iter-16070 train loss: 2.2310 valid loss: 2.2473, v

Iter-17070 train loss: 2.2312 valid loss: 2.2415, valid accuracy: 0.1208
Iter-17080 train loss: 2.2472 valid loss: 2.2415, valid accuracy: 0.1212
Iter-17090 train loss: 2.2355 valid loss: 2.2414, valid accuracy: 0.1208
Iter-17100 train loss: 2.2538 valid loss: 2.2414, valid accuracy: 0.1216
Iter-17110 train loss: 2.2605 valid loss: 2.2411, valid accuracy: 0.1210
Iter-17120 train loss: 2.2358 valid loss: 2.2411, valid accuracy: 0.1210
Iter-17130 train loss: 2.2378 valid loss: 2.2410, valid accuracy: 0.1208
Iter-17140 train loss: 2.2808 valid loss: 2.2410, valid accuracy: 0.1212
Iter-17150 train loss: 2.2455 valid loss: 2.2410, valid accuracy: 0.1220
Iter-17160 train loss: 2.2502 valid loss: 2.2409, valid accuracy: 0.1216
Iter-17170 train loss: 2.2346 valid loss: 2.2408, valid accuracy: 0.1204
Iter-17180 train loss: 2.2672 valid loss: 2.2407, valid accuracy: 0.1206
Iter-17190 train loss: 2.2633 valid loss: 2.2407, valid accuracy: 0.1210
Iter-17200 train loss: 2.2406 valid loss: 2.2406, v

Iter-18200 train loss: 2.2479 valid loss: 2.2334, valid accuracy: 0.1270
Iter-18210 train loss: 2.2317 valid loss: 2.2333, valid accuracy: 0.1270
Iter-18220 train loss: 2.2356 valid loss: 2.2332, valid accuracy: 0.1264
Iter-18230 train loss: 2.2330 valid loss: 2.2332, valid accuracy: 0.1264
Iter-18240 train loss: 2.2025 valid loss: 2.2331, valid accuracy: 0.1268
Iter-18250 train loss: 2.2327 valid loss: 2.2331, valid accuracy: 0.1270
Iter-18260 train loss: 2.2285 valid loss: 2.2330, valid accuracy: 0.1268
Iter-18270 train loss: 2.2181 valid loss: 2.2329, valid accuracy: 0.1266
Iter-18280 train loss: 2.2726 valid loss: 2.2328, valid accuracy: 0.1258
Iter-18290 train loss: 2.2129 valid loss: 2.2326, valid accuracy: 0.1256
Iter-18300 train loss: 2.2394 valid loss: 2.2325, valid accuracy: 0.1256
Iter-18310 train loss: 2.2478 valid loss: 2.2325, valid accuracy: 0.1268
Iter-18320 train loss: 2.2294 valid loss: 2.2324, valid accuracy: 0.1266
Iter-18330 train loss: 2.2449 valid loss: 2.2324, v

Iter-19330 train loss: 2.2101 valid loss: 2.2232, valid accuracy: 0.1392
Iter-19340 train loss: 2.2334 valid loss: 2.2231, valid accuracy: 0.1398
Iter-19350 train loss: 2.2142 valid loss: 2.2231, valid accuracy: 0.1432


In [None]:
# # Display the learning curve and losses for training, validation, and testing
# %matplotlib inline
# %config InlineBackend.figure_format = 'retina'
import matplotlib.pyplot as plt

# Learning curves: one point per SGD iteration for both series
plt.plot(nn.losses['train'], label='Train loss')
plt.plot(nn.losses['valid'], label='Valid loss')
plt.xlabel('Iteration')
plt.ylabel('Loss')
plt.title('Training and validation loss')
plt.legend()
plt.show()

In [None]:
# Validation accuracy recorded after every SGD iteration
plt.plot(nn.losses['valid_acc'], label='Valid accuracy')
plt.xlabel('Iteration')
plt.ylabel('Accuracy')
plt.title('Validation accuracy')
plt.legend()
plt.show()