In [5]:
# Data
import numpy as np
from tensorflow.examples.tutorials.mnist import input_data
import impl.layer as l

# Load the MNIST splits; one_hot=False asks for labels as class indices
# rather than one-hot vectors.
mnist = input_data.read_data_sets('data/MNIST_data/', one_hot=False)

# Unpack images (X) and labels (y) for the train / validation / test splits.
X_train = mnist.train.images
y_train = mnist.train.labels
X_val = mnist.validation.images
y_val = mnist.validation.labels
X_test = mnist.test.images
y_test = mnist.test.labels
# y_test.shape, y_val.shape, y_train.shape
# X_train.shape, X_train.dtype, X_val.shape, X_val.dtype, X_test.shape, X_test.dtype

Extracting data/MNIST_data/train-images-idx3-ubyte.gz
Extracting data/MNIST_data/train-labels-idx1-ubyte.gz
Extracting data/MNIST_data/t10k-images-idx3-ubyte.gz
Extracting data/MNIST_data/t10k-labels-idx1-ubyte.gz


In [6]:
# Pre-processing: normalizing
def normalize(X, mean=None):
    """Center the features of ``X`` by subtracting a per-feature mean.

    Args:
        X: array with samples along axis 0.
        mean: optional per-feature mean to subtract. When omitted, the mean of
            ``X`` itself along axis 0 is used (the original behaviour).

    Returns:
        ``X - mean``. Scaling by the standard deviation is deliberately not
        applied (it was disabled in the original code: ``/ X.std(axis=0)``).
    """
    if mean is None:
        mean = X.mean(axis=0)
    return X - mean

# Center every split with the statistics of the TRAINING split only, so that
# validation/test inputs go through exactly the same transform the model was
# trained on (centering each split with its own mean shifts them differently).
# The mean is computed before X_train is rebound to its centered version.
train_mean = X_train.mean(axis=0)
X_train, X_val, X_test = (normalize(X=X_train, mean=train_mean),
                          normalize(X=X_val, mean=train_mean),
                          normalize(X=X_test, mean=train_mean))

In [None]:
# Model
import impl.layer as l # or from impl.layer import *
from impl.loss import * # import all functions from impl.loss file # import impl.loss as loss_func
from sklearn.utils import shuffle as skshuffle

class FFNN:
    """Feed-forward tanh network trained with feedback alignment.

    Architecture: an input layer (D -> H, tanh), ``L`` hidden layers
    (H -> H, tanh) and a linear output layer (H -> C) that yields logits.
    During the backward pass the error is propagated through fixed random
    matrices (``W_fixed``) instead of the transposed forward weights.

    Parameters, fixed feedback matrices, gradients and ``ys_prev`` all share
    the same nested layout: ``[input, [hidden_0, ..., hidden_{L-1}], output]``.
    """

    @staticmethod
    def _init_weight(fan_in, fan_out, low=-1, high=1):
        """Draw a (fan_in, fan_out) matrix from U(low, high) / sqrt(fan_in / 2)."""
        return np.random.uniform(size=(fan_in, fan_out), low=low, high=high) / np.sqrt(fan_in / 2.)

    def __init__(self, D, C, H, L):
        """Create the parameters.

        Args:
            D: number of input features.
            C: number of output units (classes).
            H: number of units in every hidden layer.
            L: number of hidden (H -> H) layers.
        """
        self.L = L # number of hidden layers (depth)
        self.losses = {'train':[], 'valid':[], 'valid_acc':[]}

        self.model = []    # learnable weights/biases
        self.W_fixed = []  # fixed random feedback weights
        self.grads = []    # most recent gradients, same layout as self.model
        self.ys_prev = []  # layer outputs of the previous training step

        # NOTE: the order of random draws below (forward W, then feedback W,
        # layer group by layer group) matches the original implementation.

        # Input layer
        self.model.append(dict(W=self._init_weight(D, H), b=np.zeros((1, H))))
        self.W_fixed.append(self._init_weight(D, H))
        self.grads.append({key: np.zeros_like(val) for key, val in self.model[0].items()})
        self.ys_prev.append(0.0)

        # Hidden layers
        self.model.append([dict(W=self._init_weight(H, H), b=np.zeros((1, H))) for _ in range(L)])
        self.W_fixed.append([self._init_weight(H, H) for _ in range(L)])
        self.grads.append([{key: np.zeros_like(val) for key, val in layer.items()}
                           for layer in self.model[1]])
        self.ys_prev.append([0.0 for _ in range(L)])

        # Output layer
        self.model.append(dict(W=self._init_weight(H, C), b=np.zeros((1, C))))
        self.W_fixed.append(self._init_weight(H, C))
        self.grads.append({key: np.zeros_like(val) for key, val in self.model[2].items()})
        self.ys_prev.append(0.0)

    def fc_forward(self, X, W, b):
        """Affine layer ``X @ W + b``; returns the output and ``(W, X)`` as cache."""
        out = (X @ W) + b
        cache = (W, X)
        return out, cache

    def fc_backward(self, dout, cache, W_fixed):
        """Backward pass of an affine layer using feedback alignment.

        Args:
            dout: upstream gradient w.r.t. the layer output.
            cache: ``(W, X)`` tuple produced by ``fc_forward``.
            W_fixed: fixed random matrix with the same shape as ``W``.

        Returns:
            ``(dX, dW, db)``. ``dW``/``db`` are the usual gradients; ``dX`` is
            computed with ``W_fixed.T`` instead of ``W.T``.
        """
        _, X = cache  # the forward weight is cached but not used for dX

        dW = X.T @ dout
        db = np.sum(dout, axis=0).reshape(1, -1) # shape (1, n_out)

        # True backprop would be: dX = dout @ W.T
        dX = dout @ W_fixed.T # feedback alignment

        return dX, dW, db

    def train_forward(self, X, train):
        """Run the network forward.

        Args:
            X: input batch, one sample per row.
            train: when true, the per-layer caches needed by
                ``train_backward`` are collected; otherwise ``caches`` is
                returned empty.

        Returns:
            ``(ys, caches)`` where ``ys = [input_out, [hidden_outs...], logits]``.
        """
        caches, ys = [], []

        # Input layer
        y, fc_cache = self.fc_forward(X=X, W=self.model[0]['W'], b=self.model[0]['b'])
        y, nl_cache = l.tanh_forward(X=y)
        if train:
            caches.append((fc_cache, nl_cache))
        ys.append(y) # ys[0]
        X = y.copy() # pass to the next layer

        # Hidden layers
        fc_caches, nl_caches, ys_L = [], [], []
        for layer in range(self.L):
            y, fc_cache = self.fc_forward(X=X, W=self.model[1][layer]['W'], b=self.model[1][layer]['b'])
            y, nl_cache = l.tanh_forward(X=y)
            ys_L.append(y) # ys[1][layer]
            X = y.copy() # pass to next layer
            if train:
                fc_caches.append(fc_cache)
                nl_caches.append(nl_cache)
        if train:
            caches.append((fc_caches, nl_caches)) # caches[1]
        ys.append(ys_L) # ys[1]

        # Output layer (linear; softmax is applied by the loss / by test())
        y, fc_cache = self.fc_forward(X=X, W=self.model[2]['W'], b=self.model[2]['b'])
        if train:
            caches.append(fc_cache)
        ys.append(y) # ys[2]

        return ys, caches

    def loss_function(self, y, y_train):
        """Return ``(loss, dy)`` from ``cross_entropy`` / ``dcross_entropy`` on logits ``y``."""
        loss = cross_entropy(y, y_train) # softmax is included
        dy = dcross_entropy(y, y_train) # dsoftmax is included

        return loss, dy

    def train_backward(self, dy, caches, ys):
        """Propagate ``dy`` from the output back to the input and fill ``self.grads``.

        Args:
            dy: gradient of the loss w.r.t. the logits.
            caches: caches returned by ``train_forward(..., train=True)``.
            ys: layer outputs from the forward pass (kept in the signature for
                the experimental temporal-difference variant; unused here).

        Returns:
            ``(dX, grads)``: the gradient w.r.t. the network input and the
            gradient structure (the same object as ``self.grads``, whose
            entries are overwritten on every call).
        """
        grads = self.grads

        # Output layer
        fc_cache = caches[2]
        dX, dW, db = self.fc_backward(dout=dy, cache=fc_cache, W_fixed=self.W_fixed[2])
        dy = dX.copy()
        grads[2]['W'] = dW
        grads[2]['b'] = db

        # Hidden layers, last to first
        fc_caches, nl_caches = caches[1]
        for layer in reversed(range(self.L)):
            dy = l.tanh_backward(cache=nl_caches[layer], dout=dy)
            dX, dW, db = self.fc_backward(dout=dy, cache=fc_caches[layer], W_fixed=self.W_fixed[1][layer])
            dy = dX.copy()
            grads[1][layer]['W'] = dW
            grads[1][layer]['b'] = db

        # Input layer
        fc_cache, nl_cache = caches[0]
        dy = l.tanh_backward(cache=nl_cache, dout=dy)
        dX, dW, db = self.fc_backward(dout=dy, cache=fc_cache, W_fixed=self.W_fixed[0])
        grads[0]['W'] = dW
        grads[0]['b'] = db

        return dX, grads

    def test(self, X):
        """Predict for ``X``; returns ``(y_pred, y_logit)`` (argmax class and raw logits)."""
        ys_logit, _ = self.train_forward(X, train=False)
        y_logit = ys_logit[2] # last layer

        y_prob = l.softmax(y_logit)
        y_pred = np.argmax(y_prob, axis=1)

        return y_pred, y_logit

    def get_minibatch(self, X, y, minibatch_size, shuffle):
        """Split ``(X, y)`` into consecutive minibatches, optionally shuffling first.

        Returns a list of ``(X_mini, y_mini)`` tuples; the last one may be
        smaller than ``minibatch_size``.
        """
        minibatches = []

        if shuffle:
            X, y = skshuffle(X, y)

        for i in range(0, X.shape[0], minibatch_size):
            X_mini = X[i:i + minibatch_size]
            y_mini = y[i:i + minibatch_size]
            minibatches.append((X_mini, y_mini))

        return minibatches

    def _sample_minibatch(self, X, y, minibatch_size):
        """Draw one random minibatch (rows sampled without replacement).

        This replaces "shuffle the whole set, slice it into minibatches, pick
        one at random", which copied the full training set on every step.
        """
        n_samples = X.shape[0]
        idx = np.random.choice(n_samples, size=min(minibatch_size, n_samples), replace=False)
        return X[idx], y[idx]

    def sgd(self, train_set, val_set, alpha, mb_size, n_iter, print_after, test_set=None):
        """Train with minibatch SGD, validating after every step.

        Args:
            train_set: ``(X_train, y_train)``.
            val_set: ``(X_val, y_val)``; evaluated in full after every update.
            alpha: learning rate.
            mb_size: minibatch size.
            n_iter: number of SGD steps (one random minibatch per step).
            print_after: print metrics every ``print_after`` steps.
            test_set: optional ``(X_test, y_test)`` for the final evaluation.
                When omitted, the module-level ``X_test`` / ``y_test`` are
                used, as the original implementation did.
        """
        X_train, y_train = train_set
        X_val, y_val = val_set

        for it in range(1, n_iter + 1):

            # One random minibatch per step
            X_mini, y_mini = self._sample_minibatch(X_train, y_train, mb_size)

            # Forward / loss / backward
            ys, caches = self.train_forward(X_mini, train=True)
            loss, dy = self.loss_function(ys[2], y_mini)
            _, grads = self.train_backward(dy, caches, ys)
            self.ys_prev = ys # kept for the next step
            self.losses['train'].append(loss)

            # Update the input layer
            for key in grads[0].keys():
                self.model[0][key] -= alpha * grads[0][key]

            # Update the hidden layers
            for layer in range(self.L):
                for key in grads[1][layer].keys():
                    self.model[1][layer][key] -= alpha * grads[1][layer][key]

            # Update the output layer
            for key in grads[2].keys():
                self.model[2][key] -= alpha * grads[2][key]

            # Validate the updated model
            y_pred, y_logit = self.test(X_val)
            valid_loss, _ = self.loss_function(y_logit, y_val) # softmax is included in the loss
            self.losses['valid'].append(valid_loss)
            valid_acc = np.mean(y_pred == y_val)
            self.losses['valid_acc'].append(valid_acc)

            if it % print_after == 0:
                print('Iter-{} train loss: {:.4f} valid loss: {:.4f}, valid accuracy: {:.4f}'.format(
                    it, loss, valid_loss, valid_acc))

        # Final evaluation on the test split, using THIS model (the original
        # called the module-level `nn`, which only worked by coincidence).
        if test_set is None:
            test_set = (X_test, y_test) # module-level fallback, as before
        X_eval, y_eval = test_set
        y_pred, y_logit = self.test(X_eval)
        loss, _ = self.loss_function(y_logit, y_eval) # softmax is included in the loss
        acc = np.mean(y_pred == y_eval)
        # acc is a scalar, so the reported std is always 0; format kept as-is.
        print('Last iteration - Test accuracy mean: {:.4f}, std: {:.4f}, loss: {:.4f}'.format(
            acc.mean(), acc.std(), loss))

In [None]:
# Hyper-parameters
alpha = 1e-3 # learning rate (SGD step size)
mb_size = 50 # number of samples per minibatch
n_iter = 1000000 # number of SGD iterations; each one trains on a single minibatch
print_after = 100 # report train/valid metrics every `print_after` iterations
num_layers = 2 # number of hidden layers (depth)
num_hidden_units = 32 # width of every hidden layer
num_input_units = X_train.shape[1] # input dimensionality = number of feature columns
num_output_units = y_train.max() + 1 # number of classes, assuming labels run from 0 to max

# Build the network, then train it.
nn = FFNN(D=num_input_units, C=num_output_units, H=num_hidden_units, L=num_layers)

nn.sgd(
    train_set=(X_train, y_train),
    val_set=(X_val, y_val),
    alpha=alpha,
    mb_size=mb_size,
    n_iter=n_iter,
    print_after=print_after,
)

Iter-100 train loss: 2.3097 valid loss: 2.3107, valid accuracy: 0.0802
Iter-200 train loss: 2.3105 valid loss: 2.3123, valid accuracy: 0.0812
Iter-300 train loss: 2.3117 valid loss: 2.3137, valid accuracy: 0.0794
Iter-400 train loss: 2.3183 valid loss: 2.3149, valid accuracy: 0.0798
Iter-500 train loss: 2.3100 valid loss: 2.3159, valid accuracy: 0.0792
Iter-600 train loss: 2.3147 valid loss: 2.3165, valid accuracy: 0.0796
Iter-700 train loss: 2.3244 valid loss: 2.3167, valid accuracy: 0.0794
Iter-800 train loss: 2.3077 valid loss: 2.3166, valid accuracy: 0.0806
Iter-900 train loss: 2.3267 valid loss: 2.3158, valid accuracy: 0.0836
Iter-1000 train loss: 2.3127 valid loss: 2.3146, valid accuracy: 0.0854
Iter-1100 train loss: 2.3310 valid loss: 2.3131, valid accuracy: 0.0862
Iter-1200 train loss: 2.3438 valid loss: 2.3109, valid accuracy: 0.0872
Iter-1300 train loss: 2.2932 valid loss: 2.3082, valid accuracy: 0.0890
Iter-1400 train loss: 2.2944 valid loss: 2.3050, valid accuracy: 0.0906
I

Iter-11500 train loss: 1.3247 valid loss: 1.2044, valid accuracy: 0.6922
Iter-11600 train loss: 1.3651 valid loss: 1.1956, valid accuracy: 0.6960
Iter-11700 train loss: 1.1599 valid loss: 1.1867, valid accuracy: 0.6964
Iter-11800 train loss: 1.0998 valid loss: 1.1777, valid accuracy: 0.6988
Iter-11900 train loss: 1.0984 valid loss: 1.1693, valid accuracy: 0.7012
Iter-12000 train loss: 1.1258 valid loss: 1.1610, valid accuracy: 0.7036
Iter-12100 train loss: 1.1328 valid loss: 1.1527, valid accuracy: 0.7074
Iter-12200 train loss: 1.0046 valid loss: 1.1446, valid accuracy: 0.7100
Iter-12300 train loss: 1.2347 valid loss: 1.1365, valid accuracy: 0.7122
Iter-12400 train loss: 1.2578 valid loss: 1.1288, valid accuracy: 0.7130
Iter-12500 train loss: 1.0239 valid loss: 1.1210, valid accuracy: 0.7132
Iter-12600 train loss: 1.1510 valid loss: 1.1133, valid accuracy: 0.7160
Iter-12700 train loss: 1.0684 valid loss: 1.1057, valid accuracy: 0.7178
Iter-12800 train loss: 1.0345 valid loss: 1.0983, v

Iter-22800 train loss: 0.7429 valid loss: 0.7031, valid accuracy: 0.8172
Iter-22900 train loss: 0.8107 valid loss: 0.7010, valid accuracy: 0.8182
Iter-23000 train loss: 0.6681 valid loss: 0.6989, valid accuracy: 0.8196
Iter-23100 train loss: 0.6926 valid loss: 0.6969, valid accuracy: 0.8206
Iter-23200 train loss: 0.7128 valid loss: 0.6950, valid accuracy: 0.8212
Iter-23300 train loss: 0.6668 valid loss: 0.6930, valid accuracy: 0.8222
Iter-23400 train loss: 0.8144 valid loss: 0.6910, valid accuracy: 0.8224
Iter-23500 train loss: 0.6615 valid loss: 0.6891, valid accuracy: 0.8232
Iter-23600 train loss: 0.7539 valid loss: 0.6871, valid accuracy: 0.8230
Iter-23700 train loss: 0.6680 valid loss: 0.6851, valid accuracy: 0.8238
Iter-23800 train loss: 0.7885 valid loss: 0.6833, valid accuracy: 0.8244
Iter-23900 train loss: 0.7384 valid loss: 0.6814, valid accuracy: 0.8254
Iter-24000 train loss: 0.7731 valid loss: 0.6795, valid accuracy: 0.8258
Iter-24100 train loss: 0.4166 valid loss: 0.6776, v

Iter-34100 train loss: 0.7009 valid loss: 0.5455, valid accuracy: 0.8584
Iter-34200 train loss: 0.6587 valid loss: 0.5445, valid accuracy: 0.8588
Iter-34300 train loss: 0.6210 valid loss: 0.5436, valid accuracy: 0.8588
Iter-34400 train loss: 0.5044 valid loss: 0.5426, valid accuracy: 0.8588
Iter-34500 train loss: 0.7957 valid loss: 0.5417, valid accuracy: 0.8588
Iter-34600 train loss: 0.8407 valid loss: 0.5408, valid accuracy: 0.8590
Iter-34700 train loss: 0.4476 valid loss: 0.5400, valid accuracy: 0.8588
Iter-34800 train loss: 0.5602 valid loss: 0.5391, valid accuracy: 0.8588
Iter-34900 train loss: 0.6028 valid loss: 0.5382, valid accuracy: 0.8590
Iter-35000 train loss: 0.5992 valid loss: 0.5374, valid accuracy: 0.8594
Iter-35100 train loss: 0.4778 valid loss: 0.5365, valid accuracy: 0.8598
Iter-35200 train loss: 0.5871 valid loss: 0.5356, valid accuracy: 0.8596
Iter-35300 train loss: 0.5205 valid loss: 0.5347, valid accuracy: 0.8596
Iter-35400 train loss: 0.6746 valid loss: 0.5339, v

Iter-45400 train loss: 0.6291 valid loss: 0.4667, valid accuracy: 0.8742
Iter-45500 train loss: 0.4978 valid loss: 0.4663, valid accuracy: 0.8746
Iter-45600 train loss: 0.7108 valid loss: 0.4658, valid accuracy: 0.8742
Iter-45700 train loss: 0.3638 valid loss: 0.4652, valid accuracy: 0.8746
Iter-45800 train loss: 0.4252 valid loss: 0.4647, valid accuracy: 0.8748
Iter-45900 train loss: 0.3812 valid loss: 0.4643, valid accuracy: 0.8750
Iter-46000 train loss: 0.6263 valid loss: 0.4638, valid accuracy: 0.8752
Iter-46100 train loss: 0.4347 valid loss: 0.4632, valid accuracy: 0.8748
Iter-46200 train loss: 0.3484 valid loss: 0.4627, valid accuracy: 0.8750
Iter-46300 train loss: 0.6050 valid loss: 0.4622, valid accuracy: 0.8752
Iter-46400 train loss: 0.3885 valid loss: 0.4617, valid accuracy: 0.8752
Iter-46500 train loss: 0.5451 valid loss: 0.4612, valid accuracy: 0.8756
Iter-46600 train loss: 0.6189 valid loss: 0.4608, valid accuracy: 0.8758
Iter-46700 train loss: 0.5679 valid loss: 0.4602, v

Iter-56700 train loss: 0.3863 valid loss: 0.4203, valid accuracy: 0.8860
Iter-56800 train loss: 0.4496 valid loss: 0.4199, valid accuracy: 0.8860
Iter-56900 train loss: 0.6046 valid loss: 0.4196, valid accuracy: 0.8860
Iter-57000 train loss: 0.4050 valid loss: 0.4193, valid accuracy: 0.8860
Iter-57100 train loss: 0.2153 valid loss: 0.4190, valid accuracy: 0.8860
Iter-57200 train loss: 0.3561 valid loss: 0.4187, valid accuracy: 0.8864
Iter-57300 train loss: 0.3366 valid loss: 0.4183, valid accuracy: 0.8864
Iter-57400 train loss: 0.5663 valid loss: 0.4180, valid accuracy: 0.8866
Iter-57500 train loss: 0.4289 valid loss: 0.4177, valid accuracy: 0.8866
Iter-57600 train loss: 0.4591 valid loss: 0.4173, valid accuracy: 0.8870
Iter-57700 train loss: 0.5398 valid loss: 0.4170, valid accuracy: 0.8874
Iter-57800 train loss: 0.3092 valid loss: 0.4167, valid accuracy: 0.8876
Iter-57900 train loss: 0.3132 valid loss: 0.4164, valid accuracy: 0.8878
Iter-58000 train loss: 0.3570 valid loss: 0.4160, v

Iter-68000 train loss: 0.2968 valid loss: 0.3891, valid accuracy: 0.8950
Iter-68100 train loss: 0.4216 valid loss: 0.3889, valid accuracy: 0.8952
Iter-68200 train loss: 0.4610 valid loss: 0.3887, valid accuracy: 0.8948
Iter-68300 train loss: 0.4470 valid loss: 0.3885, valid accuracy: 0.8948
Iter-68400 train loss: 0.2943 valid loss: 0.3882, valid accuracy: 0.8948
Iter-68500 train loss: 0.4589 valid loss: 0.3880, valid accuracy: 0.8952
Iter-68600 train loss: 0.4615 valid loss: 0.3877, valid accuracy: 0.8948
Iter-68700 train loss: 0.2702 valid loss: 0.3875, valid accuracy: 0.8950
Iter-68800 train loss: 0.3380 valid loss: 0.3873, valid accuracy: 0.8950
Iter-68900 train loss: 0.4043 valid loss: 0.3870, valid accuracy: 0.8948
Iter-69000 train loss: 0.4431 valid loss: 0.3868, valid accuracy: 0.8950
Iter-69100 train loss: 0.2189 valid loss: 0.3866, valid accuracy: 0.8950
Iter-69200 train loss: 0.3907 valid loss: 0.3864, valid accuracy: 0.8950
Iter-69300 train loss: 0.4518 valid loss: 0.3862, v

Iter-79300 train loss: 0.5249 valid loss: 0.3670, valid accuracy: 0.8988
Iter-79400 train loss: 0.4023 valid loss: 0.3668, valid accuracy: 0.8988
Iter-79500 train loss: 0.3274 valid loss: 0.3666, valid accuracy: 0.8990
Iter-79600 train loss: 0.4314 valid loss: 0.3665, valid accuracy: 0.8990
Iter-79700 train loss: 0.4907 valid loss: 0.3663, valid accuracy: 0.8992
Iter-79800 train loss: 0.3104 valid loss: 0.3661, valid accuracy: 0.8990
Iter-79900 train loss: 0.3974 valid loss: 0.3660, valid accuracy: 0.8994
Iter-80000 train loss: 0.3577 valid loss: 0.3658, valid accuracy: 0.8994
Iter-80100 train loss: 0.2916 valid loss: 0.3656, valid accuracy: 0.8994
Iter-80200 train loss: 0.2348 valid loss: 0.3654, valid accuracy: 0.8996
Iter-80300 train loss: 0.4114 valid loss: 0.3653, valid accuracy: 0.8998
Iter-80400 train loss: 0.4210 valid loss: 0.3651, valid accuracy: 0.8998
Iter-80500 train loss: 0.2870 valid loss: 0.3650, valid accuracy: 0.8998
Iter-80600 train loss: 0.6465 valid loss: 0.3648, v

Iter-90600 train loss: 0.2214 valid loss: 0.3505, valid accuracy: 0.9030
Iter-90700 train loss: 0.3315 valid loss: 0.3503, valid accuracy: 0.9028
Iter-90800 train loss: 0.2235 valid loss: 0.3502, valid accuracy: 0.9032
Iter-90900 train loss: 0.3180 valid loss: 0.3501, valid accuracy: 0.9034
Iter-91000 train loss: 0.3777 valid loss: 0.3499, valid accuracy: 0.9032
Iter-91100 train loss: 0.2408 valid loss: 0.3498, valid accuracy: 0.9034
Iter-91200 train loss: 0.4084 valid loss: 0.3497, valid accuracy: 0.9036
Iter-91300 train loss: 0.3907 valid loss: 0.3495, valid accuracy: 0.9034
Iter-91400 train loss: 0.2347 valid loss: 0.3494, valid accuracy: 0.9034
Iter-91500 train loss: 0.3025 valid loss: 0.3493, valid accuracy: 0.9034
Iter-91600 train loss: 0.4232 valid loss: 0.3492, valid accuracy: 0.9032
Iter-91700 train loss: 0.5622 valid loss: 0.3490, valid accuracy: 0.9034
Iter-91800 train loss: 0.3410 valid loss: 0.3489, valid accuracy: 0.9032
Iter-91900 train loss: 0.4080 valid loss: 0.3488, v

Iter-101800 train loss: 0.1969 valid loss: 0.3376, valid accuracy: 0.9050
Iter-101900 train loss: 0.2419 valid loss: 0.3375, valid accuracy: 0.9048
Iter-102000 train loss: 0.4940 valid loss: 0.3374, valid accuracy: 0.9048
Iter-102100 train loss: 0.5742 valid loss: 0.3373, valid accuracy: 0.9050
Iter-102200 train loss: 0.3975 valid loss: 0.3372, valid accuracy: 0.9050
Iter-102300 train loss: 0.3200 valid loss: 0.3371, valid accuracy: 0.9052
Iter-102400 train loss: 0.3052 valid loss: 0.3370, valid accuracy: 0.9050
Iter-102500 train loss: 0.4042 valid loss: 0.3369, valid accuracy: 0.9050
Iter-102600 train loss: 0.2413 valid loss: 0.3368, valid accuracy: 0.9052
Iter-102700 train loss: 0.4718 valid loss: 0.3367, valid accuracy: 0.9050
Iter-102800 train loss: 0.3315 valid loss: 0.3366, valid accuracy: 0.9050
Iter-102900 train loss: 0.3535 valid loss: 0.3364, valid accuracy: 0.9054
Iter-103000 train loss: 0.3334 valid loss: 0.3363, valid accuracy: 0.9054
Iter-103100 train loss: 0.3681 valid l

Iter-112900 train loss: 0.3263 valid loss: 0.3271, valid accuracy: 0.9068
Iter-113000 train loss: 0.3020 valid loss: 0.3270, valid accuracy: 0.9070
Iter-113100 train loss: 0.5083 valid loss: 0.3269, valid accuracy: 0.9068
Iter-113200 train loss: 0.3295 valid loss: 0.3268, valid accuracy: 0.9068
Iter-113300 train loss: 0.4462 valid loss: 0.3268, valid accuracy: 0.9068
Iter-113400 train loss: 0.2901 valid loss: 0.3267, valid accuracy: 0.9068
Iter-113500 train loss: 0.3600 valid loss: 0.3266, valid accuracy: 0.9070
Iter-113600 train loss: 0.3002 valid loss: 0.3265, valid accuracy: 0.9070
Iter-113700 train loss: 0.2400 valid loss: 0.3264, valid accuracy: 0.9070
Iter-113800 train loss: 0.3559 valid loss: 0.3264, valid accuracy: 0.9072
Iter-113900 train loss: 0.4211 valid loss: 0.3263, valid accuracy: 0.9072
Iter-114000 train loss: 0.1772 valid loss: 0.3262, valid accuracy: 0.9072
Iter-114100 train loss: 0.3987 valid loss: 0.3261, valid accuracy: 0.9074
Iter-114200 train loss: 0.5609 valid l

Iter-124000 train loss: 0.2957 valid loss: 0.3182, valid accuracy: 0.9092
Iter-124100 train loss: 0.2613 valid loss: 0.3181, valid accuracy: 0.9090
Iter-124200 train loss: 0.3599 valid loss: 0.3181, valid accuracy: 0.9094
Iter-124300 train loss: 0.3267 valid loss: 0.3180, valid accuracy: 0.9090
Iter-124400 train loss: 0.5298 valid loss: 0.3180, valid accuracy: 0.9090
Iter-124500 train loss: 0.1305 valid loss: 0.3179, valid accuracy: 0.9090
Iter-124600 train loss: 0.2531 valid loss: 0.3178, valid accuracy: 0.9092
Iter-124700 train loss: 0.3202 valid loss: 0.3177, valid accuracy: 0.9094
Iter-124800 train loss: 0.5862 valid loss: 0.3177, valid accuracy: 0.9092
Iter-124900 train loss: 0.3092 valid loss: 0.3176, valid accuracy: 0.9094
Iter-125000 train loss: 0.5500 valid loss: 0.3175, valid accuracy: 0.9094
Iter-125100 train loss: 0.1468 valid loss: 0.3174, valid accuracy: 0.9094
Iter-125200 train loss: 0.5792 valid loss: 0.3174, valid accuracy: 0.9094
Iter-125300 train loss: 0.4841 valid l

Iter-135100 train loss: 0.2792 valid loss: 0.3107, valid accuracy: 0.9112
Iter-135200 train loss: 0.2672 valid loss: 0.3107, valid accuracy: 0.9112
Iter-135300 train loss: 0.4092 valid loss: 0.3106, valid accuracy: 0.9110
Iter-135400 train loss: 0.3943 valid loss: 0.3105, valid accuracy: 0.9110
Iter-135500 train loss: 0.2628 valid loss: 0.3105, valid accuracy: 0.9110
Iter-135600 train loss: 0.4005 valid loss: 0.3104, valid accuracy: 0.9108
Iter-135700 train loss: 0.3193 valid loss: 0.3104, valid accuracy: 0.9108
Iter-135800 train loss: 0.2310 valid loss: 0.3103, valid accuracy: 0.9110
Iter-135900 train loss: 0.3378 valid loss: 0.3103, valid accuracy: 0.9110
Iter-136000 train loss: 0.3225 valid loss: 0.3102, valid accuracy: 0.9112
Iter-136100 train loss: 0.4365 valid loss: 0.3101, valid accuracy: 0.9112
Iter-136200 train loss: 0.2082 valid loss: 0.3101, valid accuracy: 0.9114
Iter-136300 train loss: 0.2320 valid loss: 0.3100, valid accuracy: 0.9114
Iter-136400 train loss: 0.3039 valid l

Iter-146200 train loss: 0.3242 valid loss: 0.3043, valid accuracy: 0.9124
Iter-146300 train loss: 0.4379 valid loss: 0.3043, valid accuracy: 0.9122
Iter-146400 train loss: 0.2970 valid loss: 0.3042, valid accuracy: 0.9124
Iter-146500 train loss: 0.1786 valid loss: 0.3041, valid accuracy: 0.9124
Iter-146600 train loss: 0.1604 valid loss: 0.3041, valid accuracy: 0.9128
Iter-146700 train loss: 0.5566 valid loss: 0.3040, valid accuracy: 0.9128
Iter-146800 train loss: 0.4234 valid loss: 0.3040, valid accuracy: 0.9128
Iter-146900 train loss: 0.4259 valid loss: 0.3039, valid accuracy: 0.9128
Iter-147000 train loss: 0.3165 valid loss: 0.3038, valid accuracy: 0.9128
Iter-147100 train loss: 0.2720 valid loss: 0.3038, valid accuracy: 0.9128
Iter-147200 train loss: 0.2646 valid loss: 0.3038, valid accuracy: 0.9132
Iter-147300 train loss: 0.2444 valid loss: 0.3037, valid accuracy: 0.9130
Iter-147400 train loss: 0.4973 valid loss: 0.3036, valid accuracy: 0.9130
Iter-147500 train loss: 0.5332 valid l

Iter-157300 train loss: 0.2816 valid loss: 0.2986, valid accuracy: 0.9142
Iter-157400 train loss: 0.2725 valid loss: 0.2986, valid accuracy: 0.9142
Iter-157500 train loss: 0.2174 valid loss: 0.2986, valid accuracy: 0.9140
Iter-157600 train loss: 0.2943 valid loss: 0.2985, valid accuracy: 0.9142
Iter-157700 train loss: 0.1984 valid loss: 0.2985, valid accuracy: 0.9142
Iter-157800 train loss: 0.3246 valid loss: 0.2984, valid accuracy: 0.9142
Iter-157900 train loss: 0.1528 valid loss: 0.2984, valid accuracy: 0.9142
Iter-158000 train loss: 0.3811 valid loss: 0.2983, valid accuracy: 0.9142
Iter-158100 train loss: 0.6025 valid loss: 0.2983, valid accuracy: 0.9142
Iter-158200 train loss: 0.2339 valid loss: 0.2982, valid accuracy: 0.9142
Iter-158300 train loss: 0.2838 valid loss: 0.2981, valid accuracy: 0.9142
Iter-158400 train loss: 0.1870 valid loss: 0.2981, valid accuracy: 0.9140
Iter-158500 train loss: 0.2698 valid loss: 0.2981, valid accuracy: 0.9144
Iter-158600 train loss: 0.5086 valid l

Iter-168400 train loss: 0.3189 valid loss: 0.2937, valid accuracy: 0.9156
Iter-168500 train loss: 0.1205 valid loss: 0.2936, valid accuracy: 0.9156
Iter-168600 train loss: 0.5054 valid loss: 0.2936, valid accuracy: 0.9156
Iter-168700 train loss: 0.1973 valid loss: 0.2936, valid accuracy: 0.9158
Iter-168800 train loss: 0.2972 valid loss: 0.2935, valid accuracy: 0.9158
Iter-168900 train loss: 0.1643 valid loss: 0.2935, valid accuracy: 0.9158
Iter-169000 train loss: 0.3459 valid loss: 0.2935, valid accuracy: 0.9156
Iter-169100 train loss: 0.1572 valid loss: 0.2934, valid accuracy: 0.9156
Iter-169200 train loss: 0.2127 valid loss: 0.2934, valid accuracy: 0.9158
Iter-169300 train loss: 0.3905 valid loss: 0.2933, valid accuracy: 0.9164
Iter-169400 train loss: 0.6692 valid loss: 0.2933, valid accuracy: 0.9160
Iter-169500 train loss: 0.1664 valid loss: 0.2933, valid accuracy: 0.9162
Iter-169600 train loss: 0.3784 valid loss: 0.2932, valid accuracy: 0.9160
Iter-169700 train loss: 0.3672 valid l

Iter-179500 train loss: 0.2049 valid loss: 0.2892, valid accuracy: 0.9166
Iter-179600 train loss: 0.1960 valid loss: 0.2892, valid accuracy: 0.9164
Iter-179700 train loss: 0.4711 valid loss: 0.2892, valid accuracy: 0.9166
Iter-179800 train loss: 0.4485 valid loss: 0.2891, valid accuracy: 0.9168
Iter-179900 train loss: 0.2336 valid loss: 0.2891, valid accuracy: 0.9166
Iter-180000 train loss: 0.2881 valid loss: 0.2890, valid accuracy: 0.9166
Iter-180100 train loss: 0.1678 valid loss: 0.2890, valid accuracy: 0.9166
Iter-180200 train loss: 0.2896 valid loss: 0.2889, valid accuracy: 0.9166
Iter-180300 train loss: 0.5205 valid loss: 0.2889, valid accuracy: 0.9168
Iter-180400 train loss: 0.2005 valid loss: 0.2889, valid accuracy: 0.9170
Iter-180500 train loss: 0.2728 valid loss: 0.2888, valid accuracy: 0.9170
Iter-180600 train loss: 0.2630 valid loss: 0.2888, valid accuracy: 0.9170
Iter-180700 train loss: 0.4666 valid loss: 0.2888, valid accuracy: 0.9172
Iter-180800 train loss: 0.3783 valid l

Iter-190600 train loss: 0.3625 valid loss: 0.2852, valid accuracy: 0.9180
Iter-190700 train loss: 0.3262 valid loss: 0.2852, valid accuracy: 0.9180
Iter-190800 train loss: 0.2726 valid loss: 0.2852, valid accuracy: 0.9180
Iter-190900 train loss: 0.3838 valid loss: 0.2852, valid accuracy: 0.9180
Iter-191000 train loss: 0.2520 valid loss: 0.2851, valid accuracy: 0.9180
Iter-191100 train loss: 0.3381 valid loss: 0.2851, valid accuracy: 0.9182
Iter-191200 train loss: 0.2199 valid loss: 0.2851, valid accuracy: 0.9182
Iter-191300 train loss: 0.4090 valid loss: 0.2850, valid accuracy: 0.9182
Iter-191400 train loss: 0.4580 valid loss: 0.2850, valid accuracy: 0.9184
Iter-191500 train loss: 0.2672 valid loss: 0.2850, valid accuracy: 0.9180
Iter-191600 train loss: 0.1934 valid loss: 0.2850, valid accuracy: 0.9180
Iter-191700 train loss: 0.2588 valid loss: 0.2850, valid accuracy: 0.9180
Iter-191800 train loss: 0.3215 valid loss: 0.2849, valid accuracy: 0.9180
Iter-191900 train loss: 0.3153 valid l

Iter-201700 train loss: 0.5306 valid loss: 0.2816, valid accuracy: 0.9184
Iter-201800 train loss: 0.1501 valid loss: 0.2816, valid accuracy: 0.9184
Iter-201900 train loss: 0.2423 valid loss: 0.2815, valid accuracy: 0.9182
Iter-202000 train loss: 0.2653 valid loss: 0.2815, valid accuracy: 0.9184
Iter-202100 train loss: 0.2683 valid loss: 0.2815, valid accuracy: 0.9182
Iter-202200 train loss: 0.2567 valid loss: 0.2814, valid accuracy: 0.9186
Iter-202300 train loss: 0.2200 valid loss: 0.2814, valid accuracy: 0.9186
Iter-202400 train loss: 0.5317 valid loss: 0.2814, valid accuracy: 0.9186
Iter-202500 train loss: 0.3207 valid loss: 0.2813, valid accuracy: 0.9186
Iter-202600 train loss: 0.3413 valid loss: 0.2813, valid accuracy: 0.9190
Iter-202700 train loss: 0.3058 valid loss: 0.2812, valid accuracy: 0.9188
Iter-202800 train loss: 0.2847 valid loss: 0.2812, valid accuracy: 0.9188
Iter-202900 train loss: 0.3111 valid loss: 0.2812, valid accuracy: 0.9186
Iter-203000 train loss: 0.4458 valid l

Iter-212800 train loss: 0.1691 valid loss: 0.2783, valid accuracy: 0.9192
Iter-212900 train loss: 0.1998 valid loss: 0.2782, valid accuracy: 0.9192
Iter-213000 train loss: 0.2510 valid loss: 0.2782, valid accuracy: 0.9192
Iter-213100 train loss: 0.3863 valid loss: 0.2782, valid accuracy: 0.9192
Iter-213200 train loss: 0.2633 valid loss: 0.2781, valid accuracy: 0.9192
Iter-213300 train loss: 0.3440 valid loss: 0.2781, valid accuracy: 0.9190
Iter-213400 train loss: 0.2240 valid loss: 0.2781, valid accuracy: 0.9190
Iter-213500 train loss: 0.4355 valid loss: 0.2781, valid accuracy: 0.9188
Iter-213600 train loss: 0.2479 valid loss: 0.2780, valid accuracy: 0.9192
Iter-213700 train loss: 0.6723 valid loss: 0.2780, valid accuracy: 0.9190
Iter-213800 train loss: 0.4997 valid loss: 0.2780, valid accuracy: 0.9190
Iter-213900 train loss: 0.1684 valid loss: 0.2780, valid accuracy: 0.9192
Iter-214000 train loss: 0.1793 valid loss: 0.2779, valid accuracy: 0.9190
Iter-214100 train loss: 0.3008 valid l

Iter-223900 train loss: 0.3286 valid loss: 0.2751, valid accuracy: 0.9200
Iter-224000 train loss: 0.2873 valid loss: 0.2751, valid accuracy: 0.9200
Iter-224100 train loss: 0.2024 valid loss: 0.2750, valid accuracy: 0.9200
Iter-224200 train loss: 0.1909 valid loss: 0.2750, valid accuracy: 0.9200
Iter-224300 train loss: 0.4835 valid loss: 0.2750, valid accuracy: 0.9200
Iter-224400 train loss: 0.1831 valid loss: 0.2750, valid accuracy: 0.9198
Iter-224500 train loss: 0.5191 valid loss: 0.2750, valid accuracy: 0.9198
Iter-224600 train loss: 0.1832 valid loss: 0.2750, valid accuracy: 0.9198
Iter-224700 train loss: 0.4833 valid loss: 0.2750, valid accuracy: 0.9198
Iter-224800 train loss: 0.3692 valid loss: 0.2749, valid accuracy: 0.9196
Iter-224900 train loss: 0.2912 valid loss: 0.2749, valid accuracy: 0.9198
Iter-225000 train loss: 0.3697 valid loss: 0.2749, valid accuracy: 0.9198
Iter-225100 train loss: 0.2832 valid loss: 0.2748, valid accuracy: 0.9198
Iter-225200 train loss: 0.2733 valid l

Iter-235000 train loss: 0.3122 valid loss: 0.2725, valid accuracy: 0.9204
Iter-235100 train loss: 0.2077 valid loss: 0.2725, valid accuracy: 0.9202
Iter-235200 train loss: 0.4105 valid loss: 0.2725, valid accuracy: 0.9198
Iter-235300 train loss: 0.3780 valid loss: 0.2725, valid accuracy: 0.9198
Iter-235400 train loss: 0.2048 valid loss: 0.2725, valid accuracy: 0.9200
Iter-235500 train loss: 0.5882 valid loss: 0.2725, valid accuracy: 0.9200
Iter-235600 train loss: 0.1280 valid loss: 0.2725, valid accuracy: 0.9198
Iter-235700 train loss: 0.4640 valid loss: 0.2724, valid accuracy: 0.9198
Iter-235800 train loss: 0.2975 valid loss: 0.2724, valid accuracy: 0.9198
Iter-235900 train loss: 0.2190 valid loss: 0.2724, valid accuracy: 0.9200
Iter-236000 train loss: 0.2603 valid loss: 0.2723, valid accuracy: 0.9200
Iter-236100 train loss: 0.2775 valid loss: 0.2723, valid accuracy: 0.9198
Iter-236200 train loss: 0.1706 valid loss: 0.2723, valid accuracy: 0.9196
Iter-236300 train loss: 0.3994 valid l

Iter-246100 train loss: 0.3497 valid loss: 0.2698, valid accuracy: 0.9204
Iter-246200 train loss: 0.1159 valid loss: 0.2698, valid accuracy: 0.9202
Iter-246300 train loss: 0.2615 valid loss: 0.2697, valid accuracy: 0.9202
Iter-246400 train loss: 0.1827 valid loss: 0.2697, valid accuracy: 0.9202
Iter-246500 train loss: 0.3882 valid loss: 0.2697, valid accuracy: 0.9202
Iter-246600 train loss: 0.2638 valid loss: 0.2697, valid accuracy: 0.9202
Iter-246700 train loss: 0.2264 valid loss: 0.2696, valid accuracy: 0.9202
Iter-246800 train loss: 0.2552 valid loss: 0.2696, valid accuracy: 0.9202
Iter-246900 train loss: 0.6002 valid loss: 0.2696, valid accuracy: 0.9202
Iter-247000 train loss: 0.4187 valid loss: 0.2695, valid accuracy: 0.9202
Iter-247100 train loss: 0.2047 valid loss: 0.2695, valid accuracy: 0.9202
Iter-247200 train loss: 0.2151 valid loss: 0.2695, valid accuracy: 0.9202
Iter-247300 train loss: 0.5088 valid loss: 0.2695, valid accuracy: 0.9202
Iter-247400 train loss: 0.2173 valid l

Iter-257200 train loss: 0.2666 valid loss: 0.2672, valid accuracy: 0.9210
Iter-257300 train loss: 0.2382 valid loss: 0.2672, valid accuracy: 0.9208
Iter-257400 train loss: 0.3070 valid loss: 0.2672, valid accuracy: 0.9208
Iter-257500 train loss: 0.1681 valid loss: 0.2672, valid accuracy: 0.9208
Iter-257600 train loss: 0.3744 valid loss: 0.2671, valid accuracy: 0.9212
Iter-257700 train loss: 0.3883 valid loss: 0.2671, valid accuracy: 0.9208
Iter-257800 train loss: 0.1910 valid loss: 0.2670, valid accuracy: 0.9208
Iter-257900 train loss: 0.2717 valid loss: 0.2670, valid accuracy: 0.9208
Iter-258000 train loss: 0.4641 valid loss: 0.2670, valid accuracy: 0.9212
Iter-258100 train loss: 0.2554 valid loss: 0.2670, valid accuracy: 0.9210
Iter-258200 train loss: 0.1316 valid loss: 0.2670, valid accuracy: 0.9210
Iter-258300 train loss: 0.3308 valid loss: 0.2670, valid accuracy: 0.9210
Iter-258400 train loss: 0.1536 valid loss: 0.2669, valid accuracy: 0.9210
Iter-258500 train loss: 0.5501 valid l

Iter-268300 train loss: 0.5782 valid loss: 0.2647, valid accuracy: 0.9224
Iter-268400 train loss: 0.1793 valid loss: 0.2647, valid accuracy: 0.9226
Iter-268500 train loss: 0.2244 valid loss: 0.2647, valid accuracy: 0.9226
Iter-268600 train loss: 0.5979 valid loss: 0.2646, valid accuracy: 0.9224
Iter-268700 train loss: 0.4047 valid loss: 0.2646, valid accuracy: 0.9224
Iter-268800 train loss: 0.3711 valid loss: 0.2646, valid accuracy: 0.9226
Iter-268900 train loss: 0.3985 valid loss: 0.2646, valid accuracy: 0.9226
Iter-269000 train loss: 0.2142 valid loss: 0.2645, valid accuracy: 0.9224
Iter-269100 train loss: 0.3323 valid loss: 0.2645, valid accuracy: 0.9224
Iter-269200 train loss: 0.2993 valid loss: 0.2645, valid accuracy: 0.9224
Iter-269300 train loss: 0.2850 valid loss: 0.2645, valid accuracy: 0.9224
Iter-269400 train loss: 0.2738 valid loss: 0.2645, valid accuracy: 0.9226
Iter-269500 train loss: 0.1801 valid loss: 0.2644, valid accuracy: 0.9226
Iter-269600 train loss: 0.0867 valid l

Iter-279400 train loss: 0.3080 valid loss: 0.2624, valid accuracy: 0.9226
Iter-279500 train loss: 0.2222 valid loss: 0.2624, valid accuracy: 0.9226
Iter-279600 train loss: 0.2353 valid loss: 0.2624, valid accuracy: 0.9224
Iter-279700 train loss: 0.2513 valid loss: 0.2624, valid accuracy: 0.9226
Iter-279800 train loss: 0.2826 valid loss: 0.2624, valid accuracy: 0.9226
Iter-279900 train loss: 0.0808 valid loss: 0.2624, valid accuracy: 0.9228
Iter-280000 train loss: 0.3128 valid loss: 0.2623, valid accuracy: 0.9226
Iter-280100 train loss: 0.2038 valid loss: 0.2623, valid accuracy: 0.9226
Iter-280200 train loss: 0.2299 valid loss: 0.2623, valid accuracy: 0.9224
Iter-280300 train loss: 0.3130 valid loss: 0.2623, valid accuracy: 0.9224
Iter-280400 train loss: 0.3369 valid loss: 0.2623, valid accuracy: 0.9226
Iter-280500 train loss: 0.3048 valid loss: 0.2622, valid accuracy: 0.9228
Iter-280600 train loss: 0.1584 valid loss: 0.2622, valid accuracy: 0.9228
Iter-280700 train loss: 0.2227 valid l

Iter-290500 train loss: 0.3186 valid loss: 0.2602, valid accuracy: 0.9224
Iter-290600 train loss: 0.1742 valid loss: 0.2602, valid accuracy: 0.9226
Iter-290700 train loss: 0.4483 valid loss: 0.2602, valid accuracy: 0.9226
Iter-290800 train loss: 0.4127 valid loss: 0.2602, valid accuracy: 0.9226
Iter-290900 train loss: 0.4190 valid loss: 0.2602, valid accuracy: 0.9222
Iter-291000 train loss: 0.1869 valid loss: 0.2602, valid accuracy: 0.9222
Iter-291100 train loss: 0.0941 valid loss: 0.2601, valid accuracy: 0.9226
Iter-291200 train loss: 0.2781 valid loss: 0.2601, valid accuracy: 0.9226
Iter-291300 train loss: 0.1111 valid loss: 0.2601, valid accuracy: 0.9224
Iter-291400 train loss: 0.2218 valid loss: 0.2601, valid accuracy: 0.9226
Iter-291500 train loss: 0.1730 valid loss: 0.2601, valid accuracy: 0.9226
Iter-291600 train loss: 0.3359 valid loss: 0.2600, valid accuracy: 0.9226
Iter-291700 train loss: 0.3653 valid loss: 0.2601, valid accuracy: 0.9226
Iter-291800 train loss: 0.2702 valid l

Iter-301600 train loss: 0.2966 valid loss: 0.2582, valid accuracy: 0.9230
Iter-301700 train loss: 0.2600 valid loss: 0.2581, valid accuracy: 0.9230
Iter-301800 train loss: 0.3051 valid loss: 0.2581, valid accuracy: 0.9230
Iter-301900 train loss: 0.0990 valid loss: 0.2581, valid accuracy: 0.9232
Iter-302000 train loss: 0.2815 valid loss: 0.2581, valid accuracy: 0.9230
Iter-302100 train loss: 0.3823 valid loss: 0.2581, valid accuracy: 0.9232
Iter-302200 train loss: 0.3508 valid loss: 0.2580, valid accuracy: 0.9230
Iter-302300 train loss: 0.2266 valid loss: 0.2580, valid accuracy: 0.9228
Iter-302400 train loss: 0.2478 valid loss: 0.2580, valid accuracy: 0.9230
Iter-302500 train loss: 0.2728 valid loss: 0.2580, valid accuracy: 0.9230
Iter-302600 train loss: 0.3065 valid loss: 0.2580, valid accuracy: 0.9230
Iter-302700 train loss: 0.1304 valid loss: 0.2580, valid accuracy: 0.9230
Iter-302800 train loss: 0.1278 valid loss: 0.2580, valid accuracy: 0.9230
Iter-302900 train loss: 0.1549 valid l

Iter-312700 train loss: 0.2615 valid loss: 0.2563, valid accuracy: 0.9230
Iter-312800 train loss: 0.4597 valid loss: 0.2562, valid accuracy: 0.9230
Iter-312900 train loss: 0.3958 valid loss: 0.2562, valid accuracy: 0.9230
Iter-313000 train loss: 0.2717 valid loss: 0.2561, valid accuracy: 0.9232
Iter-313100 train loss: 0.1722 valid loss: 0.2561, valid accuracy: 0.9230
Iter-313200 train loss: 0.2872 valid loss: 0.2561, valid accuracy: 0.9230
Iter-313300 train loss: 0.1965 valid loss: 0.2561, valid accuracy: 0.9230
Iter-313400 train loss: 0.2116 valid loss: 0.2561, valid accuracy: 0.9230
Iter-313500 train loss: 0.2055 valid loss: 0.2561, valid accuracy: 0.9228
Iter-313600 train loss: 0.1734 valid loss: 0.2560, valid accuracy: 0.9230
Iter-313700 train loss: 0.0763 valid loss: 0.2560, valid accuracy: 0.9232
Iter-313800 train loss: 0.3362 valid loss: 0.2560, valid accuracy: 0.9232
Iter-313900 train loss: 0.2380 valid loss: 0.2560, valid accuracy: 0.9232
Iter-314000 train loss: 0.1179 valid l

Iter-323800 train loss: 0.1572 valid loss: 0.2542, valid accuracy: 0.9230
Iter-323900 train loss: 0.2812 valid loss: 0.2541, valid accuracy: 0.9232
Iter-324000 train loss: 0.4008 valid loss: 0.2541, valid accuracy: 0.9232
Iter-324100 train loss: 0.2962 valid loss: 0.2541, valid accuracy: 0.9232
Iter-324200 train loss: 0.1809 valid loss: 0.2541, valid accuracy: 0.9232
Iter-324300 train loss: 0.1518 valid loss: 0.2541, valid accuracy: 0.9232
Iter-324400 train loss: 0.1790 valid loss: 0.2541, valid accuracy: 0.9232
Iter-324500 train loss: 0.2614 valid loss: 0.2541, valid accuracy: 0.9232
Iter-324600 train loss: 0.5588 valid loss: 0.2541, valid accuracy: 0.9230
Iter-324700 train loss: 0.2371 valid loss: 0.2540, valid accuracy: 0.9230
Iter-324800 train loss: 0.1210 valid loss: 0.2540, valid accuracy: 0.9226
Iter-324900 train loss: 0.3828 valid loss: 0.2541, valid accuracy: 0.9230
Iter-325000 train loss: 0.4259 valid loss: 0.2540, valid accuracy: 0.9230
Iter-325100 train loss: 0.3500 valid l

Iter-334900 train loss: 0.1981 valid loss: 0.2526, valid accuracy: 0.9236
Iter-335000 train loss: 0.1123 valid loss: 0.2526, valid accuracy: 0.9236
Iter-335100 train loss: 0.1545 valid loss: 0.2525, valid accuracy: 0.9234
Iter-335200 train loss: 0.3457 valid loss: 0.2525, valid accuracy: 0.9232
Iter-335300 train loss: 0.0750 valid loss: 0.2525, valid accuracy: 0.9234
Iter-335400 train loss: 0.1879 valid loss: 0.2525, valid accuracy: 0.9232
Iter-335500 train loss: 0.3408 valid loss: 0.2524, valid accuracy: 0.9234
Iter-335600 train loss: 0.2374 valid loss: 0.2524, valid accuracy: 0.9234
Iter-335700 train loss: 0.2911 valid loss: 0.2524, valid accuracy: 0.9236
Iter-335800 train loss: 0.2964 valid loss: 0.2523, valid accuracy: 0.9236
Iter-335900 train loss: 0.2653 valid loss: 0.2523, valid accuracy: 0.9236
Iter-336000 train loss: 0.3468 valid loss: 0.2523, valid accuracy: 0.9236
Iter-336100 train loss: 0.1577 valid loss: 0.2523, valid accuracy: 0.9236
Iter-336200 train loss: 0.1575 valid l

Iter-346000 train loss: 0.1277 valid loss: 0.2507, valid accuracy: 0.9236
Iter-346100 train loss: 0.1422 valid loss: 0.2507, valid accuracy: 0.9234
Iter-346200 train loss: 0.1982 valid loss: 0.2506, valid accuracy: 0.9234
Iter-346300 train loss: 0.2514 valid loss: 0.2506, valid accuracy: 0.9236
Iter-346400 train loss: 0.4354 valid loss: 0.2506, valid accuracy: 0.9236
Iter-346500 train loss: 0.4014 valid loss: 0.2506, valid accuracy: 0.9236
Iter-346600 train loss: 0.2637 valid loss: 0.2506, valid accuracy: 0.9236
Iter-346700 train loss: 0.1741 valid loss: 0.2506, valid accuracy: 0.9234
Iter-346800 train loss: 0.3132 valid loss: 0.2506, valid accuracy: 0.9236
Iter-346900 train loss: 0.1209 valid loss: 0.2506, valid accuracy: 0.9234
Iter-347000 train loss: 0.4387 valid loss: 0.2506, valid accuracy: 0.9234
Iter-347100 train loss: 0.4257 valid loss: 0.2505, valid accuracy: 0.9238
Iter-347200 train loss: 0.3835 valid loss: 0.2505, valid accuracy: 0.9238
Iter-347300 train loss: 0.1650 valid l

Iter-357100 train loss: 0.2311 valid loss: 0.2490, valid accuracy: 0.9240
Iter-357200 train loss: 0.4596 valid loss: 0.2490, valid accuracy: 0.9240
Iter-357300 train loss: 0.3490 valid loss: 0.2489, valid accuracy: 0.9238
Iter-357400 train loss: 0.1650 valid loss: 0.2489, valid accuracy: 0.9238
Iter-357500 train loss: 0.0941 valid loss: 0.2489, valid accuracy: 0.9240
Iter-357600 train loss: 0.0850 valid loss: 0.2489, valid accuracy: 0.9240
Iter-357700 train loss: 0.1758 valid loss: 0.2489, valid accuracy: 0.9240
Iter-357800 train loss: 0.4211 valid loss: 0.2488, valid accuracy: 0.9238
Iter-357900 train loss: 0.3193 valid loss: 0.2488, valid accuracy: 0.9238
Iter-358000 train loss: 0.2229 valid loss: 0.2488, valid accuracy: 0.9238
Iter-358100 train loss: 0.2831 valid loss: 0.2488, valid accuracy: 0.9240
Iter-358200 train loss: 0.3681 valid loss: 0.2488, valid accuracy: 0.9236
Iter-358300 train loss: 0.2984 valid loss: 0.2487, valid accuracy: 0.9238
Iter-358400 train loss: 0.3480 valid l

In [None]:
# Display the learning curves: training loss and validation loss.
# Optional notebook magics (left disabled, as in the original cell):
# %matplotlib inline
# %config InlineBackend.figure_format = 'retina'
import matplotlib.pyplot as plt

# Use an explicit figure/axes pair instead of the implicit pyplot "current
# axes", so the labels below are attached to exactly this plot.
fig, ax = plt.subplots()
ax.plot(nn.losses['train'], label='Train loss')
ax.plot(nn.losses['valid'], label='Valid loss')

# The x value is simply each entry's position in the recorded list; how often
# the training loop records an entry is not visible in this cell -- TODO confirm.
ax.set_xlabel('Recorded step')
ax.set_ylabel('Loss')
ax.set_title('Training and validation loss')
ax.legend()
plt.show()

In [None]:
# Display the validation accuracy recorded during training.
# Relies on `plt` (matplotlib.pyplot) imported by the preceding plotting cell.
fig, ax = plt.subplots()
ax.plot(nn.losses['valid_acc'], label='Valid accuracy')

# The x value is each entry's position in the recorded list; the recording
# frequency is set by the training loop, which is not visible here -- TODO confirm.
ax.set_xlabel('Recorded step')
ax.set_ylabel('Accuracy')
ax.set_title('Validation accuracy')
ax.legend()
plt.show()