In [3]:
import struct
import numpy as np
import gzip
import math
import numdifftools as nd

In [4]:
def parse_mnist(image_filename, label_filename, num_samples=60000):
    """ Download and parse an images and labels file in MNIST format.  See this
    page: http://yann.lecun.com/exdb/mnist/ for a description of the file format.

    Both files are fetched over the network from the mirror configured in
    `server` below, unpacked from a temporary file, and the temporary file is
    removed again afterwards.

    Args:
        image_filename (str): name of gzipped images file in MNIST format,
            appended to the server URL
        label_filename (str): name of gzipped labels file in MNIST format,
            appended to the server URL
        num_samples (int): number of entries both files must declare in their
            header (60000 by default); a mismatch is treated as an error.

    Returns:
        Tuple (X,y):
            X (numpy.ndarray[np.float32]): 2D numpy array containing the loaded
                data.  The dimensionality of the data is
                (num_examples x input_dim) where 'input_dim' is the full
                dimension of the data, e.g., since MNIST images are 28x28, it
                will be 784.  Values are of type np.float32, scaled so that
                the original byte value 0 maps to 0.0 and 255 maps to 1.0.

            y (numpy.ndarray[dtype=np.uint8]): 1D numpy array containing the
                labels of the examples.  Values are of type np.uint8 and
                for MNIST will contain the values 0-9.

    Raises:
        ValueError: if a header field (magic number, entry count, image size)
            does not have the expected value.
    """
    ### BEGIN YOUR CODE

    ## gzip, numpy and struct come from the notebook's import cell; only the
    ## names that cell does not provide are imported here.
    import os
    from urllib.request import urlretrieve

    ## IDX headers are big-endian 32-bit words.  Reading them with an explicit
    ## ">I" (instead of the native-order "I") keeps the check correct on
    ## big-endian hosts as well.
    IMAGE_MAGIC = 0x00000803
    LABEL_MAGIC = 0x00000801

    def read_u32(gz):
        """Read one big-endian unsigned 32-bit header word."""
        return struct.unpack(">I", gz.read(4))[0]

    def fetch_to_tempfile(src):
        """Download src into a fresh temporary file and return its path."""
        print("Downloading " + src)
        ## without an explicit filename urlretrieve picks a unique temp
        ## location, so nothing is written into the working directory
        gzfname, _ = urlretrieve(src)
        print("Done.")
        return gzfname

    def load_data(src, num_samples):
        """Return the images of src as a (num_samples, 784) float32 array in [0, 1]."""
        gzfname = fetch_to_tempfile(src)
        try:
            with gzip.open(gzfname) as gz:
                if read_u32(gz) != IMAGE_MAGIC:
                    raise ValueError("Invalid file: unexpected magic number.")
                if read_u32(gz) != num_samples:
                    raise ValueError(
                        "Invalid file: expected {0} entries.".format(num_samples)
                    )
                ## number of rows & columns
                crow = read_u32(gz)
                ccol = read_u32(gz)
                if crow != 28 or ccol != 28:
                    raise ValueError(
                        "Invalid file: expected 28 rows/cols per image."
                    )
                res = np.frombuffer(gz.read(num_samples * crow * ccol), dtype=np.uint8)
        finally:
            ## delete the temp file
            os.remove(gzfname)
        ## reshape to (num_samples, crow * ccol) and normalize to [0.0..1.0];
        ## the division yields float64, so cast down to float32 afterwards
        res = res.reshape((num_samples, crow * ccol)) / 255.0
        return res.astype('float32')

    def load_labels(src, num_samples):
        """Return the labels of src as a (num_samples,) uint8 array."""
        gzfname = fetch_to_tempfile(src)
        try:
            with gzip.open(gzfname) as gz:
                if read_u32(gz) != LABEL_MAGIC:
                    raise ValueError("Invalid file: unexpected magic number.")
                if read_u32(gz) != num_samples:
                    raise ValueError(
                        "Invalid file: expected {0} rows.".format(num_samples)
                    )
                res = np.frombuffer(gz.read(num_samples), dtype=np.uint8)
        finally:
            os.remove(gzfname)
        return res.reshape((num_samples))

    ## server = 'https://yann.lecun.com/exdb/mnist/'
    server = 'https://raw.githubusercontent.com/fgnt/mnist/master/'

    # URLs for the image and label data
    url_train_image = server + image_filename
    url_train_labels = server + label_filename

    print("Downloading train data: " + url_train_image + ", " + url_train_labels)
    train_features = load_data(url_train_image, num_samples)
    train_labels = load_labels(url_train_labels, num_samples)

    print( "Downloading done...")

    return ( train_features, train_labels)

    ### END YOUR CODE


In [5]:
def test_parse_mnist():
    """Check dtype, shape, scaling and the first labels of the parsed training set.

    The two files requested are the MNIST training images and labels
    (train-images-idx3-ubyte.gz / train-labels-idx1-ubyte.gz).
    """
    images, labels = parse_mnist("train-images-idx3-ubyte.gz",
                                 "train-labels-idx1-ubyte.gz")

    ## dtype and shape contract
    assert images.dtype == np.float32
    assert labels.dtype == np.uint8
    assert images.shape == (60000,784)
    assert labels.shape == (60000,)

    ## scaling: the norm over the first rows only matches if pixels are in [0, 1]
    norm_first_10 = np.linalg.norm(images[:10])
    np.testing.assert_allclose(norm_first_10, 27.892084)

    normalization_hint = """If you failed this test but not the previous one,
        you are probably normalizing incorrectly. You should normalize
        w.r.t. the whole dataset, _not_ individual images."""
    norm_first_1000 = np.linalg.norm(images[:1000])
    np.testing.assert_allclose(norm_first_1000, 293.0717,
                               rtol=1e-6, err_msg=normalization_hint)

    ## labels of the first ten examples
    expected_labels = [5, 0, 4, 1, 9, 2, 1, 3, 1, 4]
    np.testing.assert_equal(labels[:10], expected_labels)
    print( "test successful")

In [6]:
# Run the parsing check; it calls parse_mnist, which downloads the training files.
test_parse_mnist()

Downloading train data: https://raw.githubusercontent.com/fgnt/mnist/master/train-images-idx3-ubyte.gz, https://raw.githubusercontent.com/fgnt/mnist/master/train-labels-idx1-ubyte.gz
Downloading https://raw.githubusercontent.com/fgnt/mnist/master/train-images-idx3-ubyte.gz
Done.
Downloading https://raw.githubusercontent.com/fgnt/mnist/master/train-labels-idx1-ubyte.gz
Done.
Downloading done...
test successful


In [29]:
def softmax_loss(Z, y):
    """ Return softmax loss (average cross-entropy of softmax(Z) against y).

    The log-sum-exp term is evaluated after subtracting each row's maximum
    logit, which is mathematically identical to the direct formula but does
    not overflow to inf for large logits.

    Args:
        Z (np.ndarray[np.float32]): 2D numpy array of shape
            (batch_size, num_classes), containing the logit predictions for
            each class.
        y (np.ndarray[np.uint8]): 1D numpy array of shape (batch_size, )
            containing the true label of each example.

    Returns:
        Average softmax loss over the sample.
    """
    ### BEGIN YOUR CODE
    ## one label per row of Z
    assert len(y) == np.shape( Z)[0]

    ## log(sum_j exp(z_j)) == m + log(sum_j exp(z_j - m)) with m = max_j z_j;
    ## after the shift every exponent is <= 0, so np.exp cannot overflow
    row_max = np.max( Z, axis=1, keepdims=True)
    log_sum_exp = np.log( np.sum( np.exp( Z - row_max), axis=1)) + row_max[:, 0]

    ## pick the logit of the true class from each row: pairing a row-index
    ## array with y selects Z[i, y[i]] (plain Z[:, y] would instead build a
    ## batch_size x batch_size matrix)
    row_index = np.arange( Z.shape[0])
    true_logits = Z[row_index, y]

    ## per-example cross entropy, averaged over the batch
    per_example = - true_logits + log_sum_exp
    return np.sum( per_example)/len( y)
    ### END YOUR CODE

In [30]:
def test_softmax_loss():
    """Check softmax_loss on all-zero logits and on seeded random logits."""
    ## only the labels of the training set are needed here
    _, labels = parse_mnist("train-images-idx3-ubyte.gz",
                            "train-labels-idx1-ubyte.gz")
    np.random.seed(0)
    n_examples = labels.shape[0]

    ## identical logits for all 10 classes
    uniform_logits = np.zeros((n_examples, 10))
    np.testing.assert_allclose(softmax_loss(uniform_logits, labels), 2.3025850)

    ## standard-normal logits drawn after seeding with 0
    random_logits = np.random.randn(n_examples, 10)
    np.testing.assert_allclose(softmax_loss(random_logits, labels), 2.7291998)

    print( "test sucessful")

In [31]:
# Run the softmax loss check; it downloads the training files again via parse_mnist.
test_softmax_loss()

Downloading train data: https://raw.githubusercontent.com/fgnt/mnist/master/train-images-idx3-ubyte.gz, https://raw.githubusercontent.com/fgnt/mnist/master/train-labels-idx1-ubyte.gz
Downloading https://raw.githubusercontent.com/fgnt/mnist/master/train-images-idx3-ubyte.gz
Done.
Downloading https://raw.githubusercontent.com/fgnt/mnist/master/train-labels-idx1-ubyte.gz
Done.
Downloading done...
test sucessful


In [54]:
def gen_one_hot_y( y, nbr_of_classes):
    '''
        generates a matrix of one-hot row vectors using y[i]

        y: np.ndarray[np.uint8], 1D array of class labels
        nbr_of_classes: int, indicating the nbr of classes.
        all elements of y < nbr_of_classes (checked with an assert)

        returns: a 2D np.ndarray[ len(y), nbr_of_classes] of float64 in which
        row i holds 1.0 in column y[i] and 0.0 everywhere else
    '''
    labels = np.asarray( y)
    res = np.zeros( ( len( labels), nbr_of_classes))
    ## nothing to mark for an empty label vector (an empty list would also
    ## arrive here as a float array, which cannot be used as an index)
    if labels.size:
        assert np.all( labels < nbr_of_classes)
        ## pairing a row-index array with the labels sets res[i, y[i]] for
        ## all rows at once instead of looping in Python
        res[np.arange( len( labels)), labels] = 1.0
    return res

def normalize_Z( Z):
    '''
        turns Z into a row-normalized matrix: every entry is divided by the
        sum of its row, so each row of the result sums to 1.0

        Z: np.ndarray[np.float32] 2D array (left unmodified)

        returns normalized( Z) as a new float64 array of the same shape

        raises AssertionError if some row of the result does not sum to 1.0
        within a relative tolerance of 1E-6 (e.g. because the row sum of Z
        was zero or not finite)
    '''
    ## keepdims gives a (rows, 1) column that broadcasts across each row
    row_sums = np.sum( Z, axis=1, keepdims=True)

    ## the quotient is written into a float64 buffer so the result dtype does
    ## not depend on the dtype of Z
    Z_norm = np.zeros( Z.shape)
    Z_norm[...] = Z / row_sums

    ## make sure every row is a prob distribution; this is the same relative
    ## tolerance test as math.isclose( total, 1.0, rel_tol=1E-6), and NaN
    ## totals fail it
    totals = np.sum( Z_norm, axis=1)
    assert np.all( np.abs( totals - 1.0) <= 1E-6 * np.maximum( np.abs( totals), 1.0))

    return Z_norm

In [135]:
def softmax_regression_epoch(X, y, theta, lr = 0.1, batch_size=100):
    """ Run a single epoch of SGD for softmax regression on the data, using
    the step size lr and specified batch size.  This function modifies the
    theta matrix in place, and iterates through batches in X _without_
    randomizing the order.

    If the number of examples is not a multiple of batch_size, the remaining
    examples form one final, shorter batch whose gradient is averaged over
    its own size.

    Args:
        X (np.ndarray[np.float32]): 2D input array of size
            (num_examples x input_dim).
        y (np.ndarray[np.uint8]): 1D class label array of size (num_examples,)
        theta (np.ndarrray[np.float32]): 2D array of softmax regression
            parameters, of shape (input_dim, num_classes)
        lr (float): step size (learning rate) for SGD
        batch_size (int): size of SGD minibatch

    Returns:
        None

    Raises:
        ValueError: if batch_size is not positive.
    """
    ### BEGIN YOUR CODE

    if batch_size <= 0:
        raise ValueError( "batch_size must be a positive integer")

    def softmax_regression_batch( X_b, y_b, theta, lr):
        '''
            apply one SGD step to theta (in place) for the batch X_b, y_b

            Args:
                X_b (np.ndarray[np.float32]): 2D input array of size
                    (m x input_dim), m being the nbr of rows in this batch.
                y_b (np.ndarray[np.uint8]): 1D class label array of size (m)
                theta (np.ndarrray[np.float32]): 2D array of softmax regression
                    parameters, of shape (input_dim, num_classes)
                lr (float): step size (learning rate) for SGD

            Returns:
                None
        '''
        m = np.shape( X_b)[0]
        assert( m == len( y_b))

        ## softmax probabilities: exponentiate the logits, then normalize rows
        Z = np.exp( np.matmul( X_b, theta))
        Z_norm = normalize_Z( Z)
        I_y = gen_one_hot_y( y_b, np.shape( Z)[1])

        ## gradient of the summed batch loss w.r.t. theta: X^T (softmax - one_hot)
        grad = np.matmul( np.transpose( X_b), Z_norm - I_y)
        assert( np.shape( grad) == np.shape( theta))

        ## dividing by m turns the summed gradient into the batch average.
        ## "-=" updates the caller's array; theta = theta - ... would only
        ## rebind the local name and *is not* in-place
        theta -= (lr/m) * grad

        return None

    ## iterate over samples, batch by batch...
    nbr_samples = len( y)
    nbr_batches = math.ceil( nbr_samples / batch_size)
    print( f'nbr_samples = {len(y)}, batch-size = {batch_size}, nbr batches = {nbr_batches}')
    for lb in range( 0, nbr_samples, batch_size):
        ## slicing clamps at the end of the data, so the last batch simply
        ## comes out shorter instead of being skipped
        ub = lb + batch_size
        softmax_regression_batch( X[lb:ub, :], y[lb:ub], theta, lr)

    return None

    ### END YOUR CODE

In [136]:
def test_softmax_regression_epoch():
    """Check one SGD epoch against a numerical gradient and against a known
    parameter norm on the first 100 MNIST examples."""
    # part 1: a single full-batch step with lr=1 from theta=0 must equal the
    # negative numerical gradient of the loss
    np.random.seed(0)
    features = np.random.randn(50,5).astype(np.float32)
    targets = np.random.randint(3, size=(50,)).astype(np.uint8)
    theta_small = np.zeros((5,3), dtype=np.float32)

    def loss_at(flat_theta):
        return softmax_loss(features@flat_theta.reshape(5,3), targets)

    descent_direction = -nd.Gradient(loss_at)(theta_small)
    print( descent_direction)
    softmax_regression_epoch(features, targets, theta_small, lr=1.0, batch_size=50)
    np.testing.assert_allclose(descent_direction.reshape(5,3), theta_small,
                               rtol=1e-4, atol=1e-4)

    # part 2: ten batches of ten MNIST examples
    images, labels = parse_mnist("train-images-idx3-ubyte.gz",
                                 "train-labels-idx1-ubyte.gz")
    n_features = images.shape[1]
    n_classes = labels.max()+1
    theta = np.zeros((n_features, n_classes), dtype=np.float32)
    softmax_regression_epoch(images[:100], labels[:100], theta, lr=0.1, batch_size=10)
    theta_norm = np.linalg.norm(theta)
    np.testing.assert_allclose(theta_norm, 1.0947356,
                               rtol=1e-5, atol=1e-5)

    print( "test done...")

In [137]:
# Run the SGD epoch check (numerical-gradient comparison, then 100 MNIST examples).
test_softmax_regression_epoch()

[-0.02525946 -0.11465225  0.13991171  0.02291308 -0.08477389  0.06186081
  0.04386758  0.00742978 -0.05129736 -0.07611353  0.0494122   0.02670133
  0.01691807  0.01340648 -0.03032455]
nbr_samples = 50, batch-size = 50, nbr batches = 1
Downloading train data: https://raw.githubusercontent.com/fgnt/mnist/master/train-images-idx3-ubyte.gz, https://raw.githubusercontent.com/fgnt/mnist/master/train-labels-idx1-ubyte.gz
Downloading https://raw.githubusercontent.com/fgnt/mnist/master/train-images-idx3-ubyte.gz
Done.
Downloading https://raw.githubusercontent.com/fgnt/mnist/master/train-labels-idx1-ubyte.gz
Done.
Downloading done...
nbr_samples = 100, batch-size = 10, nbr batches = 10
test done...


In [138]:
def loss_err(h,y):
    """ Return the pair (softmax loss, error rate) for logits h and labels y.

    The error rate is the fraction of rows whose arg-max column differs from
    the label.
    """
    loss = softmax_loss(h,y)
    predicted = h.argmax(axis=1)
    error_rate = np.mean(predicted != y)
    return loss, error_rate


def train_softmax(X_tr, y_tr, X_te, y_te, epochs=10, lr=0.5, batch=100,
                  cpp=False):
    """ Train a softmax regression classifier from zero-initialized weights,
    printing train/test loss and error after every epoch.

    With cpp=True each epoch is delegated to softmax_regression_epoch_cpp
    instead of the Python implementation.
    """
    n_features = X_tr.shape[1]
    n_classes = y_tr.max()+1
    theta = np.zeros((n_features, n_classes), dtype=np.float32)

    row_template = "|  {:>4} |    {:.5f} |   {:.5f} |   {:.5f} |  {:.5f} |"
    print("| Epoch | Train Loss | Train Err | Test Loss | Test Err |")
    for epoch in range(epochs):
        # one pass over the training data; theta is updated in place
        if cpp:
            softmax_regression_epoch_cpp(X_tr, y_tr, theta, lr=lr, batch=batch)
        else:
            softmax_regression_epoch(X_tr, y_tr, theta, lr=lr, batch_size=batch)

        train_loss, train_err = loss_err(X_tr @ theta, y_tr)
        test_loss, test_err = loss_err(X_te @ theta, y_te)
        print(row_template.format(epoch, train_loss, train_err,
                                  test_loss, test_err))




In [139]:
# Training split: parse_mnist's default num_samples of 60000 applies.
X_tr, y_tr = parse_mnist("train-images-idx3-ubyte.gz",
                         "train-labels-idx1-ubyte.gz")
# Test split: parse_mnist is told to expect 10000 entries here.
X_te, y_te = parse_mnist("t10k-images-idx3-ubyte.gz",
                         "t10k-labels-idx1-ubyte.gz",
                        num_samples = 10000)

print("Training softmax regression")
# 10 epochs at step size 0.1; batch is left at train_softmax's default of 100.
train_softmax(X_tr, y_tr, X_te, y_te, epochs=10, lr = 0.1)


Downloading train data: https://raw.githubusercontent.com/fgnt/mnist/master/train-images-idx3-ubyte.gz, https://raw.githubusercontent.com/fgnt/mnist/master/train-labels-idx1-ubyte.gz
Downloading https://raw.githubusercontent.com/fgnt/mnist/master/train-images-idx3-ubyte.gz
Done.
Downloading https://raw.githubusercontent.com/fgnt/mnist/master/train-labels-idx1-ubyte.gz
Done.
Downloading done...
Downloading train data: https://raw.githubusercontent.com/fgnt/mnist/master/t10k-images-idx3-ubyte.gz, https://raw.githubusercontent.com/fgnt/mnist/master/t10k-labels-idx1-ubyte.gz
Downloading https://raw.githubusercontent.com/fgnt/mnist/master/t10k-images-idx3-ubyte.gz
Done.
Downloading https://raw.githubusercontent.com/fgnt/mnist/master/t10k-labels-idx1-ubyte.gz
Done.
Downloading done...
Training softmax regression
| Epoch | Train Loss | Train Err | Test Loss | Test Err |
nbr_samples = 60000, batch-size = 100, nbr batches = 600
|     0 |    0.38625 |   0.10812 |   0.36690 |  0.09960 |
nbr_sampl

In [116]:
def ReLU( m):
    '''
    rectified linear unit, applied elementwise to the 2D array m

    every entry below 0 becomes 0, every other entry is kept;
    the result is a new array, m itself is not modified
    '''
    rectified = np.maximum(0, m)
    return rectified

def ReLU_derivative( m):
    '''
    elementwise derivative of ReLU for the 2D array m, returned as a new array

    entries are 1 where m > 0 and 0 everywhere else (including at m == 0)
    '''
    positive_mask = m > 0
    ## multiplying the boolean mask by 1 converts True/False into 1/0
    return positive_mask * 1
    
def nn_epoch(X, y, W_1, W_2, lr = 0.1, batch_size=100):
    """ Run a single epoch of SGD for a two-layer neural network defined by the
    weights W_1 and W_2 (with no bias terms):
        logits = ReLU(X * W_1) * W_2
    The function uses the step size lr, and the specified batch size (without
    randomizing the order of X).  It modifies the W_1 and W_2 matrices in
    place.

    If the number of examples is not a multiple of batch_size, the remaining
    examples form one final, shorter batch whose gradients are averaged over
    its own size.

    Args:
        X (np.ndarray[np.float32]): 2D input array of size
            (num_examples x input_dim).
        y (np.ndarray[np.uint8]): 1D class label array of size (num_examples,)
        W_1 (np.ndarray[np.float32]): 2D array of first layer weights, of shape
            (input_dim, hidden_dim)
        W_2 (np.ndarray[np.float32]): 2D array of second layer weights, of shape
            (hidden_dim, num_classes)
        lr (float): step size (learning rate) for SGD
        batch_size (int): size of SGD minibatch

    Returns:
        None

    Raises:
        ValueError: if batch_size is not positive.
    """

    if batch_size <= 0:
        raise ValueError( "batch_size must be a positive integer")

    def nn_batch( X_b, y_b, W_1, W_2, lr):
        ''' apply one SGD step to W_1 and W_2 (in place) for the batch X_b, y_b '''
        m = X_b.shape[0]            ## nbr of rows in this batch
        assert( X_b.shape[1] == W_1.shape[0])
        assert( len( y_b) == m)

        hidden_dim = W_1.shape[1]   ## size of hidden layer
        num_classes = W_2.shape[1]  ## size of output

        ## forward pass: hidden activations
        Z_1 = ReLU( np.matmul( X_b, W_1))
        assert( Z_1.shape == ( m, hidden_dim))

        ## multiply Z_1, W_2, exponentiate result and normalize -> softmax probs
        probs = normalize_Z( np.exp( np.matmul( Z_1, W_2)))
        I_y = gen_one_hot_y( y_b, num_classes)
        assert( I_y.shape == probs.shape)

        ## backward pass
        G_2 = probs - I_y
        ## elementwise multiply of the ReLU_derivative of Z_1 with (G_2 * W_2^T);
        ## Z_1 is positive exactly where the pre-activation X_b * W_1 is positive
        G_1 = np.multiply( ReLU_derivative( Z_1), np.matmul( G_2, np.transpose( W_2)))

        assert( G_2.shape == ( m, num_classes))
        assert( G_1.shape == ( m, hidden_dim))

        ## both gradients are computed from the old weights before either
        ## matrix is touched
        grad_W_1 = np.matmul( np.transpose( X_b), G_1)/m
        grad_W_2 = np.matmul( np.transpose( Z_1), G_2)/m

        W_1 -= lr * grad_W_1
        W_2 -= lr * grad_W_2

    ## iterate over samples, batch by batch...
    nbr_samples = len( y)
    nbr_batches = math.ceil( nbr_samples / batch_size)
    print( f'nbr_samples = {len(y)}, batch-size = {batch_size}, nbr batches = {nbr_batches}')
    for lb in range( 0, nbr_samples, batch_size):
        ## slicing clamps at the end of the data, so the last batch simply
        ## comes out shorter instead of being skipped
        ub = lb + batch_size
        nn_batch( X[lb:ub, :], y[lb:ub], W_1, W_2, lr)

    return None


In [131]:
def train_nn(X_tr, y_tr, X_te, y_te, hidden_dim = 500,
             epochs=10, lr=0.5, batch_size=100):
    """ Train a two layer neural network from seeded random weights, printing
    train/test loss and error after every epoch. """
    input_dim = X_tr.shape[1]
    num_classes = y_tr.max() + 1

    # seed first, then draw W1 before W2 so the initial weights are reproducible
    np.random.seed(0)
    W1 = np.random.randn(input_dim, hidden_dim).astype(np.float32) / np.sqrt(hidden_dim)
    W2 = np.random.randn(hidden_dim, num_classes).astype(np.float32) / np.sqrt(num_classes)

    row_template = "|  {:>4} |    {:.5f} |   {:.5f} |   {:.5f} |  {:.5f} |"
    print("| Epoch | Train Loss | Train Err | Test Loss | Test Err |")
    for epoch in range(epochs):
        # one pass over the training data; W1 and W2 are updated in place
        nn_epoch(X_tr, y_tr, W1, W2, lr=lr, batch_size=batch_size)

        train_logits = np.maximum(X_tr@W1,0)@W2
        train_loss, train_err = loss_err(train_logits, y_tr)
        test_logits = np.maximum(X_te@W1,0)@W2
        test_loss, test_err = loss_err(test_logits, y_te)
        print(row_template.format(epoch, train_loss, train_err,
                                  test_loss, test_err))



In [132]:
def test_nn_epoch():
    """Check nn_epoch against numerical gradients and against known weight
    norms / loss after one full epoch on MNIST."""

    # part 1: a single full-batch step with lr=1 must move each weight matrix
    # by exactly its numerical gradient
    np.random.seed(0)
    features = np.random.randn(50,5).astype(np.float32)
    targets = np.random.randint(3, size=(50,)).astype(np.uint8)
    W1 = np.random.randn(5, 10).astype(np.float32) / np.sqrt(10)
    W2 = np.random.randn(10, 3).astype(np.float32) / np.sqrt(3)

    def loss_wrt_W1(flat_W1):
        hidden = np.maximum(features@flat_W1.reshape(5,10),0)
        return softmax_loss(hidden@W2, targets)

    def loss_wrt_W2(flat_W2):
        hidden = np.maximum(features@W1,0)
        return softmax_loss(hidden@flat_W2.reshape(10,3), targets)

    dW1 = nd.Gradient(loss_wrt_W1)(W1)
    dW2 = nd.Gradient(loss_wrt_W2)(W2)

    # keep the starting weights; nn_epoch overwrites W1 and W2 in place
    W1_before = W1.copy()
    W2_before = W2.copy()

    nn_epoch(features, targets, W1, W2, lr=1.0, batch_size=50)
    print( "done first nn_epoch...")

    np.testing.assert_allclose(dW1.reshape(5,10), W1_before-W1, rtol=1e-4, atol=1e-4)
    np.testing.assert_allclose(dW2.reshape(10,3), W2_before-W2, rtol=1e-4, atol=1e-4)

    # part 2: one full epoch over the MNIST training set
    images, labels = parse_mnist("train-images-idx3-ubyte.gz",
                                 "train-labels-idx1-ubyte.gz")
    np.random.seed(0)
    V1 = np.random.randn(images.shape[1], 100).astype(np.float32) / np.sqrt(100)
    V2 = np.random.randn(100, 10).astype(np.float32) / np.sqrt(10)
    nn_epoch(images, labels, V1, V2, lr=0.2, batch_size=100)
    print( "done second nn_epoch...")

    np.testing.assert_allclose(np.linalg.norm(V1), 28.437788,
                               rtol=1e-5, atol=1e-5)
    np.testing.assert_allclose(np.linalg.norm(V2), 10.455095,
                               rtol=1e-5, atol=1e-5)
    final_logits = np.maximum(images@V1,0)@V2
    np.testing.assert_allclose(loss_err(final_logits, labels),
                               (0.19770025, 0.06006667), rtol=1e-4, atol=1e-4)

    print( "test ok")

In [133]:
# Run the two-layer network check (numerical-gradient comparison, then one MNIST epoch).
test_nn_epoch()

nbr_samples = 50, batch-size = 50, nbr batches = 1
done first nn_epoch...
Downloading train data: https://raw.githubusercontent.com/fgnt/mnist/master/train-images-idx3-ubyte.gz, https://raw.githubusercontent.com/fgnt/mnist/master/train-labels-idx1-ubyte.gz
Downloading https://raw.githubusercontent.com/fgnt/mnist/master/train-images-idx3-ubyte.gz
Done.
Downloading https://raw.githubusercontent.com/fgnt/mnist/master/train-labels-idx1-ubyte.gz
Done.
Downloading done...
nbr_samples = 60000, batch-size = 100, nbr batches = 600
done second nn_epoch...
test ok


In [134]:
# Training split: parse_mnist's default num_samples of 60000 applies.
X_tr, y_tr = parse_mnist("train-images-idx3-ubyte.gz",
                         "train-labels-idx1-ubyte.gz")
# Test split: parse_mnist is told to expect 10000 entries here.
X_te, y_te = parse_mnist("t10k-images-idx3-ubyte.gz",
                         "t10k-labels-idx1-ubyte.gz",
                        num_samples = 10000)

print("Training 2 layer neural network")
# 10 epochs at step size 0.1; hidden_dim (500) and batch_size (100) are train_nn's defaults.
train_nn(X_tr, y_tr, X_te, y_te, epochs=10, lr = 0.1)

Downloading train data: https://raw.githubusercontent.com/fgnt/mnist/master/train-images-idx3-ubyte.gz, https://raw.githubusercontent.com/fgnt/mnist/master/train-labels-idx1-ubyte.gz
Downloading https://raw.githubusercontent.com/fgnt/mnist/master/train-images-idx3-ubyte.gz
Done.
Downloading https://raw.githubusercontent.com/fgnt/mnist/master/train-labels-idx1-ubyte.gz
Done.
Downloading done...
Downloading train data: https://raw.githubusercontent.com/fgnt/mnist/master/t10k-images-idx3-ubyte.gz, https://raw.githubusercontent.com/fgnt/mnist/master/t10k-labels-idx1-ubyte.gz
Downloading https://raw.githubusercontent.com/fgnt/mnist/master/t10k-images-idx3-ubyte.gz
Done.
Downloading https://raw.githubusercontent.com/fgnt/mnist/master/t10k-labels-idx1-ubyte.gz
Done.
Downloading done...
Training 2 layer neural network
| Epoch | Train Loss | Train Err | Test Loss | Test Err |
nbr_samples = 60000, batch-size = 100, nbr batches = 600
|     0 |    0.15143 |   0.04567 |   0.16062 |  0.04830 |
nbr_s