# MNIST

In [1]:
import csv
import numpy as np

def load_data(filepath, delimiter=",", dtype=float):
    """Load a numerical numpy array from a delimited text file.

    Args:
        filepath: Path of the file to read.
        delimiter: Single character that separates the fields of a line.
        dtype: Type every field is converted to.

    Returns:
        A numpy array with one row per non-empty line of the file.

    Raises:
        ValueError: If a field cannot be converted to ``dtype`` or the
            remaining rows do not all have the same number of fields.
    """

    print(f"Loading {filepath}...")
    # newline="" lets the csv module do its own line-ending handling, as its
    # documentation requires for file objects.
    with open(filepath, "r", newline="") as f:
        # csv.reader yields an empty list for a blank line (e.g. a stray
        # newline at the end of the file); such rows would make the array
        # ragged, so they are dropped.
        data_list = [row for row in csv.reader(f, delimiter=delimiter) if row]
    data = np.asarray(data_list, dtype=dtype)
    print("Done.")
    return data

In [2]:
# Locations of the MNIST CSV files, relative to the notebook.
TRAIN_FILE = "./mnistdata/mnist_train.csv"
TEST_FILE = "./mnistdata/mnist_test.csv"

# Parse the training split first, then the test split, both as integers.
train_data, test_data = (
    load_data(csv_path, delimiter=",", dtype=int)
    for csv_path in (TRAIN_FILE, TEST_FILE)
)

Loading ./mnistdata/mnist_train.csv...
Done.
Loading ./mnistdata/mnist_test.csv...
Done.


In [26]:
# Render the first test image as ASCII art, one character per pixel.
# Column 0 of a data row holds the label, so the 28 x 28 pixels start at
# column 1.
pixels = test_data[0, 1:]
for row in range(28):
    row_pixels = pixels[28 * row: 28 * (row + 1)]
    # Skip image rows that contain no ink at all.
    if not row_pixels.any():
        continue
    print("".join("#" if pixel else " " for pixel in row_pixels))


                            
      ######                
      ################      
      ################      
           ###########      
                  ####      
                 ####       
                 ####       
                ####        
                ####        
               ####         
               ###          
              ####          
             ####           
            #####           
            ####            
           #####            
           ####             
          #####             
          #####             
          ####              


Import source files

In [3]:
from nn import NeuralNet, Layer, LeakyRelu, MSE, Sigmoid, Softmax, CrossEntropyLoss, BinaryCrossEntropyLoss

In [4]:
def to_col(x):
    """Return the elements of array ``x`` as a column of shape ``(x.size, 1)``."""
    # -1 lets numpy infer the row count, which is the total element count
    # because the only other axis has length 1.
    return x.reshape(-1, 1)

def test(net, test_data, log_every=1000):
    """Measure the classification accuracy of ``net`` on ``test_data``.

    Every row of ``test_data`` is read as ``[label, input_0, input_1, ...]``.
    The inputs are passed to ``net.forward`` as a column vector and the index
    of the largest output is taken as the predicted label.

    Args:
        net: Object exposing a ``forward(x)`` method.
        test_data: 2-D array with one labelled sample per row.
        log_every: Print the row index every ``log_every`` rows as a progress
            indicator. Pass ``0`` or ``None`` to print nothing.

    Returns:
        The fraction of rows whose predicted label equals the stored label.

    Raises:
        ValueError: If ``test_data`` has no rows, because the accuracy is
            undefined in that case.
    """
    n_rows = test_data.shape[0]
    if n_rows == 0:
        raise ValueError("test_data is empty; accuracy is undefined")

    correct = 0
    for i, test_row in enumerate(test_data):
        if log_every and not i % log_every:
            print(i)

        t = test_row[0]
        x = to_col(test_row[1:])
        out = net.forward(x)
        guess = np.argmax(out)
        if t == guess:
            correct += 1

    return correct / n_rows

In [18]:
# NOTE(review): no cell above this one defines `net`; the cell relies on a
# network created by one of the configuration cells further down, so it fails
# on a fresh kernel when the notebook is run top to bottom.
# Uncomment the next line to reload the test set before evaluating.
# test_data = load_data(TEST_FILE, ",", int)

accuracy = test(net, test_data)
print(f"Accuracy is {100*accuracy:.2f}%")     # Expected to be around 10% (presumably for a network that has not been trained yet)

0
1000
2000
3000
4000
5000
6000
7000
8000
9000
Accuracy is 10.10%


## Train the network

In [17]:
def train(net, train_data):
    """Run one pass over ``train_data``, training ``net`` one row at a time.

    The first entry of a row is its digit label and the remaining entries are
    the inputs. The label is handed to ``net.train`` as a 10 x 1 one-hot
    column; the inputs are handed over as a column vector.
    """
    # Column d of the 10 x 10 identity matrix is the one-hot target for digit
    # d, so all ten targets are built once up front.
    identity = np.eye(10)
    one_hot = {digit: identity[:, [digit]] for digit in range(10)}

    for step, sample in enumerate(train_data):
        # Progress indicator.
        if step % 1000 == 0:
            print(step)

        net.train(to_col(sample[1:]), one_hot[sample[0]])

In [5]:
def train(net, train_data):
    """Train ``net`` once on every row of ``train_data``, in order.

    The first entry of a row is the label and the rest are the inputs. The
    label is passed to ``net.train`` unchanged: no one-hot target is built
    here, so the dictionary of target vectors is no longer needed.
    """
    for sample in train_data:
        label, inputs = sample[0], sample[1:]
        net.train(to_col(inputs), label)

In [None]:
# First configuration we tried: LeakyRelu on every layer with an MSE loss.
# 784 inputs = 28 x 28 pixels; 10 outputs, one per digit.
# NOTE(review): presumably Layer(n_in, n_out, activation) and the last
# NeuralNet argument is the learning rate — confirm against the nn module.
layers = [
    Layer(784, 16, LeakyRelu()),
    Layer(16, 16, LeakyRelu()),
    Layer(16, 10, LeakyRelu()),
]
net = NeuralNet(layers, MSE(), 0.001)

# One pass over the training rows, using whichever `train` was defined last.
train(net, train_data)

# Fraction of test rows classified correctly, reported as a percentage.
accuracy = test(net, test_data)
print(f"Accuracy is {100*accuracy:.2f}%")

Sigmoid at the end

In [50]:
# Use a Sigmoid as the activation of the final layer; the hidden layers and the
# MSE loss are the same as in the first configuration.
# NOTE(review): presumably Layer(n_in, n_out, activation) and the last
# NeuralNet argument is the learning rate — confirm against the nn module.
layers = [
    Layer(784, 16, LeakyRelu()),
    Layer(16, 16, LeakyRelu()),
    Layer(16, 10, Sigmoid()),
]
net = NeuralNet(layers, MSE(), 0.001)

# One pass over the training rows, using whichever `train` was defined last.
train(net, train_data)

# Fraction of test rows classified correctly, reported as a percentage.
accuracy = test(net, test_data)
print(f"Accuracy is {100*accuracy:.2f}%")

from, to: 784 16
from, to: 16 16
0
1000
2000
3000
4000
5000
6000
7000
8000
9000
Accuracy is 11.16%


BinaryCrossEntropyLoss

In [None]:
# Use BinaryCrossEntropyLoss, keeping LeakyRelu on every layer.
# NOTE(review): a leaky ReLU output is presumably not confined to (0, 1),
# while a binary cross-entropy normally expects probabilities — confirm that
# this pairing is intended and how the nn module handles it.
layers = [
    Layer(784, 16, LeakyRelu()),
    Layer(16, 16, LeakyRelu()),
    Layer(16, 10, LeakyRelu()),
]
net = NeuralNet(layers, BinaryCrossEntropyLoss(), 0.001)

# One pass over the training rows, using whichever `train` was defined last.
train(net, train_data)

# Fraction of test rows classified correctly, reported as a percentage.
accuracy = test(net, test_data)
print(f"Accuracy is {100*accuracy:.2f}%")

Sigmoid at the end and BinaryCrossEntropyLoss

In [12]:
# Sigmoid at the end and the BinaryCrossEntropyLoss
# NOTE(review): presumably Layer(n_in, n_out, activation) and the last
# NeuralNet argument is the learning rate — confirm against the nn module.
layers = [
    Layer(784, 16, LeakyRelu()),
    Layer(16, 16, LeakyRelu()),
    Layer(16, 10, Sigmoid()),
]
net = NeuralNet(layers, BinaryCrossEntropyLoss(), 0.001)

# One pass over the training rows, using whichever `train` was defined last.
train(net, train_data)

# Fraction of test rows classified correctly, reported as a percentage.
accuracy = test(net, test_data)
print(f"Accuracy is {100*accuracy:.2f}%")

from, to: 784 16
from, to: 16 16


In [76]:
def train(net, train_data, num_classes=10):
    """Train ``net`` once on every row of ``train_data``, in order.

    The first entry of a row is the class label and the remaining entries are
    the inputs. The label is converted to a one-hot column before it is passed
    to ``net.train``.

    Args:
        net: Object exposing a ``train(x, t)`` method.
        train_data: 2-D array with one labelled sample per row.
        num_classes: Length of the one-hot target vector. Defaults to 10, the
            number of digit classes.

    Raises:
        IndexError: If a label is not a valid index into a vector of length
            ``num_classes``.
    """
    for train_row in train_data:
        label = train_row[0]
        x = to_col(train_row[1:])

        # One-hot encode the label as a (num_classes, 1) column.
        t_one_hot = np.zeros((num_classes, 1))
        t_one_hot[label] = 1

        net.train(x, t_one_hot)

In [11]:
# Softmax at the end and the CrossEntropyLoss
# NOTE(review): the saved output of this cell ends in
# "ValueError: shapes (10,10) and (1,16) not aligned", so this configuration
# did not train successfully when the notebook was last run. The error is
# raised somewhere below this cell's calls — investigate in the nn module.
layers = [
    Layer(784, 16, LeakyRelu()),
    Layer(16, 16, LeakyRelu()),
    Layer(16, 10, Softmax()),
]
net = NeuralNet(layers, CrossEntropyLoss(), 0.001)

# One pass over the training rows, using whichever `train` was defined last.
train(net, train_data)

# Fraction of test rows classified correctly, reported as a percentage.
accuracy = test(net, test_data)
print(f"Accuracy is {100*accuracy:.2f}%")

from, to: 784 16
from, to: 16 16


ValueError: shapes (10,10) and (1,16) not aligned: 10 (dim 1) != 1 (dim 0)