# MNIST

In [2]:
import csv
import numpy as np

def load_data(filepath, delimiter=",", dtype=float):
    """Load a numerical numpy array from a delimited text file.

    Args:
        filepath: Path of the file to read.
        delimiter: Single-character field separator handed to ``csv.reader``.
        dtype: Type every field is converted to by ``np.asarray``.

    Returns:
        A numpy array with one row per non-blank line of the file.

    Raises:
        OSError: If the file cannot be opened (e.g. ``FileNotFoundError``).
        ValueError: If a field cannot be converted to ``dtype`` or the
            non-blank rows have different lengths.
    """

    print(f"Loading {filepath}...")
    # The csv module asks for files to be opened with newline="" so that it
    # can do its own line-ending handling.
    with open(filepath, "r", newline="") as f:
        # csv.reader yields an empty list for a blank line (typically a
        # trailing one); keeping it would make the row list ragged and
        # np.asarray would fail, so such rows are skipped.
        data_list = [row for row in csv.reader(f, delimiter=delimiter) if row]
    data = np.asarray(data_list, dtype=dtype)
    print("Done.")
    return data

In [3]:
# Locations of the MNIST CSV files, relative to the working directory.
TRAIN_FILE = "./mnistdata/mnist_train.csv"
TEST_FILE = "./mnistdata/mnist_test.csv"

# Parse both files as comma-separated integer arrays.
train_data = load_data(TRAIN_FILE, delimiter=",", dtype=int)
test_data = load_data(TEST_FILE, delimiter=",", dtype=int)

Loading ./mnistdata/mnist_train.csv...


FileNotFoundError: [Errno 2] No such file or directory: './mnistdata/mnist_train.csv'

In [None]:
# Draw the first test sample as 28x28 ASCII art, skipping rows that are
# entirely blank.
# Column 0 of a sample row holds the label (see `test`, which reads
# `test_row[0]` as the true class), so the pixels start at column 1. The
# label is dropped once here so that the blank-row check and the drawing
# both look at exactly the same 28 pixels.
image = test_data[0, 1:]
for row in range(28):
    row_pixels = image[28 * row: 28 * (row + 1)]
    if not row_pixels.any():
        continue
    print("".join("#" if pixel else " " for pixel in row_pixels))

## Import source files

In [69]:
from nn import NeuralNet, Layer
from srcs.activations import LeakyRelu, Sigmoid, Softmax
from srcs.losses import MSE, CrossEntropyLoss, BinaryCrossEntropyLoss

In [72]:
def to_col(x):
    """Return array ``x`` reshaped into a single column of shape ``(x.size, 1)``."""
    n_elements = x.size
    return x.reshape((n_elements, 1))

def test(net, test_data):
    """Return the fraction of rows in ``test_data`` that ``net`` predicts correctly.

    For every row, the first element is taken as the true label and the
    remaining elements are reshaped into a column vector and passed to
    ``net.forward``. The predicted label is the index of the largest output.
    Every 3000th row (starting with row 0) the prediction and the true label
    are printed as a spot check.

    Args:
        net: Object exposing ``forward(x)``.
        test_data: 2-D numpy array, one sample per row.

    Returns:
        Accuracy as a float in ``[0, 1]``; ``0.0`` when ``test_data`` has no rows.
    """
    total = test_data.shape[0]
    if total == 0:
        # Nothing to evaluate; avoid dividing by zero below.
        return 0.0

    correct = 0
    for i, test_row in enumerate(test_data):
        y = test_row[0]
        x = to_col(test_row[1:])
        y_pred = np.argmax(net.forward(x))
        if i % 3000 == 0:
            print('pred:', y_pred, 'true:', y)
        if y == y_pred:
            correct += 1

    return correct / total

## Train the network

### 1. LeakyRelu

In [47]:
def train(net, train_data):
    """Make one pass over ``train_data``, calling ``net.train`` once per row.

    The first element of a row is handed to ``net.train`` unchanged as the
    target (no one-hot vectors are built here); the remaining elements are
    reshaped into a column vector and used as the input.
    """
    for sample in train_data:
        features = to_col(sample[1:])
        label = sample[0]
        net.train(features, label)

#### 1-1. LeakyRelu + MSE

In [73]:
# 784 -> 16 -> 16 -> 10, LeakyRelu after every layer, MSE loss.
layers = [
    Layer(n_in, n_out, LeakyRelu())
    for n_in, n_out in ((784, 16), (16, 16), (16, 10))
]
# Third positional argument is presumably the learning rate -- confirm in nn.py.
net = NeuralNet(layers, MSE(), 0.001)

train(net, train_data)

accuracy = test(net, test_data)
print(f"Accuracy is {100*accuracy:.2f}%")

from, to: 784 16
from, to: 16 16
0
10000
20000
30000
40000
50000
epoch:  0
pred: 7 true: 7
pred: 6 true: 6
pred: 9 true: 9
pred: 7 true: 7
Accuracy is 91.46%


#### 1-2. LeakyRelu + BinaryCrossEntropyLoss

In [62]:
# 784 -> 16 -> 16 -> 10, LeakyRelu after every layer, BinaryCrossEntropyLoss.
layers = [
    Layer(n_in, n_out, LeakyRelu())
    for n_in, n_out in ((784, 16), (16, 16), (16, 10))
]
# Third positional argument is presumably the learning rate -- confirm in nn.py.
net = NeuralNet(layers, BinaryCrossEntropyLoss(), 0.001)

train(net, train_data)

accuracy = test(net, test_data)
print(f"Accuracy is {100*accuracy:.2f}%")

from, to: 784 16
from, to: 16 16
0
10000
20000
30000
40000
50000
0
guess: 0 true: 7
guess: 0 true: 2
guess: 0 true: 1
guess: 0 true: 0
guess: 0 true: 4
guess: 0 true: 1
guess: 0 true: 4
guess: 0 true: 9
guess: 0 true: 5
guess: 0 true: 9
guess: 0 true: 0
guess: 0 true: 6
guess: 0 true: 9
guess: 0 true: 0
guess: 0 true: 1
guess: 0 true: 5
guess: 0 true: 9
guess: 0 true: 7
guess: 0 true: 3
guess: 0 true: 4
guess: 0 true: 9
guess: 0 true: 6
guess: 0 true: 6
guess: 0 true: 5
guess: 0 true: 4
guess: 0 true: 0
guess: 0 true: 7
guess: 0 true: 4
guess: 0 true: 0
guess: 0 true: 1
guess: 0 true: 3
guess: 0 true: 1
guess: 0 true: 3
guess: 0 true: 4
guess: 0 true: 7
guess: 0 true: 2
guess: 0 true: 7
guess: 0 true: 1
guess: 0 true: 2
guess: 0 true: 1
guess: 0 true: 1
guess: 0 true: 7
guess: 0 true: 4
guess: 0 true: 2
guess: 0 true: 3
guess: 0 true: 5
guess: 0 true: 1
guess: 0 true: 2
guess: 0 true: 4
guess: 0 true: 4
guess: 0 true: 6
guess: 0 true: 3
guess: 0 true: 5
guess: 0 true: 5
guess: 0 true: 

#### 1-3. LeakyRelu + CrossEntropyLoss

In [63]:
# 784 -> 16 -> 16 -> 10, LeakyRelu after every layer, CrossEntropyLoss.
layers = [
    Layer(n_in, n_out, LeakyRelu())
    for n_in, n_out in ((784, 16), (16, 16), (16, 10))
]
# Third positional argument is presumably the learning rate -- confirm in nn.py.
net = NeuralNet(layers, CrossEntropyLoss(), 0.001)

train(net, train_data)

accuracy = test(net, test_data)
print(f"Accuracy is {100*accuracy:.2f}%")

from, to: 784 16
from, to: 16 16
0
10000
20000
30000
40000
50000
0
guess: 7 true: 7
guess: 5 true: 2
guess: 1 true: 1
guess: 0 true: 0
guess: 4 true: 4
guess: 1 true: 1
guess: 4 true: 4
guess: 9 true: 9
guess: 6 true: 5
guess: 9 true: 9
guess: 0 true: 0
guess: 6 true: 6
guess: 9 true: 9
guess: 0 true: 0
guess: 1 true: 1
guess: 5 true: 5
guess: 9 true: 9
guess: 7 true: 7
guess: 3 true: 3
guess: 4 true: 4
guess: 9 true: 9
guess: 6 true: 6
guess: 6 true: 6
guess: 5 true: 5
guess: 4 true: 4
guess: 0 true: 0
guess: 7 true: 7
guess: 4 true: 4
guess: 0 true: 0
guess: 1 true: 1
guess: 3 true: 3
guess: 1 true: 1
guess: 3 true: 3
guess: 4 true: 4
guess: 7 true: 7
guess: 2 true: 2
guess: 7 true: 7
guess: 1 true: 1
guess: 3 true: 2
guess: 1 true: 1
guess: 1 true: 1
guess: 7 true: 7
guess: 4 true: 4
guess: 2 true: 2
guess: 3 true: 3
guess: 5 true: 5
guess: 1 true: 1
guess: 2 true: 2
guess: 4 true: 4
guess: 4 true: 4
guess: 6 true: 6
guess: 3 true: 3
guess: 5 true: 5
guess: 5 true: 5
guess: 6 true: 

### 2. Sigmoid

In [64]:
def train(net, train_data):
    """Make one pass over ``train_data``, training ``net`` on one-hot targets.

    The ten possible (10, 1) one-hot target vectors are built once up front
    and looked up by the label stored in the first element of each row. The
    remaining elements of the row are reshaped into a column vector and used
    as the input. The row index is printed every 10000 rows as a progress
    indicator.
    """
    def _one_hot(label):
        # Column vector of zeros with a single 1 at index `label`.
        vec = np.zeros((10, 1))
        vec[label] = 1
        return vec

    targets = {label: _one_hot(label) for label in range(10)}

    for step, sample in enumerate(train_data):
        if step % 10000 == 0:
            print(step)

        target = targets[sample[0]]
        features = to_col(sample[1:])
        net.train(features, target)

#### 2-1. Sigmoid + MSE

In [65]:
# Two LeakyRelu hidden layers (784 -> 16 -> 16), then a Sigmoid output layer
# (16 -> 10); MSE loss.
layers = [
    Layer(n_in, n_out, LeakyRelu())
    for n_in, n_out in ((784, 16), (16, 16))
]
layers.append(Layer(16, 10, Sigmoid()))
# Third positional argument is presumably the learning rate -- confirm in nn.py.
net = NeuralNet(layers, MSE(), 0.001)

train(net, train_data)

accuracy = test(net, test_data)
print(f"Accuracy is {100*accuracy:.2f}%")

from, to: 784 16
from, to: 16 16
0
10000
20000
30000
40000
50000
0
guess: 7 true: 7
guess: 0 true: 2
guess: 1 true: 1
guess: 0 true: 0
guess: 4 true: 4
guess: 1 true: 1
guess: 4 true: 4
guess: 4 true: 9
guess: 6 true: 5
guess: 9 true: 9
guess: 0 true: 0
guess: 2 true: 6
guess: 4 true: 9
guess: 0 true: 0
guess: 1 true: 1
guess: 3 true: 5
guess: 4 true: 9
guess: 7 true: 7
guess: 7 true: 3
guess: 4 true: 4
guess: 7 true: 9
guess: 6 true: 6
guess: 4 true: 6
guess: 7 true: 5
guess: 4 true: 4
guess: 0 true: 0
guess: 7 true: 7
guess: 4 true: 4
guess: 0 true: 0
guess: 1 true: 1
guess: 3 true: 3
guess: 1 true: 1
guess: 3 true: 3
guess: 6 true: 4
guess: 7 true: 7
guess: 2 true: 2
guess: 7 true: 7
guess: 1 true: 1
guess: 3 true: 2
guess: 1 true: 1
guess: 1 true: 1
guess: 7 true: 7
guess: 1 true: 4
guess: 1 true: 2
guess: 3 true: 3
guess: 0 true: 5
guess: 1 true: 1
guess: 6 true: 2
guess: 4 true: 4
guess: 4 true: 4
guess: 6 true: 6
guess: 7 true: 3
guess: 7 true: 5
guess: 3 true: 5
guess: 2 true: 

#### 2-2. Sigmoid + BinaryCrossEntropyLoss

In [66]:
# Two LeakyRelu hidden layers (784 -> 16 -> 16), then a Sigmoid output layer
# (16 -> 10); BinaryCrossEntropyLoss.
layers = [
    Layer(n_in, n_out, LeakyRelu())
    for n_in, n_out in ((784, 16), (16, 16))
]
layers.append(Layer(16, 10, Sigmoid()))
# Third positional argument is presumably the learning rate -- confirm in nn.py.
net = NeuralNet(layers, BinaryCrossEntropyLoss(), 0.001)

train(net, train_data)

accuracy = test(net, test_data)
print(f"Accuracy is {100*accuracy:.2f}%")

from, to: 784 16
from, to: 16 16
0
10000
20000
30000
40000
50000
0
guess: 7 true: 7
guess: 2 true: 2
guess: 1 true: 1
guess: 0 true: 0
guess: 4 true: 4
guess: 1 true: 1
guess: 4 true: 4
guess: 9 true: 9
guess: 6 true: 5
guess: 9 true: 9
guess: 0 true: 0
guess: 6 true: 6
guess: 9 true: 9
guess: 0 true: 0
guess: 1 true: 1
guess: 5 true: 5
guess: 9 true: 9
guess: 7 true: 7
guess: 3 true: 3
guess: 4 true: 4
guess: 9 true: 9
guess: 6 true: 6
guess: 6 true: 6
guess: 5 true: 5
guess: 4 true: 4
guess: 0 true: 0
guess: 7 true: 7
guess: 4 true: 4
guess: 0 true: 0
guess: 1 true: 1
guess: 3 true: 3
guess: 1 true: 1
guess: 3 true: 3
guess: 6 true: 4
guess: 7 true: 7
guess: 2 true: 2
guess: 7 true: 7
guess: 1 true: 1
guess: 3 true: 2
guess: 1 true: 1
guess: 1 true: 1
guess: 7 true: 7
guess: 7 true: 4
guess: 2 true: 2
guess: 3 true: 3
guess: 5 true: 5
guess: 1 true: 1
guess: 2 true: 2
guess: 4 true: 4
guess: 4 true: 4
guess: 6 true: 6
guess: 3 true: 3
guess: 7 true: 5
guess: 5 true: 5
guess: 6 true: 

#### 2-3. Sigmoid + CrossEntropyLoss

In [67]:
# Two LeakyRelu hidden layers (784 -> 16 -> 16), then a Sigmoid output layer
# (16 -> 10); CrossEntropyLoss.
layers = [
    Layer(n_in, n_out, LeakyRelu())
    for n_in, n_out in ((784, 16), (16, 16))
]
layers.append(Layer(16, 10, Sigmoid()))
# Third positional argument is presumably the learning rate -- confirm in nn.py.
net = NeuralNet(layers, CrossEntropyLoss(), 0.001)

train(net, train_data)

accuracy = test(net, test_data)
print(f"Accuracy is {100*accuracy:.2f}%")

from, to: 784 16
from, to: 16 16
0
10000
20000
30000
40000
50000
0
guess: 7 true: 7
guess: 2 true: 2
guess: 1 true: 1
guess: 0 true: 0
guess: 4 true: 4
guess: 1 true: 1
guess: 4 true: 4
guess: 9 true: 9
guess: 4 true: 5
guess: 9 true: 9
guess: 0 true: 0
guess: 6 true: 6
guess: 9 true: 9
guess: 0 true: 0
guess: 1 true: 1
guess: 5 true: 5
guess: 9 true: 9
guess: 7 true: 7
guess: 3 true: 3
guess: 4 true: 4
guess: 9 true: 9
guess: 6 true: 6
guess: 6 true: 6
guess: 5 true: 5
guess: 4 true: 4
guess: 0 true: 0
guess: 7 true: 7
guess: 4 true: 4
guess: 0 true: 0
guess: 1 true: 1
guess: 3 true: 3
guess: 1 true: 1
guess: 3 true: 3
guess: 6 true: 4
guess: 7 true: 7
guess: 2 true: 2
guess: 7 true: 7
guess: 1 true: 1
guess: 3 true: 2
guess: 1 true: 1
guess: 1 true: 1
guess: 7 true: 7
guess: 4 true: 4
guess: 2 true: 2
guess: 3 true: 3
guess: 5 true: 5
guess: 1 true: 1
guess: 2 true: 2
guess: 9 true: 4
guess: 4 true: 4
guess: 6 true: 6
guess: 3 true: 3
guess: 5 true: 5
guess: 5 true: 5
guess: 6 true: 

### 3. Softmax

In [58]:
def train(net, train_data):
    """Make one pass over ``train_data``, training ``net`` on one-hot targets.

    For each row the first element is read as the class label and the
    remaining elements are reshaped into a column vector used as the input.
    A fresh (10, 1) one-hot target is built for every row and passed to
    ``net.train`` together with the input.
    """
    for sample in train_data:
        label = sample[0]
        features = to_col(sample[1:])

        # One-hot encode the label over the 10 classes.
        target = np.zeros((10, 1))
        target[label] = 1

        net.train(features, target)

#### 3-1. Softmax + MSE

In [59]:
# Two LeakyRelu hidden layers (784 -> 16 -> 16), then a Softmax output layer
# (16 -> 10); MSE loss.
layers = [
    Layer(n_in, n_out, LeakyRelu())
    for n_in, n_out in ((784, 16), (16, 16))
]
layers.append(Layer(16, 10, Softmax()))
# Third positional argument is presumably the learning rate -- confirm in nn.py.
net = NeuralNet(layers, MSE(), 0.001)

train(net, train_data)

accuracy = test(net, test_data)
print(f"Accuracy is {100*accuracy:.2f}%")

from, to: 784 16
from, to: 16 16


ValueError: shapes (10,10) and (1,16) not aligned: 10 (dim 1) != 1 (dim 0)

#### 3-2. Softmax + BinaryCrossEntropyLoss

In [56]:
# Two LeakyRelu hidden layers (784 -> 16 -> 16), then a Softmax output layer
# (16 -> 10); BinaryCrossEntropyLoss.
layers = [
    Layer(n_in, n_out, LeakyRelu())
    for n_in, n_out in ((784, 16), (16, 16))
]
layers.append(Layer(16, 10, Softmax()))
# Third positional argument is presumably the learning rate -- confirm in nn.py.
net = NeuralNet(layers, BinaryCrossEntropyLoss(), 0.001)

train(net, train_data)

accuracy = test(net, test_data)
print(f"Accuracy is {100*accuracy:.2f}%")

from, to: 784 16
from, to: 16 16


ValueError: shapes (10,10) and (1,16) not aligned: 10 (dim 1) != 1 (dim 0)

#### 3-3. Softmax + CrossEntropyLoss

In [57]:
# Two LeakyRelu hidden layers (784 -> 16 -> 16), then a Softmax output layer
# (16 -> 10); CrossEntropyLoss.
layers = [
    Layer(n_in, n_out, LeakyRelu())
    for n_in, n_out in ((784, 16), (16, 16))
]
layers.append(Layer(16, 10, Softmax()))
# Third positional argument is presumably the learning rate -- confirm in nn.py.
net = NeuralNet(layers, CrossEntropyLoss(), 0.001)

train(net, train_data)

accuracy = test(net, test_data)
print(f"Accuracy is {100*accuracy:.2f}%")

from, to: 784 16
from, to: 16 16


ValueError: shapes (10,10) and (1,16) not aligned: 10 (dim 1) != 1 (dim 0)