# MNIST

In [94]:
import csv
import numpy as np

def load_data(filepath, delimiter=",", dtype=float):
    """Load a label-first numeric CSV file into feature and label arrays.

    Every non-empty row is read as ``label, value_1, ..., value_n`` and all
    fields are converted to ``dtype``.

    Args:
        filepath: Path of the CSV file to read.
        delimiter: Field separator used in the file.
        dtype: Type every field is converted to by ``np.asarray``.

    Returns:
        A tuple ``(x, y)``: ``x`` holds every column after the first
        (shape ``(n_rows, n_columns - 1)``) and ``y`` holds the first column
        as a column vector of shape ``(n_rows, 1)``.

    Raises:
        ValueError: If rows have unequal lengths or a field cannot be
            converted to ``dtype``.
        IndexError: If the file contains no data rows.
    """

    print(f"Loading {filepath}...")
    # The csv module expects files opened with newline="" so that it can do
    # its own line-ending handling.
    with open(filepath, "r", newline="") as f:
        # csv.reader yields [] for blank lines; dropping them keeps a trailing
        # empty line from producing a ragged (unconvertible) row list.
        data_list = [row for row in csv.reader(f, delimiter=delimiter) if row]
    data = np.asarray(data_list, dtype=dtype)
    y = data[:, 0].reshape(-1, 1)
    x = data[:, 1:]
    print("Done.")
    return x, y

## 1. Load the dataset

### 1-1. Using CSV dataset

In [95]:
TRAIN_FILE = "./datasets/MNIST/data_train.csv"
TEST_FILE = "./datasets/MNIST/data_test.csv"

# Parse every CSV field as an integer; load_data returns (features, labels).
x_train, y_train = load_data(TRAIN_FILE, delimiter=',', dtype=int)
x_test, y_test = load_data(TEST_FILE, delimiter=',', dtype=int)

Loading ./datasets/MNIST/data_train.csv...
Done.
Loading ./datasets/MNIST/data_test.csv...
Done.


In [96]:
# Divide the feature arrays by 255.0. NOTE: this rebinds x_train / x_test, so
# running the cell a second time without reloading divides them again.
x_train = x_train / 255.0
x_test = x_test / 255.0

# Report shape and dtype of every split.
for caption, split in (
    ("x train shape:", x_train),
    ("y train shape:", y_train),
    ("x test shape:", x_test),
    ("y test shape:", y_test),
):
    print(caption, split.shape, split.dtype)

x train shape: (60000, 784) float64
y train shape: (60000, 1) int64
x test shape: (10000, 784) float64
y test shape: (10000, 1) int64


In [None]:
# Put the label column in front of the features: each row is [label, features...].
train_data = np.concatenate((y_train, x_train), axis=1)
print(train_data.shape)

test_data = np.concatenate((y_test, x_test), axis=1)
print(test_data.shape)

(60000, 785)
(10000, 785)


### 1-2. Using Keras

In [98]:
import tensorflow as tf

# NOTE: this cell rebinds x_train / y_train / x_test / y_test, replacing the
# arrays loaded from CSV in section 1-1.
mnist = tf.keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Divide the image arrays by 255.0.
x_train = x_train / 255.0
x_test = x_test / 255.0

# Flatten everything after the first axis so each sample is one row.
x_train = x_train.reshape(len(x_train), -1)
y_train = y_train.reshape(len(y_train), -1)
x_test = x_test.reshape(len(x_test), -1)
y_test = y_test.reshape(len(y_test), -1)

# Report shape and dtype of every split.
for caption, split in (
    ("x train shape:", x_train),
    ("y train shape:", y_train),
    ("x test shape:", x_test),
    ("y test shape:", y_test),
):
    print(caption, split.shape, split.dtype)

x train shape: (60000, 784) float64
y train shape: (60000, 1) uint8
x test shape: (10000, 784) float64
y test shape: (10000, 1) uint8


In [None]:
# Same [label, features...] row layout as train_data / test_data, built from
# the arrays currently bound to x_train / y_train / x_test / y_test.
train_data1 = np.concatenate((y_train, x_train), axis=1)
print(train_data1.shape)

test_data1 = np.concatenate((y_test, x_test), axis=1)
print(test_data1.shape)

(60000, 785)
(10000, 785)


In [100]:
# Check element by element that the two stacked datasets are identical.
for keras_split, csv_split in ((train_data1, train_data), (test_data1, test_data)):
    t = keras_split == csv_split
    print(t.all())

True
True


In [44]:
data = train_data

# Render the first sample as 28x28 ASCII art, skipping image rows that are
# completely blank.
# Column 0 of each data row is the label, so the pixels of image row `row`
# live in columns 1 + 28*row .. 28*(row + 1) inclusive. The emptiness check
# must use the same +1 offset as the drawing code; without it the label leaks
# into the first row's check and every other row's check is shifted by one
# pixel.
for row in range(28):
    first_col = 1 + 28 * row
    row_pixels = data[0, first_col:first_col + 28]
    if not any(row_pixels):
        continue
    print("".join("#" if pixel else " " for pixel in row_pixels))

                            
            ############    
        ################    
       ################     
       ###########          
        ####### ##          
         #####              
           ####             
           ####             
            ######          
             ######         
              ######        
               #####        
                 ####       
              #######       
            ########        
          #########         
        ##########          
      ##########            
    ##########              
    ########                


Import source files

In [101]:
from nn.nn1 import NeuralNet, Layer
from srcs.activations import LeakyRelu, Sigmoid, Softmax
from srcs.losses import MSELoss, CrossEntropyLoss, BCELoss

In [102]:
def to_col(x):
    """Return the elements of array ``x`` as a column of shape ``(x.size, 1)``."""
    column = x.reshape(-1, 1)
    return column

def test(net, test_data):
    correct = 0
    for i, test_row in enumerate(test_data):

        y = test_row[0]
        x = to_col(test_row[1:])
        out = net.forward(x)
        y_pred = np.argmax(out)
        if not i % 3000:
            print('pred:', y_pred, 'true:', y)
        if y == y_pred:
            correct += 1

    return correct/test_data.shape[0]

## 2. Train the network

In [112]:
# Features per sample, and the number of distinct label values in y_train.
input_shape = x_train.shape[1]
output_shape = np.unique(y_train).size

print("input shape:", input_shape)
print("output shape:", output_shape)

input shape: 784
output shape: 10


### 2-1. LeakyRelu

In [103]:
def train(net, train_data):
    """Run one pass over ``train_data``, training ``net`` one sample at a time.

    Each row is ``[label, feature_1, ..., feature_n]``. The features are handed
    to ``net.train`` as a column vector together with the raw label value; no
    one-hot target is built here.
    """
    for sample in train_data:
        label = sample[0]
        features = to_col(sample[1:])
        net.train(features, label)

#### 2-1-1. LeakyRelu + MSELoss

In [104]:
# Experiment: LeakyRelu on every layer (including the output) with MSELoss.
# Layer arguments look like (in-size, out-size, activation), giving
# input_shape -> 16 -> 16 -> output_shape -- confirm against nn.nn1.
layers = [
    Layer(input_shape, 16, LeakyRelu()),
    Layer(16, 16, LeakyRelu()),
    Layer(16, output_shape, LeakyRelu()),
]
# NOTE(review): 0.001 is presumably the learning rate -- confirm against NeuralNet.
net = NeuralNet(layers, MSELoss(), 0.001)

# `train` is defined more than once in this notebook; this call uses whichever
# definition was executed most recently in the kernel.
train(net, train_data)

# `test` returns the fraction of correctly classified rows of test_data.
accuracy = test(net, test_data)
print(f"Accuracy is {100*accuracy:.2f}%")

from, to: 784 16
from, to: 16 16
pred: 0 true: 7.0
pred: 0 true: 6.0
pred: 0 true: 9.0
pred: 0 true: 7.0
Accuracy is 9.80%


#### 2-1-2. LeakyRelu + BCELoss

In [62]:
# Experiment: LeakyRelu on every layer (including the output) with BCELoss.
# Layer arguments look like (in-size, out-size, activation), giving
# input_shape -> 16 -> 16 -> output_shape -- confirm against nn.nn1.
layers = [
    Layer(input_shape, 16, LeakyRelu()),
    Layer(16, 16, LeakyRelu()),
    Layer(16, output_shape, LeakyRelu()),
]
# NOTE(review): 0.001 is presumably the learning rate -- confirm against NeuralNet.
net = NeuralNet(layers, BCELoss(), 0.001)

# `train` is defined more than once in this notebook; this call uses whichever
# definition was executed most recently in the kernel.
train(net, train_data)

# `test` returns the fraction of correctly classified rows of test_data.
accuracy = test(net, test_data)
print(f"Accuracy is {100*accuracy:.2f}%")

from, to: 784 16
from, to: 16 16
0
10000
20000
30000
40000
50000
0
guess: 0 true: 7
guess: 0 true: 2
guess: 0 true: 1
guess: 0 true: 0
guess: 0 true: 4
guess: 0 true: 1
guess: 0 true: 4
guess: 0 true: 9
guess: 0 true: 5
guess: 0 true: 9
guess: 0 true: 0
guess: 0 true: 6
guess: 0 true: 9
guess: 0 true: 0
guess: 0 true: 1
guess: 0 true: 5
guess: 0 true: 9
guess: 0 true: 7
guess: 0 true: 3
guess: 0 true: 4
guess: 0 true: 9
guess: 0 true: 6
guess: 0 true: 6
guess: 0 true: 5
guess: 0 true: 4
guess: 0 true: 0
guess: 0 true: 7
guess: 0 true: 4
guess: 0 true: 0
guess: 0 true: 1
guess: 0 true: 3
guess: 0 true: 1
guess: 0 true: 3
guess: 0 true: 4
guess: 0 true: 7
guess: 0 true: 2
guess: 0 true: 7
guess: 0 true: 1
guess: 0 true: 2
guess: 0 true: 1
guess: 0 true: 1
guess: 0 true: 7
guess: 0 true: 4
guess: 0 true: 2
guess: 0 true: 3
guess: 0 true: 5
guess: 0 true: 1
guess: 0 true: 2
guess: 0 true: 4
guess: 0 true: 4
guess: 0 true: 6
guess: 0 true: 3
guess: 0 true: 5
guess: 0 true: 5
guess: 0 true: 

#### 2-1-3. LeakyRelu + CrossEntropyLoss

In [63]:
# Experiment: LeakyRelu on every layer (including the output) with
# CrossEntropyLoss.
# Layer arguments look like (in-size, out-size, activation), giving
# input_shape -> 16 -> 16 -> output_shape -- confirm against nn.nn1.
layers = [
    Layer(input_shape, 16, LeakyRelu()),
    Layer(16, 16, LeakyRelu()),
    Layer(16, output_shape, LeakyRelu()),
]
# NOTE(review): 0.001 is presumably the learning rate -- confirm against NeuralNet.
net = NeuralNet(layers, CrossEntropyLoss(), 0.001)

# `train` is defined more than once in this notebook; this call uses whichever
# definition was executed most recently in the kernel.
train(net, train_data)

# `test` returns the fraction of correctly classified rows of test_data.
accuracy = test(net, test_data)
print(f"Accuracy is {100*accuracy:.2f}%")

from, to: 784 16
from, to: 16 16
0
10000
20000
30000
40000
50000
0
guess: 7 true: 7
guess: 5 true: 2
guess: 1 true: 1
guess: 0 true: 0
guess: 4 true: 4
guess: 1 true: 1
guess: 4 true: 4
guess: 9 true: 9
guess: 6 true: 5
guess: 9 true: 9
guess: 0 true: 0
guess: 6 true: 6
guess: 9 true: 9
guess: 0 true: 0
guess: 1 true: 1
guess: 5 true: 5
guess: 9 true: 9
guess: 7 true: 7
guess: 3 true: 3
guess: 4 true: 4
guess: 9 true: 9
guess: 6 true: 6
guess: 6 true: 6
guess: 5 true: 5
guess: 4 true: 4
guess: 0 true: 0
guess: 7 true: 7
guess: 4 true: 4
guess: 0 true: 0
guess: 1 true: 1
guess: 3 true: 3
guess: 1 true: 1
guess: 3 true: 3
guess: 4 true: 4
guess: 7 true: 7
guess: 2 true: 2
guess: 7 true: 7
guess: 1 true: 1
guess: 3 true: 2
guess: 1 true: 1
guess: 1 true: 1
guess: 7 true: 7
guess: 4 true: 4
guess: 2 true: 2
guess: 3 true: 3
guess: 5 true: 5
guess: 1 true: 1
guess: 2 true: 2
guess: 4 true: 4
guess: 4 true: 4
guess: 6 true: 6
guess: 3 true: 3
guess: 5 true: 5
guess: 5 true: 5
guess: 6 true: 

### 2-2. Sigmoid

In [64]:
def train(net, train_data):
    """Run one pass over ``train_data`` using precomputed one-hot targets.

    Each row is ``[label, feature_1, ..., feature_n]``. The label selects one
    of ten prebuilt ``(10, 1)`` one-hot column vectors, which is passed to
    ``net.train`` together with the features as a column vector. The row index
    is printed every 10000 rows as a progress marker.
    """
    # Build the ten target vectors once instead of once per sample.
    ts = {}
    for digit in range(10):
        ts[digit] = np.zeros((10, 1))
        ts[digit][digit] = 1

    for step, sample in enumerate(train_data):
        if step % 10000 == 0:
            print(step)

        target = ts[sample[0]]  # one-hot vector for this row's label
        features = to_col(sample[1:])
        net.train(features, target)

#### 2-2-1. Sigmoid + MSELoss

In [65]:
# Experiment: LeakyRelu hidden layers, Sigmoid output layer, MSELoss.
# Layer arguments look like (in-size, out-size, activation), giving
# input_shape -> 16 -> 16 -> output_shape -- confirm against nn.nn1.
layers = [
    Layer(input_shape, 16, LeakyRelu()),
    Layer(16, 16, LeakyRelu()),
    Layer(16, output_shape, Sigmoid()),
]
# NOTE(review): 0.001 is presumably the learning rate -- confirm against NeuralNet.
net = NeuralNet(layers, MSELoss(), 0.001)

# `train` is defined more than once in this notebook; this call uses whichever
# definition was executed most recently in the kernel.
train(net, train_data)

# `test` returns the fraction of correctly classified rows of test_data.
accuracy = test(net, test_data)
print(f"Accuracy is {100*accuracy:.2f}%")

from, to: 784 16
from, to: 16 16
0
10000
20000
30000
40000
50000
0
guess: 7 true: 7
guess: 0 true: 2
guess: 1 true: 1
guess: 0 true: 0
guess: 4 true: 4
guess: 1 true: 1
guess: 4 true: 4
guess: 4 true: 9
guess: 6 true: 5
guess: 9 true: 9
guess: 0 true: 0
guess: 2 true: 6
guess: 4 true: 9
guess: 0 true: 0
guess: 1 true: 1
guess: 3 true: 5
guess: 4 true: 9
guess: 7 true: 7
guess: 7 true: 3
guess: 4 true: 4
guess: 7 true: 9
guess: 6 true: 6
guess: 4 true: 6
guess: 7 true: 5
guess: 4 true: 4
guess: 0 true: 0
guess: 7 true: 7
guess: 4 true: 4
guess: 0 true: 0
guess: 1 true: 1
guess: 3 true: 3
guess: 1 true: 1
guess: 3 true: 3
guess: 6 true: 4
guess: 7 true: 7
guess: 2 true: 2
guess: 7 true: 7
guess: 1 true: 1
guess: 3 true: 2
guess: 1 true: 1
guess: 1 true: 1
guess: 7 true: 7
guess: 1 true: 4
guess: 1 true: 2
guess: 3 true: 3
guess: 0 true: 5
guess: 1 true: 1
guess: 6 true: 2
guess: 4 true: 4
guess: 4 true: 4
guess: 6 true: 6
guess: 7 true: 3
guess: 7 true: 5
guess: 3 true: 5
guess: 2 true: 

#### 2-2-2. Sigmoid + BCELoss

In [66]:
# Experiment: LeakyRelu hidden layers, Sigmoid output layer, BCELoss.
# Layer arguments look like (in-size, out-size, activation), giving
# input_shape -> 16 -> 16 -> output_shape -- confirm against nn.nn1.
layers = [
    Layer(input_shape, 16, LeakyRelu()),
    Layer(16, 16, LeakyRelu()),
    Layer(16, output_shape, Sigmoid()),
]
# NOTE(review): 0.001 is presumably the learning rate -- confirm against NeuralNet.
net = NeuralNet(layers, BCELoss(), 0.001)

# `train` is defined more than once in this notebook; this call uses whichever
# definition was executed most recently in the kernel.
train(net, train_data)

# `test` returns the fraction of correctly classified rows of test_data.
accuracy = test(net, test_data)
print(f"Accuracy is {100*accuracy:.2f}%")

from, to: 784 16
from, to: 16 16
0
10000
20000
30000
40000
50000
0
guess: 7 true: 7
guess: 2 true: 2
guess: 1 true: 1
guess: 0 true: 0
guess: 4 true: 4
guess: 1 true: 1
guess: 4 true: 4
guess: 9 true: 9
guess: 6 true: 5
guess: 9 true: 9
guess: 0 true: 0
guess: 6 true: 6
guess: 9 true: 9
guess: 0 true: 0
guess: 1 true: 1
guess: 5 true: 5
guess: 9 true: 9
guess: 7 true: 7
guess: 3 true: 3
guess: 4 true: 4
guess: 9 true: 9
guess: 6 true: 6
guess: 6 true: 6
guess: 5 true: 5
guess: 4 true: 4
guess: 0 true: 0
guess: 7 true: 7
guess: 4 true: 4
guess: 0 true: 0
guess: 1 true: 1
guess: 3 true: 3
guess: 1 true: 1
guess: 3 true: 3
guess: 6 true: 4
guess: 7 true: 7
guess: 2 true: 2
guess: 7 true: 7
guess: 1 true: 1
guess: 3 true: 2
guess: 1 true: 1
guess: 1 true: 1
guess: 7 true: 7
guess: 7 true: 4
guess: 2 true: 2
guess: 3 true: 3
guess: 5 true: 5
guess: 1 true: 1
guess: 2 true: 2
guess: 4 true: 4
guess: 4 true: 4
guess: 6 true: 6
guess: 3 true: 3
guess: 7 true: 5
guess: 5 true: 5
guess: 6 true: 

#### 2-2-3. Sigmoid + CrossEntropyLoss

In [67]:
# Experiment: LeakyRelu hidden layers, Sigmoid output layer, CrossEntropyLoss.
# Layer arguments look like (in-size, out-size, activation), giving
# input_shape -> 16 -> 16 -> output_shape -- confirm against nn.nn1.
layers = [
    Layer(input_shape, 16, LeakyRelu()),
    Layer(16, 16, LeakyRelu()),
    Layer(16, output_shape, Sigmoid()),
]
# NOTE(review): 0.001 is presumably the learning rate -- confirm against NeuralNet.
net = NeuralNet(layers, CrossEntropyLoss(), 0.001)

# `train` is defined more than once in this notebook; this call uses whichever
# definition was executed most recently in the kernel.
train(net, train_data)

# `test` returns the fraction of correctly classified rows of test_data.
accuracy = test(net, test_data)
print(f"Accuracy is {100*accuracy:.2f}%")

from, to: 784 16
from, to: 16 16
0
10000
20000
30000
40000
50000
0
guess: 7 true: 7
guess: 2 true: 2
guess: 1 true: 1
guess: 0 true: 0
guess: 4 true: 4
guess: 1 true: 1
guess: 4 true: 4
guess: 9 true: 9
guess: 4 true: 5
guess: 9 true: 9
guess: 0 true: 0
guess: 6 true: 6
guess: 9 true: 9
guess: 0 true: 0
guess: 1 true: 1
guess: 5 true: 5
guess: 9 true: 9
guess: 7 true: 7
guess: 3 true: 3
guess: 4 true: 4
guess: 9 true: 9
guess: 6 true: 6
guess: 6 true: 6
guess: 5 true: 5
guess: 4 true: 4
guess: 0 true: 0
guess: 7 true: 7
guess: 4 true: 4
guess: 0 true: 0
guess: 1 true: 1
guess: 3 true: 3
guess: 1 true: 1
guess: 3 true: 3
guess: 6 true: 4
guess: 7 true: 7
guess: 2 true: 2
guess: 7 true: 7
guess: 1 true: 1
guess: 3 true: 2
guess: 1 true: 1
guess: 1 true: 1
guess: 7 true: 7
guess: 4 true: 4
guess: 2 true: 2
guess: 3 true: 3
guess: 5 true: 5
guess: 1 true: 1
guess: 2 true: 2
guess: 9 true: 4
guess: 4 true: 4
guess: 6 true: 6
guess: 3 true: 3
guess: 5 true: 5
guess: 5 true: 5
guess: 6 true: 

### 2-3. Softmax

In [58]:
def train(net, train_data, num_classes=10):
    """Run one pass over ``train_data`` with freshly built one-hot targets.

    Each row is ``[label, feature_1, ..., feature_n]``. For every row a new
    ``(num_classes, 1)`` one-hot column vector is built from the label and
    passed to ``net.train`` together with the features as a column vector.

    Args:
        net: Object exposing ``train(x, target)``.
        train_data: 2-D array with one sample per row and the label in column 0.
        num_classes: Length of the one-hot target vector. Defaults to 10.

    Raises:
        IndexError: If a label is outside the range of ``num_classes``.
    """
    for train_row in train_data:
        # The label shares a float array with the features (the notebook
        # prints labels like 7.0), and NumPy rejects float indices, so it
        # must be converted to int before indexing the target vector.
        t = int(train_row[0])
        x = to_col(train_row[1:])

        # Convert the class label to a one-hot column vector.
        t_one_hot = np.zeros((num_classes, 1))
        t_one_hot[t] = 1

        net.train(x, t_one_hot)

#### 2-3-1. Softmax + MSELoss

In [59]:
# Experiment: LeakyRelu hidden layers, Softmax output layer, MSELoss.
# Layer arguments look like (in-size, out-size, activation), giving
# input_shape -> 16 -> 16 -> output_shape -- confirm against nn.nn1.
# NOTE(review): the saved output of this cell ends in
# "ValueError: shapes (10,10) and (1,16) not aligned", so this combination
# has not run to completion.
layers = [
    Layer(input_shape, 16, LeakyRelu()),
    Layer(16, 16, LeakyRelu()),
    Layer(16, output_shape, Softmax()),
]
# NOTE(review): 0.001 is presumably the learning rate -- confirm against NeuralNet.
net = NeuralNet(layers, MSELoss(), 0.001)

# `train` is defined more than once in this notebook; this call uses whichever
# definition was executed most recently in the kernel.
train(net, train_data)

# `test` returns the fraction of correctly classified rows of test_data.
accuracy = test(net, test_data)
print(f"Accuracy is {100*accuracy:.2f}%")

from, to: 784 16
from, to: 16 16


ValueError: shapes (10,10) and (1,16) not aligned: 10 (dim 1) != 1 (dim 0)

#### 2-3-2. Softmax + BCELoss

In [56]:
# Experiment: LeakyRelu hidden layers, Softmax output layer, BCELoss.
# Layer arguments look like (in-size, out-size, activation), giving
# input_shape -> 16 -> 16 -> output_shape -- confirm against nn.nn1.
# NOTE(review): the saved output of this cell ends in
# "ValueError: shapes (10,10) and (1,16) not aligned", so this combination
# has not run to completion.
layers = [
    Layer(input_shape, 16, LeakyRelu()),
    Layer(16, 16, LeakyRelu()),
    Layer(16, output_shape, Softmax()),
]
# NOTE(review): 0.001 is presumably the learning rate -- confirm against NeuralNet.
net = NeuralNet(layers, BCELoss(), 0.001)

# `train` is defined more than once in this notebook; this call uses whichever
# definition was executed most recently in the kernel.
train(net, train_data)

# `test` returns the fraction of correctly classified rows of test_data.
accuracy = test(net, test_data)
print(f"Accuracy is {100*accuracy:.2f}%")

from, to: 784 16
from, to: 16 16


ValueError: shapes (10,10) and (1,16) not aligned: 10 (dim 1) != 1 (dim 0)

#### 2-3-3. Softmax + CrossEntropyLoss

In [57]:
# Experiment: LeakyRelu hidden layers, Softmax output layer, CrossEntropyLoss.
# Layer arguments look like (in-size, out-size, activation), giving
# input_shape -> 16 -> 16 -> output_shape -- confirm against nn.nn1.
# NOTE(review): the saved output of this cell ends in
# "ValueError: shapes (10,10) and (1,16) not aligned", so this combination
# has not run to completion.
layers = [
    Layer(input_shape, 16, LeakyRelu()),
    Layer(16, 16, LeakyRelu()),
    Layer(16, output_shape, Softmax()),
]
# NOTE(review): 0.001 is presumably the learning rate -- confirm against NeuralNet.
net = NeuralNet(layers, CrossEntropyLoss(), 0.001)

# `train` is defined more than once in this notebook; this call uses whichever
# definition was executed most recently in the kernel.
train(net, train_data)

# `test` returns the fraction of correctly classified rows of test_data.
accuracy = test(net, test_data)
print(f"Accuracy is {100*accuracy:.2f}%")

from, to: 784 16
from, to: 16 16


ValueError: shapes (10,10) and (1,16) not aligned: 10 (dim 1) != 1 (dim 0)