In [None]:
#%% 2-2.5, 2.7
import numpy as np
import time
import matplotlib.pyplot as plt



# MNIST splits stored as .npy files, loaded as one (images, labels) pair per split.
train_images_np, train_labels_np = (
    np.load('./Project3_Data/MNIST_train_images.npy'),
    np.load('./Project3_Data/MNIST_train_labels.npy'),
)
val_images_np, val_labels_np = (
    np.load('./Project3_Data/MNIST_val_images.npy'),
    np.load('./Project3_Data/MNIST_val_labels.npy'),
)
test_images_np, test_labels_np = (
    np.load('./Project3_Data/MNIST_test_images.npy'),
    np.load('./Project3_Data/MNIST_test_labels.npy'),
)


##Template MLP code
def softmax(x):
    """Return the softmax of ``x``, normalised over ALL of its elements.

    The maximum is subtracted before exponentiating. This leaves the result
    mathematically unchanged but keeps ``np.exp`` from overflowing to ``inf``
    (which would turn the output into ``nan``) when the scores are large.
    """
    # np.max raises on an empty array, so fall back to a zero shift there.
    shift = np.max(x) if np.size(x) else 0
    exps = np.exp(x - shift)
    return exps/np.sum(exps)

def sigmoid(x):
    """Return the logistic function 1 / (1 + exp(-x)), element-wise.

    Written as exp(-log(1 + exp(-x))) using ``np.logaddexp`` so that large
    negative inputs do not overflow ``np.exp`` (the naive form emits an
    overflow RuntimeWarning there).
    """
    # logaddexp(0, -x) == log(exp(0) + exp(-x)) == log(1 + exp(-x)), computed stably.
    return np.exp(-np.logaddexp(0, -x))

def CrossEntropy(y_hat,y):
    """Return the cross-entropy ``-sum(y * log(y_hat))`` between two vectors.

    y_hat: predicted probabilities.
    y:     target distribution (one-hot in this notebook).

    Probabilities are clipped from below before taking the log. Without the
    clip, any entry of ``y_hat`` that underflows to exactly 0 produces
    ``0 * -inf = nan`` even where ``y`` is 0, and a nan loss poisons the
    per-epoch averages.
    """
    eps = 1e-12  # smallest probability passed to log; log(eps) is about -27.6
    return -np.dot(y,np.log(np.clip(y_hat, eps, None)))

class MLP():
    """Two-layer perceptron (sigmoid hidden layer, softmax output) with hand-written backprop.

    Parameters held by the network:
        W1: (n_hidden, n_inputs)   b1: (n_hidden,)
        W2: (n_outputs, n_hidden)  b2: (n_outputs,)

    Gradients are accumulated one sample at a time by ``update_grad`` into
    ``W1_grad``/``b1_grad``/``W2_grad``/``b2_grad`` and applied by
    ``update_params``; ``reset_grad`` clears the accumulators.
    """

    def __init__(self, n_inputs=None, n_hidden=64, n_outputs=10):
        """Randomly initialise the weights (normal, std 0.1) and zero the biases.

        n_inputs:  length of one input vector. When omitted it is taken from
                   the module-level ``train_images_np`` array, which keeps the
                   plain ``MLP()`` call working as before.
        n_hidden:  number of hidden units.
        n_outputs: number of output classes.
        """
        if n_inputs is None:
            n_inputs = train_images_np.shape[1]
        self.W1 = np.random.normal(0,0.1,(n_hidden,n_inputs))
        self.b1 = np.zeros(n_hidden)
        self.W2 = np.random.normal(0,0.1,(n_outputs,n_hidden))
        self.b2 = np.zeros(n_outputs)
        self.reset_grad()

    def reset_grad(self):
        """Zero the gradient accumulators.

        The accumulators are arrays shaped like their parameters (not the int 0),
        so ``update_params`` is a harmless no-op if it runs before any
        ``update_grad`` call instead of failing on ``int.reshape``.
        """
        self.W2_grad = np.zeros_like(self.W2)
        self.b2_grad = np.zeros_like(self.b2)
        self.W1_grad = np.zeros_like(self.W1)
        self.b1_grad = np.zeros_like(self.b1)

    def forward(self, x):
        """Feed one input vector ``x`` through the network and return the output ``y_hat``.

        All intermediate values are stored on ``self`` because ``update_grad``
        reads them to compute the gradients for this sample.
        """
        self.x = x
        self.W1x = np.matmul(self.W1, self.x)
        self.a1 = self.W1x + self.b1
        self.f1 = sigmoid(self.a1)
        self.W2x = np.matmul(self.W2, self.f1)
        self.a2 = self.W2x + self.b2
        self.y_hat = softmax(self.a2)

        return self.y_hat

    def update_grad(self,y):
        """Add the gradient for the most recent ``forward`` sample to the accumulators.

        y: target vector for that sample (one-hot in this notebook).

        Uses the values cached by ``forward``. The Jacobians dA2/db2 and dA1/db1
        are identities and dF1/dA1 is diagonal, so they are applied as
        element-wise operations rather than as dense matrices.
        """
        # dL/dA2: the gradient used for softmax + cross-entropy. ravel() accepts
        # a target given as either shape (n,) or (1, n).
        dLdA2 = np.ravel(self.y_hat - y)

        # dL/dW2[i, j] = dLdA2[i] * f1[j]; dL/db2 = dLdA2
        dLdW2 = np.outer(dLdA2, self.f1)
        dLdb2 = dLdA2

        # Back through W2, then through the sigmoid, whose derivative is
        # f1 * (1 - f1) with f1 already cached by forward().
        dLdF1 = np.matmul(dLdA2, self.W2)
        dLdA1 = dLdF1 * self.f1 * (1 - self.f1)

        # dL/dW1[i, j] = dLdA1[i] * x[j]; dL/db1 = dLdA1
        dLdW1 = np.outer(dLdA1, self.x)
        dLdb1 = dLdA1

        self.W2_grad = self.W2_grad + dLdW2
        self.b2_grad = self.b2_grad + dLdb2
        self.W1_grad = self.W1_grad + dLdW1
        self.b1_grad = self.b1_grad + dLdb1

    def update_params(self,learning_rate):
        """Take one gradient-descent step using the accumulated (summed) gradients."""
        self.W2 = self.W2 - learning_rate * self.W2_grad
        self.b2 = self.b2 - learning_rate * np.reshape(self.b2_grad, -1)
        self.W1 = self.W1 - learning_rate * self.W1_grad
        self.b1 = self.b1 - learning_rate * np.reshape(self.b1_grad, -1)

    def save(self, filename):
        """Write W1, b1, W2, b2 to ``filename`` with ``np.savez`` (stored positionally as arr_0..arr_3)."""
        np.savez(filename, self.W1, self.b1, self.W2, self.b2)

    def load(self, filename):
        """Restore W1, b1, W2, b2 from a file written by ``save``."""
        # The context manager closes the underlying file once the arrays are read.
        with np.load(filename) as npzfile:
            self.W1 = npzfile["arr_0"]
            self.b1 = npzfile["arr_1"]
            self.W2 = npzfile["arr_2"]
            self.b2 = npzfile["arr_3"]


## Init the MLP
myNet = MLP()

# Optimisation settings
learning_rate = 1e-3
n_epochs = 100
batch_size = 256

# Append each label as the last column of its image row, so that shuffling the
# rows keeps every image paired with its label.
training_data = np.column_stack((train_images_np, train_labels_np))
n_training = len(train_images_np)
# n_training = 2000

# Per-epoch training curves
training_accuracy = np.zeros(n_epochs)
training_loss = np.zeros(n_epochs)
# Scratch buffer: loss of every training image within the current epoch
training_losses = np.zeros(n_training)

n_validation = len(val_images_np)

# Per-epoch validation curves, plus the matching per-image scratch buffer
validation_accuracy = np.zeros(n_epochs)
validation_loss = np.zeros(n_epochs)
validation_losses = np.zeros(n_validation)
numCorrect_validation = 0

## Training code
# Mini-batch gradient descent: per-sample gradients are summed over each batch
# of `batch_size` shuffled rows, then applied once with `learning_rate`.
# The loop variable is called `epoch` rather than `iter` so that the builtin
# `iter()` is not replaced by an integer for the cells that run after this one.
for epoch in range(n_epochs):

    # Counter for # of correct image classifications this epoch
    numCorrect = 0
    # Shuffle the rows in place (each row is an image followed by its label)
    np.random.shuffle(training_data)

    for i_start in range(0, n_training, batch_size):
        # The last batch may be shorter than batch_size
        i_end = min(i_start + batch_size, n_training)

        # Start every batch from zeroed gradient accumulators
        myNet.reset_grad()

        for image_index in range(i_start, i_end):
            # Forward pass on the pixel columns (everything but the last column)
            myNet.forward(training_data[image_index, :-1])

            # Track # correct - not required for training, but shows whether
            # the gradient step is improving the right thing
            i_hat = np.argmax(myNet.y_hat)
            # The label was stacked into the image array and so carries that
            # array's dtype; cast it back to int before using it as an index.
            i = int(training_data[image_index, -1])
            if i == i_hat:
                numCorrect += 1

            # One-hot target and the loss for this sample
            y = np.zeros(10)
            y[i] = 1
            training_losses[image_index] = CrossEntropy(myNet.y_hat, y)

            # Back prop: add this sample's gradient to the batch total
            myNet.update_grad(y)

        # Gradient step for the batch
        myNet.update_params(learning_rate)

    training_accuracy[epoch] = numCorrect / n_training
    training_loss[epoch] = np.average(training_losses)

    # Validation loss/accuracy with the parameters reached at the end of this epoch
    numCorrect_validation = 0

    for j in range(n_validation):
        # Forward pass only; no gradient accumulation on validation data
        myNet.forward(val_images_np[j])

        index_hat = np.argmax(myNet.y_hat)
        index = int(val_labels_np[j])
        if index == index_hat:
            numCorrect_validation += 1

        y = np.zeros(10)
        y[index] = 1
        validation_losses[j] = CrossEntropy(myNet.y_hat, y)

    validation_accuracy[epoch] = numCorrect_validation / n_validation
    validation_loss[epoch] = np.average(validation_losses)


# Two stacked panels: loss on top, accuracy below, each with a training and a validation curve.
fig, ax = plt.subplots(2,1, figsize=(10,10))
loss_panel, accuracy_panel = ax

panel_specs = (
    (loss_panel, training_loss, validation_loss,
     "Losses vs Epoch using all images", "Average Cross Entropy Loss"),
    (accuracy_panel, training_accuracy, validation_accuracy,
     "Accuracies vs Epoch using all images", "Model Accuracy"),
)
for panel, training_curve, validation_curve, panel_title, y_label in panel_specs:
    panel.plot(training_curve, label="Training")
    panel.plot(validation_curve, label="Validation")
    panel.set_title(panel_title)
    panel.set_ylabel(y_label)
    panel.set_xlabel("Epoch Number")
    panel.legend()

plt.show()

# Persist the trained parameters
myNet.save("MLP_weights.npz")

In [None]:
#%% 2.6
# Accuracy and average cross-entropy loss of the trained MLP on the test split.

n_test = test_images_np.shape[0]
test_losses = np.zeros(n_test)
numCorrect_test = 0

for sample_index, image in enumerate(test_images_np):
    # forward pass returns the predicted class probabilities
    probabilities = myNet.forward(image)

    # count the sample when the most probable class is the true label
    true_label = test_labels_np[sample_index]
    if true_label == np.argmax(probabilities):
        numCorrect_test += 1

    # per-sample loss against the one-hot target
    target = np.zeros(10)
    target[true_label] = 1
    test_losses[sample_index] = CrossEntropy(probabilities, target)

test_accuracy = numCorrect_test / n_test
test_loss = np.average(test_losses)

#output
print("Test accuracy: " + str(test_accuracy))
print("Test loss: " + str(test_loss))


In [None]:
#%% 2.8 Confusion Matrix
# Rows are the true digit, columns the predicted digit. Each row is divided by
# the number of test samples of that digit, so entry [t, p] is the fraction of
# digit-t samples that were classified as p.
n_classes = 10

# No other cell defines `predictions`, so compute the MLP's predicted class for
# every test image here instead of relying on leftover kernel state.
predictions = np.array([np.argmax(myNet.forward(image)) for image in test_images_np])

confusion_matrix = np.zeros((n_classes, n_classes))
for true_label, predicted_label in zip(test_labels_np, predictions):
    confusion_matrix[int(true_label), int(predicted_label)] += 1

for row in range(n_classes):
    class_count = np.count_nonzero(test_labels_np == row)
    # A digit with no test samples keeps its all-zero row rather than dividing by zero.
    if class_count:
        confusion_matrix[row] = np.divide(confusion_matrix[row], class_count)

plt.matshow(confusion_matrix)
plt.xlabel("Predicted label")
plt.ylabel("True label")
plt.colorbar()
plt.show()


In [None]:
#%% 2.9 Visualize W1
# Show the weights of each of the 64 hidden units as a 28 x 28 image in an 8 x 8 grid.

fig, ax = plt.subplots(8,8, figsize=(10,10))
# ax.flat walks the grid row by row, i.e. in the same order as ax[i//8, i % 8]
for unit, panel in enumerate(ax.flat):
    unit_weights = myNet.W1[unit]
    panel.matshow(unit_weights.reshape((28,28)))

plt.show()

In [None]:
#%% 3: CNN

## Template for ConvNet Code
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

class ConvNet(nn.Module):
    """Small CNN: two conv + ReLU + max-pool stages followed by three linear layers.

    Architecture from https://pytorch.org/tutorials/beginner/blitz/cifar10_tutorial.html,
    with a single input channel. The input is reshaped to (-1, 1, 28, 28) and the
    network returns the raw 10-dimensional output of the last linear layer.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)
        # After two (5x5 conv, 2x2 pool) stages a 28x28 image is 16 channels of 4x4
        self.fc1 = nn.Linear(in_features=16 * 4 * 4, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=84)
        self.fc3 = nn.Linear(in_features=84, out_features=10)

    def forward(self, x):
        """Return the 10 class scores for each image in ``x``."""
        images = x.view(-1,1,28,28)
        stage1 = self.pool(F.relu(self.conv1(images)))
        stage2 = self.pool(F.relu(self.conv2(stage1)))
        flat = stage2.view(-1, 16 * 4 * 4)
        hidden1 = F.relu(self.fc1(flat))
        hidden2 = F.relu(self.fc2(hidden1))
        return self.fc3(hidden2)

#Your training and testing code goes here
#Your training and testing code goes here
# The network first defines a 2D convolution with 1 input channel, 6 output channels, and a kernel size of 5.
# It then defines a max-pool operation with a kernel size of 2 and a stride of 2.
# It then defines a second 2D convolution with 16 distinct 6 x 5 x 5 filters.
# It defines a fully connected layer with a (16 * 4 * 4)-dimensional input and a 120-dimensional output,
# then another linear layer with a 120-dimensional input and an 84-dimensional output,
# and finally an output layer with an 84-dimensional input and a 10-dimensional output.

# In the forward pass, the input x is reshaped to (-1, 1, 28, 28) and passed through the first convolution, a rectified linear unit (ReLU), and the max pool.
# The result is passed through the second convolution, followed by another ReLU and the same max-pool operation.
# It is then reshaped (flattened) to (-1, 16 * 4 * 4).
# Next the first linear layer is applied, followed by a ReLU,
# then the second linear layer, followed by another ReLU.
# Finally the third linear layer produces the output, which is returned.

# Data loading adapted from "1. Load and normalize CIFAR10" on https://pytorch.org/tutorials/beginner/blitz/cifar10_tutorial.html#define-a-loss-function-and-optimizer

train_images_np=np.load('./Project3_Data/MNIST_train_images.npy')
train_labels_np=np.load('./Project3_Data/MNIST_train_labels.npy')
val_images_np=np.load('./Project3_Data/MNIST_val_images.npy')
val_labels_np=np.load('./Project3_Data/MNIST_val_labels.npy')
test_images_np=np.load('./Project3_Data/MNIST_test_images.npy')
test_labels_np=np.load('./Project3_Data/MNIST_test_labels.npy')

# Each dataset is a list of [image, label] pairs, both cast to float32;
# the DataLoader collates them into shuffled batches of 256.
train_dat = [
    [image.astype(np.float32), label.astype(np.float32)]
    for image, label in zip(train_images_np, train_labels_np)
]
trainloader = torch.utils.data.DataLoader(train_dat, batch_size = 256, shuffle = True)

test_dat = [
    [image.astype(np.float32), label.astype(np.float32)]
    for image, label in zip(test_images_np, test_labels_np)
]
testloader = torch.utils.data.DataLoader(test_dat, batch_size = 256, shuffle = True)
classes = (0,1,2,3,4,5,6,7,8,9)

net = ConvNet()

# Loss and optimizer adapted from "3. Define a Loss function and optimizer" on https://pytorch.org/tutorials/beginner/blitz/cifar10_tutorial.html#define-a-loss-function-and-optimizer
import torch.optim as optim

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

trainingAcc = []
testingAcc = []


def _accuracy_percent(model, loader):
    """Return (accuracy in whole percent, number of samples) of `model` over `loader`."""
    correct = 0
    total = 0
    # Evaluation only, so no gradients are needed
    with torch.no_grad():
        for images, labels in loader:
            outputs = model(images)
            # the class with the highest score is the prediction
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels.long()).sum().item()
    if total == 0:
        return 0, 0
    return 100 * correct // total, total


# Training loop adapted from "4. Train the network" on https://pytorch.org/tutorials/beginner/blitz/cifar10_tutorial.html#define-a-loss-function-and-optimizer
for epoch in range(100):  # loop over the dataset multiple times

    running_loss = 0.0
    n_batches = 0
    for inputs, labels in trainloader:
        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels.long())
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        n_batches += 1

    # Report the mean batch loss once per epoch. A report every 2000 mini-batches
    # only fires when an epoch has at least 2000 batches, i.e. more than about
    # 510,000 samples at a batch size of 256.
    print(f'[{epoch + 1}] loss: {running_loss / max(n_batches, 1):.3f}')

    # Show the predictions for the first few images of one test batch.
    # The for/break takes a single batch from the loader.
    with torch.no_grad():
        for images, labels in testloader:
            _, predicted = torch.max(net(images), 1)
            break

    print('Predicted: ', ' '.join(f'{classes[predicted[j]]:5f}'
                                for j in range(min(4, len(predicted)))))

    train_acc, n_train_images = _accuracy_percent(net, trainloader)
    print(f'Accuracy of the network on the {n_train_images} training images: {train_acc} %')
    trainingAcc.append(train_acc)

    test_acc, n_test_images = _accuracy_percent(net, testloader)
    print(f'Accuracy of the network on the {n_test_images} test images: {test_acc} %')
    testingAcc.append(test_acc)

epoch_axis = list(range(len(trainingAcc)))
plt.plot(epoch_axis, trainingAcc, label="Training")
plt.plot(epoch_axis, testingAcc, label="Testing")
plt.title("CNN accuracy vs epoch")
plt.xlabel("Epoch Number")
plt.ylabel("Accuracy (%)")
plt.legend()
plt.show()
torch.save(net.state_dict(), "./cnnweights.pt")

In [None]:
# Reload the data, rebuild the loaders, restore the saved CNN weights and
# report the restored network's accuracy on the test set.
train_images_np=np.load('./Project3_Data/MNIST_train_images.npy')
train_labels_np=np.load('./Project3_Data/MNIST_train_labels.npy')
val_images_np=np.load('./Project3_Data/MNIST_val_images.npy')
val_labels_np=np.load('./Project3_Data/MNIST_val_labels.npy')
test_images_np=np.load('./Project3_Data/MNIST_test_images.npy')
test_labels_np=np.load('./Project3_Data/MNIST_test_labels.npy')

# [image, label] pairs, both as float32, batched and shuffled by the DataLoader
train_dat = [
    [image.astype(np.float32), label.astype(np.float32)]
    for image, label in zip(train_images_np, train_labels_np)
]
trainloader = torch.utils.data.DataLoader(train_dat, batch_size = 256, shuffle = True)

test_dat = [
    [image.astype(np.float32), label.astype(np.float32)]
    for image, label in zip(test_images_np, test_labels_np)
]
testloader = torch.utils.data.DataLoader(test_dat, batch_size = 256, shuffle = True)
classes = (0,1,2,3,4,5,6,7,8,9)

# Fresh network populated from the weights file written after training
cnn_state_dict = torch.load('./cnnweights.pt')
new_cnn = ConvNet()
new_cnn.load_state_dict(cnn_state_dict)

correct = 0
total = 0
with torch.no_grad():
    for images, labels in testloader:
        # run the batch through the network
        outputs = new_cnn(images)
        # the highest-scoring class is the prediction
        _, predicted = torch.max(outputs.data, 1)
        batch_hits = (predicted == labels).sum().item()
        total += labels.size(0)
        correct += batch_hits

print(f'Accuracy of the network on the 10000 test images: {100 * correct // total} %')