In [1]:
import torch
import torch.nn as nn
from torch import optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import torchvision
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [2]:
class MNISTDataset(Dataset):
    """In-memory dataset built from a CSV file of flattened digit images.

    The CSV must contain a ``label`` column; every remaining column is
    treated as pixel data and is reshaped into rows of 28 * 28 = 784 values.
    Labels are held as ``torch.long`` and pixels as ``torch.float32``. Pixel
    values are stored exactly as read (no rescaling is applied).
    """

    def __init__(self, csv_file):
        """Read ``csv_file`` and materialise the label and image tensors."""
        frame = pd.read_csv(csv_file)
        label_values = frame['label'].to_numpy()
        pixel_values = frame.drop(columns='label').to_numpy()

        self.labels = torch.tensor(label_values, dtype=torch.long)
        flat_pixels = torch.tensor(pixel_values, dtype=torch.float32)
        self.images = flat_pixels.reshape(-1, 28 * 28)

    def __len__(self):
        """Return the number of samples (one per label)."""
        return self.labels.size(0)

    def __getitem__(self, idx):
        """Return the ``(image, label)`` pair stored at ``idx``."""
        return self.images[idx], self.labels[idx]

# Wrap the train and test CSV files in datasets and batch them 32 at a time.
# Only the training loader shuffles; the test loader keeps file order.
train_dataset = MNISTDataset('./mnist_train.csv')
test_dataset = MNISTDataset('./mnist_test.csv')
train_dataloader = DataLoader(train_dataset, shuffle=True, batch_size=32)
test_dataloader = DataLoader(test_dataset, shuffle=False, batch_size=32)

# Look at the first training batch only, as a sanity check of tensor shapes
# and label values; the loop is left after one iteration.
for images, labels in train_dataloader:
    print("Batch of images shape:", images.shape)
    print("Batch of labels:", labels)
    break

Batch of images shape: torch.Size([32, 784])
Batch of labels: tensor([3, 0, 5, 7, 0, 7, 2, 4, 3, 8, 7, 4, 0, 7, 3, 7, 9, 2, 6, 7, 9, 8, 9, 9,
        2, 0, 1, 5, 0, 7, 6, 8])


In [3]:
class FullyConnectedNN(nn.Module):
    """Two-layer fully connected classifier: Linear -> ReLU -> Linear.

    Args:
        in_features: Length of each flattened input vector (default 784).
        hidden_features: Width of the hidden layer (default 64).
        num_classes: Number of output logits (default 10).

    The defaults reproduce the original fixed 784 -> 64 -> 10 architecture,
    and the layers keep the attribute names ``fc1`` and ``fc2`` so existing
    state_dict checkpoints and code that reads ``model.fc1`` / ``model.fc2``
    keep working.
    """

    def __init__(self, in_features=784, hidden_features=64, num_classes=10):
        super().__init__()
        self.fc1 = nn.Linear(in_features, hidden_features)
        self.fc2 = nn.Linear(hidden_features, num_classes)

    def forward(self, x):
        """Return raw class logits for ``x``.

        ``x`` must have ``in_features`` as its last dimension. No softmax is
        applied here; the output is the result of the second linear layer.
        """
        x = self.fc1(x)
        x = torch.relu(x)
        x = self.fc2(x)
        return x

In [4]:
# Seed PyTorch's global RNG before the model is built so that weight
# initialisation (and any later sampling that draws from the global RNG, such
# as default DataLoader shuffling) repeats across fresh runs. Some GPU
# kernels can still be nondeterministic.
SEED = 42
torch.manual_seed(SEED)

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = FullyConnectedNN().to(device)

# Cross-entropy loss on the model's outputs, optimised with Adam.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [12]:
# Train for a fixed number of passes over the training loader and report the
# mean per-batch loss after each pass.
num_epochs = 10
model.train()

for epoch in range(num_epochs):
    running_loss = 0.0
    for images, labels in train_dataloader:
        images = images.to(device)
        labels = labels.to(device)

        # Forward pass and loss for this mini-batch.
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Clear old gradients, backpropagate, then apply the update.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    mean_loss = running_loss / len(train_dataloader)
    print(f'Epoch {epoch + 1}, Loss: {mean_loss}')

Epoch 1, Loss: 0.5500900639007489
Epoch 2, Loss: 0.2494664280464252
Epoch 3, Loss: 0.22279974812498937
Epoch 4, Loss: 0.2117241875372827
Epoch 5, Loss: 0.2008217093044271
Epoch 6, Loss: 0.19336176056085774
Epoch 7, Loss: 0.1878412031861022
Epoch 8, Loss: 0.17891357129669438
Epoch 9, Loss: 0.17501198984683336
Epoch 10, Loss: 0.17968360941230785


In [69]:
# Persist the weights of the model trained above so the checkpoint loaded by
# the following cell comes from this run instead of a file left over from an
# earlier session (a fresh kernel would otherwise fail or evaluate stale
# weights). Note: this overwrites any existing 'FullyConnectedNN.pth'.
torch.save(model.state_dict(), 'FullyConnectedNN.pth')

In [5]:
# Rebuild the network and restore its weights from the saved checkpoint.
# map_location=device remaps tensors that were saved on a different device
# (e.g. a GPU checkpoint opened on a CPU-only machine). weights_only=True
# restricts unpickling to tensors and plain containers, which is all a
# state_dict needs, instead of running arbitrary pickled code.
model = FullyConnectedNN()
state_dict = torch.load("FullyConnectedNN.pth", map_location=device, weights_only=True)
model.load_state_dict(state_dict)
model.to(device)

FullyConnectedNN(
  (fc1): Linear(in_features=784, out_features=64, bias=True)
  (fc2): Linear(in_features=64, out_features=10, bias=True)
)

In [6]:
# Measure top-1 accuracy over every batch of the test loader.
model.eval()  # switch the module to evaluation mode
correct = 0
total = 0
with torch.no_grad():  # gradients are not needed while scoring
    for images, labels in test_dataloader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        # Index of the largest logit per row is the predicted class. This
        # replaces torch.max(outputs.data, 1): `.data` is a legacy accessor
        # and is redundant inside no_grad().
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

accuracy = 100 * correct / total
# Report the number of images actually scored rather than a fixed count.
print(f'Accuracy of the network on the {total} test images: {accuracy:.2f}%')

Accuracy of the network on the 10000 test images: 94.74%


In [28]:
# Classify the first test sample with the model, then repeat the computation
# with explicit matrix products to confirm that the layer weights and biases
# reproduce the same prediction.
test_img, test_num = test_dataset[0]
with torch.no_grad():  # neither computation needs an autograd graph
    test_img = test_img.to(device)
    # Call the module itself rather than .forward() so registered hooks run.
    test_output = model(test_img)
    print("actual label: ")
    print(test_num)
    print("predicted: ")
    print(torch.argmax(test_output))

    print("verify hard matrix multiplication: ")
    # Column-vector form: W1 @ x + b1, ReLU, then W2 @ h + b2.
    layer1_Output = (model.fc1.weight @ test_img.view(784, 1)) + model.fc1.bias.view(64,1)
    layer2_Output = (model.fc2.weight @ torch.relu(layer1_Output)) + model.fc2.bias.view(10,1)
    print(torch.argmax(layer2_Output.view(-1, 10)))

actual label: 
tensor(7)
predicted: 
tensor(7, device='cuda:0')
verify hard matrix multiplication: 
tensor(7, device='cuda:0')


In [20]:
# Export the first layer's bias as a single CSV row whose leading field is -1.
# The file is written without a header line and without an index column.
bias1 = model.fc1.bias.detach().cpu().flatten().numpy()
df_b1 = pd.DataFrame([bias1])
df_b1.insert(loc=0, column='index', value=-1)
df_b1.to_csv("fc1_bias.csv", header=False, index=False)

In [19]:
# Export the first layer's weight matrix: one CSV row per matrix row, with no
# header line and no index column.
weights = model.fc1.weight.detach().cpu().numpy()
df = pd.DataFrame(weights)
df.to_csv("fc1_weights.csv", header=False, index=False)

In [21]:
# Export the second layer's bias as a single CSV row whose leading field is
# -1. The file is written without a header line and without an index column.
bias2 = model.fc2.bias.detach().cpu().flatten().numpy()
df_b2 = pd.DataFrame([bias2])
df_b2.insert(loc=0, column='index', value=-1)
df_b2.to_csv("fc2_bias.csv", header=False, index=False)

In [22]:
# Export the second layer's weight matrix: one CSV row per matrix row, with
# no header line and no index column.
weights2 = model.fc2.weight.detach().cpu().numpy()
df2 = pd.DataFrame(weights2)
df2.to_csv("fc2_weights.csv", header=False, index=False)