In [5]:
#imports
import torch
import torchvision
import torchvision.transforms as transforms
import numpy as np
from tqdm import tqdm
import torch.nn as nn
from sklearn.metrics import confusion_matrix


In [2]:
# Compute device: the first CUDA GPU when one is available, otherwise the CPU.
if torch.cuda.is_available():
	device = torch.device("cuda:0")
else:
	device = torch.device("cpu")

In [3]:
# MNIST train and test splits, stored under ./data (downloaded if missing).
# Both splits share the same transform: transforms.ToTensor() on every image.
to_tensor = transforms.ToTensor()
mnist_kwargs = dict(root='./data', download=True, transform=to_tensor)
train_ds = torchvision.datasets.MNIST(train=True, **mnist_kwargs)
test_ds = torchvision.datasets.MNIST(train=False, **mnist_kwargs)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz


  0%|          | 0/9912422 [00:00<?, ?it/s]

Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz


  0%|          | 0/28881 [00:00<?, ?it/s]

Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz


  0%|          | 0/1648877 [00:00<?, ?it/s]

Extracting ./data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz


  0%|          | 0/4542 [00:00<?, ?it/s]

Extracting ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw



In [4]:
# Data loaders: mini-batches of 8 samples fetched by 2 worker processes.
# Only the training split is shuffled; the test split keeps its stored order.
loader_kwargs = dict(batch_size=8, num_workers=2)
train_loader = torch.utils.data.DataLoader(train_ds, shuffle=True, **loader_kwargs)
test_loader = torch.utils.data.DataLoader(test_ds, shuffle=False, **loader_kwargs)

In [6]:
class MLP(nn.Module):
	"""Two-layer perceptron: Linear -> ReLU -> Linear.

	The layer sizes are constructor arguments; the defaults reproduce the
	original fixed 784 -> 100 -> 10 architecture, so ``MLP()`` is unchanged.

	Args:
		in_features: length of each (already flattened) input vector.
		hidden_features: width of the single hidden layer.
		num_classes: number of output scores produced per sample.
	"""

	def __init__(self, in_features=784, hidden_features=100, num_classes=10):
		super().__init__()
		# Attribute names (fc1, fc2) are kept so existing state_dicts still load.
		self.fc1 = nn.Linear(in_features, hidden_features)
		self.fc2 = nn.Linear(hidden_features, num_classes)

	def forward(self, input_vector):
		"""Map input vectors to raw class scores.

		Args:
			input_vector: tensor whose last dimension equals ``in_features``.

		Returns:
			Tensor of raw scores with last dimension ``num_classes``; no
			softmax is applied here.
		"""
		hidden = torch.relu(self.fc1(input_vector))
		return self.fc2(hidden)


In [9]:
# hyperparameters
SEED = 0
learning_rate = 0.001
num_epochs = 6

# Seed torch's global RNG before the model is built so that the initial
# weights (and later draws from the default generator) repeat when the
# notebook is restarted and run from the top.
torch.manual_seed(SEED)
model = MLP().to(device)

# Cross-entropy on the raw scores returned by the model, optimised with Adam.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [10]:
# training loop: one full pass over train_loader per epoch, reporting the
# mean of the per-batch losses at the end of each epoch.
for epoch in tqdm(range(num_epochs)):
	model.train()
	loss_history = []
	for images, labels in train_loader:
		# Flatten every image to a 784-element row before moving it to the device.
		images = images.reshape(-1, 784).to(device)
		labels = labels.to(device)

		# Clear gradients left over from the previous step, then forward/backward/update.
		optimizer.zero_grad()
		outputs = model(images)
		loss = criterion(outputs, labels)
		loss.backward()
		optimizer.step()

		# .item() stores a plain Python float rather than a graph-attached tensor.
		loss_history.append(loss.item())
	loss_at_epoch = np.mean(loss_history)
	print(f"Epoch {epoch} loss: {loss_at_epoch}")

 17%|█▋        | 1/6 [00:12<01:03, 12.77s/it]

Epoch 0 loss: 0.23782939171215986


 33%|███▎      | 2/6 [00:24<00:49, 12.26s/it]

Epoch 1 loss: 0.10623045006358298


 50%|█████     | 3/6 [00:36<00:36, 12.25s/it]

Epoch 2 loss: 0.07539193263914494


 67%|██████▋   | 4/6 [00:49<00:24, 12.48s/it]

Epoch 3 loss: 0.058627662464822664


 83%|████████▎ | 5/6 [01:02<00:12, 12.72s/it]

Epoch 4 loss: 0.04660725784717987


100%|██████████| 6/6 [01:15<00:00, 12.62s/it]

Epoch 5 loss: 0.0393004124043114





In [11]:
# testing loop: collect per-batch predictions and ground-truth labels as
# NumPy arrays in pred_list / label_list for later metric computation.
model.eval()
pred_list = []
label_list = []
with torch.no_grad():  # no gradients are needed for evaluation
	for images, labels in test_loader:
		# Same flattening as in the training loop.
		images = images.reshape(-1, 784).to(device)
		outputs = model(images)
		# Index of the highest score along the class dimension is the predicted label.
		predicted = outputs.argmax(dim=1)
		pred_list.append(predicted.cpu().numpy())
		# Labels are only used on the host, so they are not copied to the device.
		label_list.append(labels.numpy())

In [None]:
# Join the per-batch arrays into one vector each, then tabulate true labels
# (first argument) against predicted labels (second argument).
y_true = np.concatenate(label_list)
y_pred = np.concatenate(pred_list)
conf_matrix = confusion_matrix(y_true, y_pred)
print(conf_matrix)