# A Simple Convolutional Neural Network (MNIST)

In [None]:
# Force-reinstall to clear any bad or shadowed torchvision modules
%pip install --upgrade --force-reinstall torch torchvision

import torch
import torch.nn.functional as F
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from torch import optim
from torch import nn
from torch.utils.data import DataLoader
from tqdm import tqdm

Collecting torch
  Using cached torch-2.8.0-cp312-none-macosx_11_0_arm64.whl.metadata (30 kB)
Collecting torchvision
  Using cached torchvision-0.23.0-cp312-cp312-macosx_11_0_arm64.whl.metadata (6.1 kB)
Collecting filelock (from torch)
  Downloading filelock-3.19.1-py3-none-any.whl.metadata (2.1 kB)
Collecting typing-extensions>=4.10.0 (from torch)
  Using cached typing_extensions-4.14.1-py3-none-any.whl.metadata (3.0 kB)
Collecting setuptools (from torch)
  Downloading setuptools-80.9.0-py3-none-any.whl.metadata (6.6 kB)
Collecting sympy>=1.13.3 (from torch)
  Downloading sympy-1.14.0-py3-none-any.whl.metadata (12 kB)
Collecting networkx (from torch)
  Downloading networkx-3.5-py3-none-any.whl.metadata (6.3 kB)
Collecting jinja2 (from torch)
  Downloading jinja2-3.1.6-py3-none-any.whl.metadata (2.9 kB)
Collecting fsspec (from torch)
  Downloading fsspec-2025.7.0-py3-none-any.whl.metadata (12 kB)
Collecting numpy (from torchvision)
  Downloading numpy-2.3.2-cp312-cp312-macosx_14_0_arm6

In [176]:
class CNN(nn.Module):
    """Small two-convolution image classifier.

    Pipeline: 3x3 conv (8 channels) -> ReLU -> 2x2 max-pool -> 3x3 conv
    (16 channels) -> ReLU -> flatten -> linear layer producing class scores.

    Both convolutions use stride 1 / padding 1 and therefore keep the spatial
    size, while the pool halves it. The linear layer is sized for
    16 * 14 * 14 features, so the network expects 28x28 inputs.

    Args:
        in_channels: Number of channels in the input images.
        num_classes: Number of scores produced per sample (default 10).
    """

    def __init__(self, in_channels, num_classes=10):
        super().__init__()

        # First convolution: in_channels -> 8 feature maps, spatial size preserved.
        self.conv1 = nn.Conv2d(in_channels, 8, kernel_size=3, stride=1, padding=1)
        # 2x2 max-pooling with stride 2: halves height and width.
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        # Second convolution: 8 -> 16 feature maps, spatial size preserved.
        self.conv2 = nn.Conv2d(8, 16, kernel_size=3, stride=1, padding=1)
        # Classifier head over the flattened 16 x 14 x 14 feature volume.
        self.fc1 = nn.Linear(16 * 14 * 14, num_classes)

    def forward(self, x):
        """Return the raw (un-normalized) class scores, one row per sample."""
        pooled = self.pool(F.relu(self.conv1(x)))
        features = F.relu(self.conv2(pooled))
        # Collapse every non-batch dimension into one feature vector per sample.
        flat = features.reshape(features.shape[0], -1)
        return self.fc1(flat)


In [177]:
# Pick the compute backend in order of preference: CUDA GPU, Apple MPS, CPU.
if torch.cuda.is_available():
    device = 'cuda'
elif torch.backends.mps.is_available():
    # The availability check must be *called*: a bare function reference is
    # always truthy and would select 'mps' on every machine without CUDA.
    device = 'mps'
else:
    device = 'cpu'

print("Current Device: ", device)

Current Device:  mps


In [178]:
# Run configuration. Everything a reader might want to tune lives here.
input_size = 28 * 28    # 784; not referenced by the CNN cells shown in this notebook
num_classes = 10        # number of scores the classifier outputs per sample
learning_rate = 0.01    # step size handed to the Adam optimizer
batch_size = 64         # samples per mini-batch for both data loaders
num_epochs = 10         # full passes over the training loader

In [179]:
# Training split: downloaded into dataset/ on first use, images converted to tensors.
# Shuffled so that every epoch sees the batches in a different order.
train_dataset = datasets.MNIST(root='dataset/', download=True, train=True, transform=transforms.ToTensor())
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)

# Test split (train=False). It gets its own name so that `train_dataset` keeps
# referring to the training data, and it is not shuffled because sample order
# has no effect on an accuracy measurement.
test_dataset = datasets.MNIST(root='dataset/', download=True, train=False, transform=transforms.ToTensor())
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

In [180]:
# Build the classifier for single-channel input, then move its parameters
# to the selected compute device.
model = CNN(in_channels=1, num_classes=num_classes)
model = model.to(device)

In [181]:
# Cross-entropy loss on the model's class scores; Adam updates all model parameters.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=learning_rate)

In [182]:
# Training: one full pass over train_loader per epoch, with a tqdm progress bar.
for epoch in range(num_epochs):
    print(f'Epoch [{epoch + 1} of {num_epochs}]')

    for batch_index, (data, targets) in enumerate(tqdm(train_loader)):
        # Drop the gradients of the previous step before computing new ones.
        optimizer.zero_grad()

        # Inputs and labels must live on the same device as the model.
        data, targets = data.to(device), targets.to(device)

        # Forward pass and loss.
        scores = model(data)
        loss = criterion(scores, targets)

        # Backward pass, then parameter update.
        loss.backward()
        optimizer.step()

Epoch [1 of 10]


100%|██████████| 938/938 [00:02<00:00, 362.22it/s]


Epoch [2 of 10]


100%|██████████| 938/938 [00:02<00:00, 378.12it/s]


Epoch [3 of 10]


100%|██████████| 938/938 [00:02<00:00, 388.08it/s]


Epoch [4 of 10]


100%|██████████| 938/938 [00:02<00:00, 339.75it/s]


Epoch [5 of 10]


100%|██████████| 938/938 [00:02<00:00, 351.87it/s]


Epoch [6 of 10]


100%|██████████| 938/938 [00:02<00:00, 370.18it/s]


Epoch [7 of 10]


100%|██████████| 938/938 [00:02<00:00, 381.92it/s]


Epoch [8 of 10]


100%|██████████| 938/938 [00:02<00:00, 363.66it/s]


Epoch [9 of 10]


100%|██████████| 938/938 [00:02<00:00, 354.58it/s]


Epoch [10 of 10]


100%|██████████| 938/938 [00:02<00:00, 352.35it/s]


In [183]:
def check_accuracy(loader, model):
    """Print the classification accuracy of ``model`` over ``loader``.

    Each batch is moved to the device of the model's first parameter, so the
    function does not depend on a notebook-level ``device`` variable. The
    model is switched to eval mode for the measurement and is put back into
    whatever mode (train or eval) it was in when the function was called.

    Args:
        loader: Iterable yielding ``(inputs, labels)`` tensor pairs.
        model: ``nn.Module`` returning one row of class scores per sample.

    Returns:
        None. The result is printed as ``Accuracy: xx.xx%``; if the loader
        yields no samples a notice is printed instead of dividing by zero.
    """
    # Evaluate where the model lives; a parameter-less model leaves the
    # batches on whatever device they arrive on.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else None

    was_training = model.training
    num_correct = 0
    num_samples = 0

    model.eval()
    try:
        with torch.no_grad():
            for x, y in loader:
                if target_device is not None:
                    x = x.to(target_device)
                    y = y.to(target_device)

                scores = model(x)

                # Index of the highest score in each row is the predicted class.
                _, predictions = scores.max(1)
                # Kept as a tensor sum so no host sync is forced per batch;
                # it is converted to a Python number once, below.
                num_correct += (predictions == y).sum()
                num_samples += predictions.size(0)
    finally:
        # Restore the caller's mode even if evaluation raised.
        model.train(was_training)

    if num_samples == 0:
        print('Accuracy: n/a (loader yielded no samples)')
        return

    accuracy = float(num_correct) / float(num_samples) * 100

    print(f'Accuracy: {accuracy:.2f}%')



In [184]:
# Report accuracy on the training data first, then on the train=False split.
check_accuracy(loader=train_loader, model=model)
check_accuracy(loader=test_loader, model=model)

Accuracy: 99.28%
Accuracy: 98.21%
