In [2]:


# Imports
import torch
import torch.nn as nn  # All neural network modules, nn.Linear, nn.Conv2d, BatchNorm, Loss functions
import torch.optim as optim  # For all Optimization algorithms, SGD, Adam, etc.
import torch.nn.functional as F
from torch.utils.data import (
    DataLoader,
)  # Gives easier dataset managment and creates mini batches
import torchvision.datasets as datasets  # Has standard datasets we can import in a nice way
import torchvision.transforms as transforms  # Transformations we can perform on our dataset
from tqdm import tqdm  # For nice progress bar!

In [3]:

# Simple CNN
class CNN(nn.Module):
    def __init__(self, in_channels=1, num_classes=10):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv2d(
            in_channels=in_channels, out_channels=8, kernel_size=3, stride=1, padding=1
        )
        self.pool = nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2))
        self.conv2 = nn.Conv2d(
            in_channels=8, out_channels=16, kernel_size=3, stride=1, padding=1
        )
        self.fc1 = nn.Linear(16 * 7 * 7, num_classes)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = self.pool(x)
        x = F.relu(self.conv2(x))
        x = self.pool(x)
        x = x.reshape(x.shape[0], -1)
        x = self.fc1(x)
        return x


In [4]:

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

in_channels = 1
num_classes = 10
num_epochs = 50

batch_size = 64
learning_rate = 0.1

train_dataset = datasets.MNIST(
    root="dataset/", train=True, transform=transforms.ToTensor(), download=True
)
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)

model = CNN(in_channels=in_channels, num_classes=num_classes).to(device)


criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to dataset/MNIST/raw/train-images-idx3-ubyte.gz


  0%|          | 0/9912422 [00:00<?, ?it/s]

Extracting dataset/MNIST/raw/train-images-idx3-ubyte.gz to dataset/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to dataset/MNIST/raw/train-labels-idx1-ubyte.gz


  0%|          | 0/28881 [00:00<?, ?it/s]

Extracting dataset/MNIST/raw/train-labels-idx1-ubyte.gz to dataset/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to dataset/MNIST/raw/t10k-images-idx3-ubyte.gz


  0%|          | 0/1648877 [00:00<?, ?it/s]

Extracting dataset/MNIST/raw/t10k-images-idx3-ubyte.gz to dataset/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to dataset/MNIST/raw/t10k-labels-idx1-ubyte.gz


  0%|          | 0/4542 [00:00<?, ?it/s]

Extracting dataset/MNIST/raw/t10k-labels-idx1-ubyte.gz to dataset/MNIST/raw



In [5]:

# Define Scheduler
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, factor=0.1, patience=5, verbose=True
)

In [6]:

for epoch in range(num_epochs):
    losses = []
    for batch_idx, (data, targets) in enumerate(tqdm(train_loader)):
        # Get data to cuda if possible
        data = data.to(device=device)
        targets = targets.to(device=device)

        # forward
        scores = model(data)
        loss = criterion(scores, targets)

        losses.append(loss.item())

        # backward
        optimizer.zero_grad()
        loss.backward()

        # gradient descent or adam step
        optimizer.step()

    mean_loss = sum(losses) / len(losses)
    scheduler.step(mean_loss)
    print(f"Cost at epoch {epoch} is {mean_loss}")



100%|██████████| 938/938 [00:13<00:00, 67.11it/s] 


Cost at epoch 0 is 2.321278190816135


100%|██████████| 938/938 [00:06<00:00, 138.98it/s]


Cost at epoch 1 is 2.310201260835123


100%|██████████| 938/938 [00:06<00:00, 134.41it/s]


Cost at epoch 2 is 2.3097856357407722


100%|██████████| 938/938 [00:06<00:00, 139.60it/s]


Cost at epoch 3 is 2.3108343953516948


100%|██████████| 938/938 [00:06<00:00, 138.52it/s]


Cost at epoch 4 is 2.3102616254708916


100%|██████████| 938/938 [00:06<00:00, 140.60it/s]


Cost at epoch 5 is 2.310311985676731


100%|██████████| 938/938 [00:06<00:00, 139.80it/s]


Cost at epoch 6 is 2.3089829990858717


100%|██████████| 938/938 [00:06<00:00, 139.78it/s]


Cost at epoch 7 is 2.3110652305424084


100%|██████████| 938/938 [00:06<00:00, 137.60it/s]


Cost at epoch 8 is 2.310268867498776


100%|██████████| 938/938 [00:06<00:00, 139.42it/s]


Cost at epoch 9 is 2.309663248468818


100%|██████████| 938/938 [00:06<00:00, 139.47it/s]


Cost at epoch 10 is 2.3109934416406954


100%|██████████| 938/938 [00:06<00:00, 139.44it/s]


Cost at epoch 11 is 2.3103604616640983


100%|██████████| 938/938 [00:06<00:00, 138.03it/s]


Epoch 00013: reducing learning rate of group 0 to 1.0000e-02.
Cost at epoch 12 is 2.3091795294523747


100%|██████████| 938/938 [00:06<00:00, 136.46it/s]


Cost at epoch 13 is 2.3020813831134137


100%|██████████| 938/938 [00:06<00:00, 137.49it/s]


Cost at epoch 14 is 2.302059755904842


100%|██████████| 938/938 [00:06<00:00, 139.19it/s]


Cost at epoch 15 is 2.30204564549013


100%|██████████| 938/938 [00:06<00:00, 138.70it/s]


Cost at epoch 16 is 2.302044956668862


100%|██████████| 938/938 [00:06<00:00, 137.23it/s]


Cost at epoch 17 is 2.3022354144785706


100%|██████████| 938/938 [00:06<00:00, 138.27it/s]


Cost at epoch 18 is 2.3021354937095886


100%|██████████| 938/938 [00:06<00:00, 136.62it/s]


Epoch 00020: reducing learning rate of group 0 to 1.0000e-03.
Cost at epoch 19 is 2.3023791979116672


100%|██████████| 938/938 [00:06<00:00, 138.89it/s]


Cost at epoch 20 is 2.3013595505309765


100%|██████████| 938/938 [00:06<00:00, 138.44it/s]


Cost at epoch 21 is 2.3013134800803177


100%|██████████| 938/938 [00:06<00:00, 134.47it/s]


Cost at epoch 22 is 2.3013108323123665


100%|██████████| 938/938 [00:06<00:00, 138.03it/s]


Cost at epoch 23 is 2.301304102452325


100%|██████████| 938/938 [00:06<00:00, 138.15it/s]


Cost at epoch 24 is 2.301274360878381


100%|██████████| 938/938 [00:06<00:00, 138.84it/s]


Cost at epoch 25 is 2.301305437393026


100%|██████████| 938/938 [00:06<00:00, 138.32it/s]


Epoch 00027: reducing learning rate of group 0 to 1.0000e-04.
Cost at epoch 26 is 2.301274395446533


100%|██████████| 938/938 [00:06<00:00, 137.07it/s]


Cost at epoch 27 is 2.3012399871720435


100%|██████████| 938/938 [00:06<00:00, 139.04it/s]


Cost at epoch 28 is 2.3012154747936515


100%|██████████| 938/938 [00:06<00:00, 138.91it/s]


Cost at epoch 29 is 2.3011994128034057


100%|██████████| 938/938 [00:06<00:00, 139.17it/s]


Cost at epoch 30 is 2.301195750612694


100%|██████████| 938/938 [00:06<00:00, 137.87it/s]


Cost at epoch 31 is 2.3011854152435434


100%|██████████| 938/938 [00:06<00:00, 139.37it/s]


Epoch 00033: reducing learning rate of group 0 to 1.0000e-05.
Cost at epoch 32 is 2.3011842742403434


100%|██████████| 938/938 [00:06<00:00, 139.19it/s]


Cost at epoch 33 is 2.3011584238711196


100%|██████████| 938/938 [00:06<00:00, 139.95it/s]


Cost at epoch 34 is 2.3011674118448675


100%|██████████| 938/938 [00:06<00:00, 138.92it/s]


Cost at epoch 35 is 2.30116086194256


100%|██████████| 938/938 [00:06<00:00, 140.14it/s]


Cost at epoch 36 is 2.3011702105943073


100%|██████████| 938/938 [00:06<00:00, 139.15it/s]


Cost at epoch 37 is 2.3011668612962084


100%|██████████| 938/938 [00:06<00:00, 137.98it/s]


Epoch 00039: reducing learning rate of group 0 to 1.0000e-06.
Cost at epoch 38 is 2.301165884237554


100%|██████████| 938/938 [00:06<00:00, 140.09it/s]


Cost at epoch 39 is 2.3011610497797985


100%|██████████| 938/938 [00:06<00:00, 137.21it/s]


Cost at epoch 40 is 2.301161902291434


100%|██████████| 938/938 [00:08<00:00, 111.95it/s]


Cost at epoch 41 is 2.3011642303039777


100%|██████████| 938/938 [00:06<00:00, 137.01it/s]


Cost at epoch 42 is 2.301168133455045


100%|██████████| 938/938 [00:06<00:00, 139.15it/s]


Cost at epoch 43 is 2.301164285206337


100%|██████████| 938/938 [00:06<00:00, 135.99it/s]


Epoch 00045: reducing learning rate of group 0 to 1.0000e-07.
Cost at epoch 44 is 2.3011695044889633


100%|██████████| 938/938 [00:06<00:00, 138.92it/s]


Cost at epoch 45 is 2.301160708673473


100%|██████████| 938/938 [00:06<00:00, 140.28it/s]


Cost at epoch 46 is 2.3011594863334444


100%|██████████| 938/938 [00:06<00:00, 137.88it/s]


Cost at epoch 47 is 2.3011600226481588


100%|██████████| 938/938 [00:07<00:00, 130.96it/s]


Cost at epoch 48 is 2.301160768151029


100%|██████████| 938/938 [00:06<00:00, 137.54it/s]

Cost at epoch 49 is 2.301153648382565



