In [6]:
from __future__ import print_function
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms

In [74]:
class CNN(nn.Module):
    """Two-stage convolutional classifier for single-channel 28x28 images.

    Each stage is ``Conv2d(3x3) -> ReLU -> MaxPool2d(2)``; the flattened
    feature map is fed to one linear layer that emits 10 class scores.

    Spatial size per layer for a 28x28 input (stride 1, no padding):
        conv1: 28 -> 26, pool1: 26 -> 13, conv2: 13 -> 11, pool2: 11 -> 5
    which produces the 32 * 5 * 5 = 800 features ``self.out`` is built for.
    """

    def __init__(self):
        super(CNN, self).__init__()
        # stride=1 / padding=0 is required for the 5x5 feature map assumed by
        # ``self.out``. The earlier stride=5, padding=2 reduced a 28x28 image
        # to 1x1 before the second pooling layer, so the forward pass failed.
        self.conv1 = nn.Conv2d(
            in_channels=1,
            out_channels=16,
            kernel_size=3,
            stride=1,
            padding=0
        )
        self.relu1 = nn.ReLU()
        self.maxpool1 = nn.MaxPool2d(kernel_size=2)
        self.conv2 = nn.Conv2d(
            in_channels=16,
            out_channels=32,
            kernel_size=3,
            stride=1,
            padding=0
        )
        self.relu2 = nn.ReLU()
        self.maxpool2 = nn.MaxPool2d(kernel_size=2)
        # 32 channels x 5 x 5 spatial positions -> 10 class scores.
        self.out = nn.Linear(32 * 5 * 5, 10)

    def forward(self, x):
        """Run the network on a batch of images.

        Args:
            x: Input batch; the layer sizes assume shape (N, 1, 28, 28).

        Returns:
            A tuple ``(output, features)``: ``output`` is the (N, 10) result
            of the final linear layer (raw scores, no softmax applied here)
            and ``features`` is the flattened (N, 800) tensor that was fed
            into that layer, returned for visualization.
        """
        x = self.conv1(x)
        x = self.relu1(x)
        x = self.maxpool1(x)
        x = self.conv2(x)
        x = self.relu2(x)
        x = self.maxpool2(x)
        # Flatten everything except the batch dimension.
        x = x.view(x.size(0), -1)
        output = self.out(x)
        return output, x  # return x for visualization

# Instantiate the model
model = CNN()

In [75]:
# Number of trainable scalars in `model` (parameters with requires_grad set).
sum(tensor.numel() for tensor in model.parameters() if tensor.requires_grad)

12810

In [76]:


# Seed PyTorch's global RNG.
torch.manual_seed(1)
batch_size = 128
# Detect CUDA instead of assuming it, so the worker / pinned-memory options
# below are only requested on machines that actually have a GPU.
use_cuda = torch.cuda.is_available()
kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}

# One preprocessing pipeline shared by every split: convert to a tensor, then
# normalize with the given mean / std (presumably the MNIST training-set
# statistics -- confirm).
mnist_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))
])

train_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=True, download=True,
                   transform=mnist_transform),
    batch_size=batch_size, shuffle=True, **kwargs)
test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=False, transform=mnist_transform),
    batch_size=batch_size, shuffle=True, **kwargs)

# NOTE(review): this loader reads the same train=False split as `test_loader`,
# so "validation" metrics are computed on the test data.
# batch_size is passed explicitly: without it DataLoader yields one sample per
# batch, which makes every validation pass iterate over 10,000 batches.
valid_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=False, transform=mnist_transform),
    batch_size=batch_size, **kwargs)


In [86]:
import matplotlib.pyplot as plt

def train(model, device, train_loader, optimizer, epoch, valid_loader):
    """Train ``model`` for one pass over ``train_loader``, then validate it.

    Args:
        model: Module being optimized. Its output is passed directly to
            ``F.nll_loss`` (presumably log-probabilities -- confirm against
            the model definition).
        device: Device that each batch is moved to before the forward pass.
        train_loader: Iterable yielding ``(data, target)`` batches.
        optimizer: Optimizer stepped once per batch; the learning rate is
            read from its first parameter group.
        epoch: Epoch number supplied by the caller; not used inside this
            function.
        valid_loader: Loader handed to ``validate`` after the last batch.

    Returns:
        dict with two lists holding one entry per batch: ``'losses'`` (the
        batch loss as a Python float) and ``'learning_rates'`` (the learning
        rate in effect for that batch). Earlier versions collected these
        values and discarded them; callers that ignore the return value are
        unaffected.
    """
    model.train()
    # NOTE(review): `tqdm` is not imported in any visible cell; it must be
    # brought into scope (e.g. `from tqdm import tqdm` in the import cell)
    # for this function to run on a fresh kernel.
    pbar = tqdm(train_loader)
    learning_rates = []  # learning rate in effect for each batch
    losses = []  # loss value of each batch
    for batch_idx, (data, target) in enumerate(pbar):
        data, target = data.to(device), target.to(device)
        # Record the learning rate before the optimizer step.
        learning_rates.append(optimizer.param_groups[0]['lr'])

        optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step()

        # Convert to a Python float once and reuse it for both the history
        # and the progress-bar text.
        loss_value = loss.item()
        losses.append(loss_value)
        pbar.set_description(desc=f'loss={loss_value} batch_id={batch_idx}')

    # Validate the model after each epoch
    validate(model, device, valid_loader)

    return {'losses': losses, 'learning_rates': learning_rates}

def validate(model, device, valid_loader):
    """Evaluate ``model`` on ``valid_loader`` and print loss and accuracy.

    The model is switched to eval mode and every batch is scored without
    gradient tracking. The summed ``F.nll_loss`` over all batches is divided
    by ``len(valid_loader.dataset)`` to get the average loss; a prediction is
    the arg-max over dimension 1 of the model output. Nothing is returned;
    the results are printed.

    Args:
        model: Module to evaluate; its output is passed to ``F.nll_loss``.
        device: Device that each batch is moved to.
        valid_loader: Iterable of ``(data, target)`` batches exposing a
            ``dataset`` attribute whose length is the sample count.
    """
    model.eval()
    total_loss = 0
    n_correct = 0
    with torch.no_grad():
        for inputs, labels in valid_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            scores = model(inputs)
            # Sum (not mean) per batch so the final division is by sample count.
            batch_loss = F.nll_loss(scores, labels, reduction='sum')
            total_loss += batch_loss.item()
            # Index of the highest score per sample, kept as a column.
            predicted = scores.argmax(dim=1, keepdim=True)
            hits = predicted.eq(labels.view_as(predicted))
            n_correct += hits.sum().item()

    n_samples = len(valid_loader.dataset)
    mean_loss = total_loss / n_samples
    accuracy_pct = 100. * n_correct / n_samples

    print('\nValidation set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        mean_loss, n_correct, n_samples, accuracy_pct))

    # Same accuracy again, with two decimals.
    print('Validation Accuracy: {:.2f}%'.format(accuracy_pct))







In [87]:
# Display whether PyTorch reports a usable CUDA device in this environment.
torch.cuda.is_available()

True

In [90]:
# Total number of trainable scalars in `model`; the bare name on the last line
# makes the notebook display the value.
pytorch_total_params = sum(
    tensor.numel() for tensor in model.parameters() if tensor.requires_grad
)
pytorch_total_params

In [89]:
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU so the
# cell still runs on machines without CUDA.
device = "cuda" if torch.cuda.is_available() else "cpu"
# NOTE(review): `Model` is not defined in any visible cell (the only network
# class defined in this notebook is `CNN`); confirm which class is intended,
# otherwise this line raises NameError on a fresh kernel.
model = Model().to(device)
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

# Two epochs (epoch = 1, 2); train() runs validation at the end of each one.
for epoch in range(1, 3):
    train(model, device, train_loader, optimizer, epoch, valid_loader)


  0%|          | 0/469 [00:00<?, ?it/s]

loss=0.09751394391059875 batch_id=468: 100%|██████████| 469/469 [00:09<00:00, 51.89it/s] 



Validation set: Average loss: 0.0423, Accuracy: 9860/10000 (99%)

Validation Accuracy: 98.60%


loss=0.05010801926255226 batch_id=468: 100%|██████████| 469/469 [00:09<00:00, 51.03it/s]  



Validation set: Average loss: 0.0287, Accuracy: 9904/10000 (99%)

Validation Accuracy: 99.04%
