In [1]:
import numpy as np
import torchvision
import torch
import matplotlib.pyplot as plt

import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision.transforms as transforms

In [2]:
# Assume CIFAR data has mean 0.5 & standard deviation 0.25
x_mean = 0.5
x_sd = 0.25

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

cuda:0


In [3]:
def imshow(img):
    img = img * x_sd + x_mean
    np_img = img.numpy()
    plt.imshow(np.transpose(np_img, (1, 2, 0)))

In [4]:
class Net(nn.Module):
    
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.norm1 = nn.BatchNorm2d(6)
        self.pool1 = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.norm2 = nn.BatchNorm2d(16)
        self.pool2 = nn.MaxPool2d(2, 2)
        
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.norm3 = nn.BatchNorm1d(120)
        self.fc2 = nn.Linear(120, 84)
        self.norm4 = nn.BatchNorm1d(84)
        self.fc3 = nn.Linear(84, 10)
        
    def forward(self, x):
        x = self.pool1(
            self.norm1(F.relu(self.conv1(x)))
        )
        x = self.pool2(
            self.norm2(F.relu(self.conv2(x)))
        )
        x = x.view(-1, 16 * 5 * 5)
        x = self.norm3(F.relu(self.fc1(x)))
        x = self.norm4(F.relu(self.fc2(x)))
        x = self.fc3(x)
        return x


In [5]:
transform = transforms.Compose([
    # Convert Pillow images to tensors
    transforms.ToTensor(),
    # Shooting for mean = 0, sd = 0.5
    transforms.Normalize((x_mean, x_mean, x_mean), (x_sd, x_sd, x_sd))
])

In [6]:
trainset = torchvision.datasets.CIFAR10(
    root="../data",
    train=True,
    download=True,
    transform=transform
)

Files already downloaded and verified


In [7]:
trainloader = torch.utils.data.DataLoader(
    trainset,
    batch_size=16,
    shuffle=True,
    num_workers=8
)

In [8]:
testset = torchvision.datasets.CIFAR10(
    root="../data",
    train=False,
    download=True,
    transform=transform
)

Files already downloaded and verified


In [9]:
testloader = torch.utils.data.DataLoader(
    testset,
    batch_size=16,
    shuffle=True,
    num_workers=8
)

In [10]:
net = Net().to(device)

In [11]:
next(net.parameters()).is_cuda

True

In [12]:
n_epochs = 10
criterion = nn.CrossEntropyLoss()
# optimizer = optim.SGD(net.parameters(), lr=1e-3, momentum=0.9)
optimizer = optim.Adam(net.parameters(), lr=1e-2)
scheduler = optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.9)

In [13]:
for epoch in range(n_epochs):
    
    # Train
    net.train()
    total_loss = 0.0
    avg_loss = 0.0
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)
        
        optimizer.zero_grad()
        
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        total_loss += loss.item()
        running_loss = loss.item()
        # Divide total loss by number of mini batches
        avg_loss = total_loss / (i + 1)
        
        if i % 1000 == 999:
            print(f"[{epoch + 1}, {i + 1}] avg. train loss: {np.round(avg_loss, 3)}, running train loss: {np.round(running_loss, 3)}")
            running_loss = 0.0
    scheduler.step()
            
    # Eval
    net.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for data in testloader:
            inputs, labels = data
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = net(inputs)
            # Returns (value, index)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    print(f"[{epoch + 1}] val. accuracy: {np.round(correct / total * 100, 3)}")

[1, 1000] avg. train loss: 1.755, running train loss: 1.414
[1, 2000] avg. train loss: 1.664, running train loss: 1.553
[1, 3000] avg. train loss: 1.616, running train loss: 1.502
[1] val. accuracy: 51.38
[2, 1000] avg. train loss: 1.401, running train loss: 1.157
[2, 2000] avg. train loss: 1.376, running train loss: 1.161
[2, 3000] avg. train loss: 1.359, running train loss: 1.843
[2] val. accuracy: 55.05
[3, 1000] avg. train loss: 1.242, running train loss: 1.137
[3, 2000] avg. train loss: 1.228, running train loss: 1.076
[3, 3000] avg. train loss: 1.22, running train loss: 1.177
[3] val. accuracy: 61.32
[4, 1000] avg. train loss: 1.147, running train loss: 1.328
[4, 2000] avg. train loss: 1.139, running train loss: 0.908
[4, 3000] avg. train loss: 1.139, running train loss: 1.406
[4] val. accuracy: 61.63
[5, 1000] avg. train loss: 1.075, running train loss: 0.842
[5, 2000] avg. train loss: 1.074, running train loss: 0.791
[5, 3000] avg. train loss: 1.081, running train loss: 1.036
[