In [101]:
import torch
import torchvision
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor, Lambda, Compose

import matplotlib.pyplot as plt

In [102]:
# hyperparameters

batch_size = 64
learning_rate = 0.01
momentum = 0.5

In [104]:
# Create data loaders.
train_loader = torch.utils.data.DataLoader(
  torchvision.datasets.MNIST('/files/', train=True, download=True,
                             transform=torchvision.transforms.Compose([
                               torchvision.transforms.ToTensor(),
                               torchvision.transforms.Normalize(
                                 (0.1307,), (0.3081,)),
                              torchvision.transforms.RandomRotation(20)
                             ])),
  batch_size=batch_size, shuffle=True)

test_loader = torch.utils.data.DataLoader(
  torchvision.datasets.MNIST('/files/', train=False, download=True,
                             transform=torchvision.transforms.Compose([
                               torchvision.transforms.ToTensor(),
                               torchvision.transforms.Normalize(
                                 (0.1307,), (0.3081,))
                             ])),
  batch_size=batch_size, shuffle=True)

for X, y in test_dataloader:
    print("Shape of X [N, C, H, W]: ", X.shape)
    print("Shape of y: ", y.shape, y.dtype)
    break

Shape of X [N, C, H, W]:  torch.Size([64, 1, 28, 28])
Shape of y:  torch.Size([64]) torch.int64


In [105]:
# Get cpu or gpu device for training.
device = "cuda" if torch.cuda.is_available() else "cpu"
print("Using {} device".format(device))

# Define model
class Cnn(nn.Module):
    def __init__(self):
        super(Cnn, self).__init__()


        # if padding == kernel_size // 2 => output == input
        # 2P + 1
        # padding = ( 5 - 1 ) / 2

        self.conv_net = nn.Sequential(                      # B, 1,  28,  28 
            nn.Conv2d(1, 64, kernel_size=5, padding=2),     # B, 64,  28,  28
            nn.BatchNorm2d(64),
            nn.ReLU(True),                                  # B, 64,  28,  28
            nn.MaxPool2d(2),                                # B, 64, 14, 14
            # .........
            # .......
        )

        self.linear_net = nn.Sequential(
            nn.Linear(128 * 8 * 8, 50),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(50, 10),
            nn.LogSoftmax(-1)
        )


        self.conv1 = nn.Conv2d(1, 64, kernel_size=5)
        self.conv1_bn = nn.BatchNorm2d(64)
        self.conv2 = nn.Conv2d(64, 64, kernel_size=5)
        self.conv2_bn = nn.BatchNorm2d(64)
        self.conv3 = nn.Conv2d(64, 128, kernel_size=5)
        self.conv3_bn = nn.BatchNorm2d(128)
        self.conv4 = nn.Conv2d(128, 128, kernel_size=5)
        self.conv4_bn = nn.BatchNorm2d(128)
        self.conv2_drop = nn.Dropout2d()
        self.fc1 = nn.Linear(128 * 8 * 8, 50)
        self.fc2 = nn.Linear(50, 10)

    def forward(self, x):
        x = self.conv_net(x)
        x = x.view(-1, 128 * 8 * 8)
        x = self.linear_net(x)
        return x
    
model = Cnn().to(device)
print(model)
# loss function
loss_fn = nn.CrossEntropyLoss()

# optimizer: stochastic gradient descent
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate,momentum=momentum)
scheduler = StepLR(optimizer,step=1 , gamma=0.99)



Using cuda device
Cnn(
  (conv_net): Sequential(
    (0): Conv2d(1, 64, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (linear_net): Sequential(
    (0): Linear(in_features=8192, out_features=50, bias=True)
    (1): ReLU(inplace=True)
    (2): Dropout(p=0.5, inplace=False)
    (3): Linear(in_features=50, out_features=10, bias=True)
    (4): LogSoftmax(dim=-1)
  )
  (conv1): Conv2d(1, 64, kernel_size=(5, 5), stride=(1, 1))
  (conv1_bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv2): Conv2d(64, 64, kernel_size=(5, 5), stride=(1, 1))
  (conv2_bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv3): Conv2d(64, 128, kernel_size=(5, 5), stride=(1, 1))
  (conv3_bn): BatchNorm2d(128, eps=1e-05, momentum=0

TypeError: __init__() got an unexpected keyword argument 'momentum'

In [93]:
def train(dataloader, model, loss_fn, optimizer):
    size = len(dataloader.dataset)
    for batch, (X, y) in enumerate(dataloader):
        X, y = X.to(device), y.to(device)

        # Compute prediction error
        pred = model(X)
        loss = loss_fn(pred, y)

        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if batch % 100 == 0:
            loss, current = loss.item(), batch * len(X)
            print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")
            
def test(dataloader, model):
    size = len(dataloader.dataset)
    model.eval()
    test_loss, correct = 0, 0
    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()
    test_loss /= size
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")
    

In [94]:
# train model

epochs = 6
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train(train_dataloader, model, loss_fn, optimizer)
    test(test_dataloader, model)
print("Finished")

Epoch 1
-------------------------------


TypeError: _max_pool2d() missing 1 required positional argument: 'kernel_size'