In [25]:
%load_ext autoreload
%autoreload 2

import torch
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from dlc_practical_prologue import load_data
from models import CNN

from torch.utils.data import DataLoader, TensorDataset

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [44]:
class Configuration:
    """Bundle of hyperparameters and training objects for one experiment.

    Args:
        batch_size: Mini-batch size.
        epochs: Number of training epochs.
        lr: Learning rate handed to the SGD optimizer.
        log_interval: Logging interval; stored but not read by the code
            visible in this notebook.
        seed: Seed passed to ``torch.manual_seed``.
        save_model: Whether the trained weights should be saved after training.
        model_name: Base name (without extension) for the saved weights file.

    Attributes set in addition to the arguments above:
        device: CUDA device when available, otherwise CPU.
        model: A ``CNN`` instance moved to ``device``.
        optimizer: SGD over ``model.parameters()`` with learning rate ``lr``.
        gradient_compression: Placeholder, always ``None`` here.
        criterion: ``torch.nn.CrossEntropyLoss`` with default settings.
    """

    def __init__(self, batch_size=50, epochs=20, lr=0.001, log_interval=5,
                 seed=42, save_model=False, model_name="mnist_cnn"):
        self.batch_size = batch_size
        self.epochs = epochs
        self.lr = lr
        self.log_interval = log_interval
        self.seed = seed
        self.save_model = save_model
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        # Seed the global torch RNG *before* the model is constructed so that
        # any random parameter initialisation inside CNN() is reproducible.
        # Seeding only after Configuration() returns is too late for that.
        torch.manual_seed(self.seed)
        self.model = CNN().to(self.device)
        self.optimizer = optim.SGD(self.model.parameters(), lr=self.lr)
        self.gradient_compression = None
        self.criterion = torch.nn.CrossEntropyLoss()
        self.model_name = model_name


In [56]:
def train(config, train_loader, epoch, logging=True):
    """Train ``config.model`` for one epoch over ``train_loader``.

    Args:
        config: Object exposing ``model``, ``optimizer``, ``criterion`` and
            ``device`` (see ``Configuration``).
        train_loader: Iterable yielding ``(data, target)`` batches.
        epoch: Epoch number, used only in the log message.
        logging: When true, print a one-line summary after the epoch.

    Returns:
        A tuple ``(train_loss, train_accuracy)``. ``train_loss`` is the sum of
        the per-batch loss values over the epoch; ``train_accuracy`` is the
        accuracy returned by ``test()`` on ``train_loader``.

    Note:
        The accuracy is computed through ``test()``, which switches the model
        to eval mode, so the model is in eval mode when this function returns.
    """
    # put model in train mode, we need gradients
    config.model.train()
    train_loss = 0
    for data, target in train_loader:
        # The model lives on config.device; the batch must be moved there too,
        # otherwise the forward pass fails whenever the device is not the CPU.
        data, target = data.to(config.device), target.to(config.device)
        config.optimizer.zero_grad()
        output = config.model(data)
        # get the basic loss for our main task
        total_loss = config.criterion(output, target)
        total_loss.backward()
        train_loss += total_loss.item()
        config.optimizer.step()
    _, train_accuracy = test(config, train_loader, logging=False)
    if logging:
        # The "Loss" shown here is the loss of the last batch of the epoch.
        print(f'Train Epoch: {epoch} Loss: {total_loss.item():.6f}, Train accuracy: {train_accuracy}')
    return train_loss, train_accuracy


def test(config, test_loader, logging=True):
    # put model in eval mode, disable dropout etc.
    config.model.eval()
    test_loss = 0
    correct = 0
    # disable grad to perform testing quicker
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(config.device), target.to(config.device)
            output = config.model(data)
            test_loss += config.criterion(output, target).item()
            # prediction is an output with maximal probability
            pred = output.argmax(1, keepdim=True)
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)
    test_accuracy = 100. * correct / len(test_loader.dataset)
    if logging:
        print(f'Test set: Average loss: {test_loss:.4f}, '
              f'Test accuracy: {correct} / {len(test_loader.dataset)} '
              f'({test_accuracy:.0f}%)\n')
    return test_loss, test_accuracy


def get_data_loaders(batch_size, num_pairs=1000, percentage_val=0):
    """Build the train, validation and test data loaders.

    Args:
        batch_size: Batch size used for all returned loaders.
        num_pairs: Currently unused; kept so existing callers keep working.
        percentage_val: Fraction of the training set to hold out for
            validation, in ``(0, 1]``. A falsy value (the default ``0``)
            disables the validation split.

    Returns:
        A tuple ``(train_loader, val_loader, test_loader)``. ``val_loader`` is
        ``None`` when no validation split was requested.

    Raises:
        ValueError: If ``percentage_val`` is truthy but outside ``(0, 1]``.
    """
    val_loader = None
    train_input, train_target, test_input, test_target = load_data(flatten=False)
    train_dataset = TensorDataset(train_input, train_target)
    # if validation set is needed randomly split training set
    if percentage_val:
        if not 0 < percentage_val <= 1:
            raise ValueError(
                f"percentage_val must be in (0, 1], got {percentage_val!r}")
        num_total = len(train_dataset)
        num_val = int(percentage_val * num_total)
        # Take the train size as the remainder. Truncating both products
        # independently can lose a sample to float rounding, and random_split
        # rejects lengths that do not sum to the dataset size.
        num_train = num_total - num_val
        val_dataset, train_dataset = torch.utils.data.random_split(
            train_dataset, (num_val, num_train))
        val_loader = DataLoader(dataset=val_dataset,
                                batch_size=batch_size,
                                shuffle=True)
    train_loader = DataLoader(dataset=train_dataset,
                              batch_size=batch_size,
                              shuffle=True)
    # The test loader is not shuffled.
    test_loader = DataLoader(dataset=TensorDataset(test_input, test_target),
                             batch_size=batch_size)
    return train_loader, val_loader, test_loader

In [57]:
# Set up the experiment: configuration first, then the RNG seed, then the data.
# The seed set here governs the torch RNG use that follows this line.
config = Configuration()
torch.manual_seed(config.seed)
loaders = get_data_loaders(config.batch_size)
train_loader, val_loader, test_loader = loaders

# Each epoch: one pass over the training data, then an evaluation on the test set.
first_epoch, last_epoch = 1, config.epochs
for epoch in range(first_epoch, last_epoch + 1):
    train(config, train_loader, epoch)
    test(config, test_loader)

# Optionally persist the trained weights next to the notebook.
if config.save_model:
    checkpoint_path = f"{config.model_name}.pt"
    torch.save(config.model.state_dict(), checkpoint_path)


* Using MNIST
** Reduce the data-set (use --full for the full thing)
** Use 5000 train and 5000 test samples
Train Epoch: 1 Loss: 0.341668, Train accuracy: 91.86
Test set: Average loss: 0.0078, Test accuracy: 4399 / 5000 (88%)

Train Epoch: 2 Loss: 0.295877, Train accuracy: 95.36
Test set: Average loss: 0.0057, Test accuracy: 4554 / 5000 (91%)

Train Epoch: 3 Loss: 0.180937, Train accuracy: 97.08
Test set: Average loss: 0.0047, Test accuracy: 4636 / 5000 (93%)

Train Epoch: 4 Loss: 0.176818, Train accuracy: 97.78
Test set: Average loss: 0.0043, Test accuracy: 4662 / 5000 (93%)

Train Epoch: 5 Loss: 0.037232, Train accuracy: 98.34
Test set: Average loss: 0.0038, Test accuracy: 4693 / 5000 (94%)

Train Epoch: 6 Loss: 0.022646, Train accuracy: 98.68
Test set: Average loss: 0.0037, Test accuracy: 4704 / 5000 (94%)

Train Epoch: 7 Loss: 0.033615, Train accuracy: 99.08
Test set: Average loss: 0.0035, Test accuracy: 4705 / 5000 (94%)

Train Epoch: 8 Loss: 0.017693, Train accuracy: 98.96
Test 

In [47]:
# Sanity check: show the shape of the input tensor of every training batch.
for data, labels in train_loader:
    print(data.size())

torch.Size([50, 1, 28, 28])
torch.Size([50, 1, 28, 28])
torch.Size([50, 1, 28, 28])
torch.Size([50, 1, 28, 28])
torch.Size([50, 1, 28, 28])
torch.Size([50, 1, 28, 28])
torch.Size([50, 1, 28, 28])
torch.Size([50, 1, 28, 28])
torch.Size([50, 1, 28, 28])
torch.Size([50, 1, 28, 28])
torch.Size([50, 1, 28, 28])
torch.Size([50, 1, 28, 28])
torch.Size([50, 1, 28, 28])
torch.Size([50, 1, 28, 28])
torch.Size([50, 1, 28, 28])
torch.Size([50, 1, 28, 28])
