Hello There

**Here are some useful links to get us all up to speed**

PyTorch tutorial on Datasets and DataLoaders: https://pytorch.org/tutorials/beginner/basics/data_tutorial.html

torchvision CIFAR dataset reference: https://pytorch.org/vision/stable/datasets.html#cifar


In [None]:
#Import modules

import torch
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from torchvision.transforms import ToTensor
import matplotlib.pyplot as plt
from torch import nn, optim, cuda
import torch.nn.functional as F
import numpy as np
from PIL import Image
import math

In [None]:
#Load data

# Use the GPU when one is available and fall back to the CPU otherwise, so
# the later `.to(device)` calls also work on machines without CUDA.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
#batch_sz = 128
batch_sz = 32

# Shared by both splits: (x - 0.5) / 0.5 per channel.
normalise = transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))

#NET, NET2 DATALOADERS
# Random augmentation is applied to the training split only.
train_set = datasets.CIFAR10(
    root="data",
    train=True,
    download=True,
    transform=transforms.Compose([
        transforms.ColorJitter(0.25, 0.25, 0.25, 0.25),
        transforms.RandomRotation(30),
        transforms.RandomVerticalFlip(),
        transforms.ToTensor(),
        normalise]))

# The test split is only converted to a tensor and normalised.
test_set = datasets.CIFAR10(
    root="data",
    train=False,
    download=True,
    transform=transforms.Compose([
        transforms.ToTensor(),
        normalise]))

# The datasets keep their own names (train_set / test_set); the loaders keep
# the names the training cells iterate over (training_data / test_data).
training_data = DataLoader(train_set, batch_size=batch_sz, shuffle=True)
# Evaluation visits every test sample once per pass, so shuffling is not needed.
test_data = DataLoader(test_set, batch_size=batch_sz, shuffle=False)

"""#CNN, CNN2 DATALOADERS
training_data = datasets.CIFAR10(
    root="data",
    train=True,
    download=True,
    transform=transforms.Compose([
        transforms.Resize((32,32)),
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(10),
        transforms.RandomAffine(0, shear=10, scale=(0.8,1.2)),
        transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
        transforms.ToTensor(),
        transforms.Normalize((0.5,0.5,0.5), (0.5,0.5,0.5))]))

test_data = datasets.CIFAR10(
    root="data",
    train=False,
    download=True,
    transform=transforms.Compose([
        transforms.Resize((32,32)),
        transforms.ToTensor(),
        transforms.Normalize((0.5,0.5,0.5), (0.5,0.5,0.5))]))

training_data = DataLoader(dataset=training_data, batch_size=batch_sz, shuffle=True)
test_data = DataLoader(dataset=test_data, batch_size=batch_sz, shuffle=False)
"""
#New attempted dataloaders
"""
training_data = datasets.CIFAR10(
    root="data",
    train=True,
    download=True,
    transform=transforms.Compose([
        transforms.RandomAffine(0,scale=(0.5,1.5)),
        transforms.ColorJitter(0.25, 0.25, 0.25, 0.25),
        transforms.RandomRotation(20),
        transforms.RandomVerticalFlip(0.4),
        transforms.RandomHorizontalFlip(0.4),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),

    ])
)

training_data = DataLoader(training_data, batch_size=batch_sz, shuffle=True,drop_last=True)

test_data = datasets.CIFAR10(
    root="data",
    train=False,
    download=True,
    transform=transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ])
)
test_data = DataLoader(test_data, batch_size=batch_sz, shuffle=True)
"""

Files already downloaded and verified
Files already downloaded and verified


'\ntraining_data = datasets.CIFAR10(\n    root="data",\n    train=True,\n    download=True,\n    transform=transforms.Compose([\n        transforms.RandomAffine(0,scale=(0.5,1.5)),\n        transforms.ColorJitter(0.25, 0.25, 0.25, 0.25),\n        transforms.RandomRotation(20),\n        transforms.RandomVerticalFlip(0.4),\n        transforms.RandomHorizontalFlip(0.4),\n        transforms.ToTensor(),\n        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),\n\n    ])\n)\n\ntraining_data = DataLoader(training_data, batch_size=batch_sz, shuffle=True,drop_last=True)\n\ntest_data = datasets.CIFAR10(\n    root="data",\n    train=False,\n    download=True,\n    transform=transforms.Compose([\n        transforms.ToTensor(),\n        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),\n    ])\n)\ntest_data = DataLoader(test_data, batch_size=batch_sz, shuffle=True)\n'

In [None]:
#Convolutional Neural Networks

#V1 code
class V1(nn.Module):
    """Small two-convolution classifier that returns 10 raw class scores.

    The first linear layer expects ``32 * 5 * 5`` features, which is what the
    two 5x5 convolutions and two 2x2 average pools produce for 32x32 inputs
    (32 -> 28 -> 14 -> 10 -> 5).
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 16, kernel_size=5, stride=1, padding=0)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=5, stride=1, padding=0)
        self.fc1 = nn.Linear(32 * 5 * 5, 64)
        self.fc2 = nn.Linear(64, 128)
        self.fc3 = nn.Linear(128, 10)
        # One dropout module (p=0.2) is reused at every dropout point below.
        self.dropout = nn.Dropout(0.2)

    def forward(self, x):
        """Return unnormalised class scores (logits) of shape ``(batch, 10)``.

        Args:
            x: image batch with 3 channels; 32x32 spatial size is required
                for the flattened features to match ``fc1``.
        """
        batchsz = x.size(0)

        # Convolutional feature extractor.
        x = F.avg_pool2d(F.relu(self.conv1(x)), kernel_size=2, stride=2, padding=0)
        x = self.dropout(x)
        x = F.avg_pool2d(F.relu(self.conv2(x)), kernel_size=2, stride=2, padding=0)

        # Flatten to (batch, features) for the fully connected head.
        x = x.view(batchsz, -1)
        x = self.dropout(x)
        x = self.dropout(F.relu(self.fc1(x)))
        x = self.dropout(F.relu(self.fc2(x)))

        # No activation on the last layer: nn.CrossEntropyLoss expects raw
        # logits, and a ReLU here would clamp every negative score to zero.
        return self.fc3(x)

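#BENCHMARK NETWORK (disabled: kept below as a string literal)
#NOTE: a comma is missing after the second and third nn.AvgPool2d(...) entries, so this will not parse if re-enabled as-is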
"""
class NET(nn.Module):
  def __init__(self):
    super().__init__()
    self.net = nn.Sequential(
        nn.Conv2d(3, 128, kernel_size=3, stride=1, padding=1, bias=False),
        nn.BatchNorm2d(128),
        nn.ReLU(True),
        nn.AvgPool2d( kernel_size=2, stride=2, padding=0),
        nn.Conv2d(128, 256, kernel_size=3, stride=2, bias=False),
        nn.BatchNorm2d(256),
        nn.ReLU(True),
        nn.AvgPool2d(kernel_size=2, stride=2, padding=0)
        nn.Conv2d(256, 512, kernel_size=3, stride=2, bias=False))
 
    self.res1 = nn.Sequential(
        nn.BatchNorm2d(512),
        nn.ReLU(True),
        nn.Conv2d(512, 512, 3, 1, 1, bias=False),
        nn.BatchNorm2d(512),
        nn.ReLU(True),
        nn.Conv2d(512, 512, 3, 1, 1, bias=False))
 
    self.res2 = nn.Sequential(
        nn.BatchNorm2d(512),
        nn.ReLU(True),
        nn.Conv2d(512, 512, 3, 1, 1, bias=False),
        nn.BatchNorm2d(512),
        nn.ReLU(True),
        nn.Conv2d(512, 512, 3, 1, 1, bias=False))
 
    self.net2 = nn.Sequential(
        nn.Conv2d(512, 512, kernel_size=3, stride=1, bias=False),
        nn.BatchNorm2d(512),
        nn.ReLU(True),
        nn.AvgPool2d(kernel_size=3, stride=2, padding=0)
        nn.Dropout(0.2))
 
    self.fc = nn.Sequential(
        nn.Linear(512, 10),
        nn.ReLU(True),
        nn.Dropout(0.2))
 
 
  def forward(self, image):
    feature = self.net(image)
    feature = feature + self.res1(feature)
    feature = feature + self.res2(feature)
    feature = self.net2(feature)
    feature = feature.view(-1, 512)
    return self.fc(feature)
""" 
#TEST NETWORK
class NET2(nn.Module):
    """Convolutional test network with two additive branches and a 10-way head."""

    def __init__(self):
        super().__init__()

        def make_branch():
            # BN -> ReLU -> conv -> BN -> ReLU -> max-pool -> conv, all at 512 channels.
            return nn.Sequential(
                nn.BatchNorm2d(512),
                nn.ReLU(inplace=True),
                nn.Conv2d(512, 512, 3, 1, 1, bias=False),
                nn.BatchNorm2d(512),
                nn.ReLU(inplace=True),
                nn.MaxPool2d(kernel_size=3, stride=2, padding=0),
                nn.Conv2d(512, 512, 3, 1, 1, bias=False),
            )

        # Stem: 3 -> 128 -> 256 -> 512 channels with spatial down-sampling.
        stem = [
            nn.Conv2d(3, 128, kernel_size=3, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=0),
            nn.Conv2d(128, 256, kernel_size=3, stride=2, bias=False),
            nn.BatchNorm2d(256),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 512, kernel_size=3, stride=2, bias=False),
        ]
        self.net = nn.Sequential(*stem)

        self.res1 = make_branch()
        self.res2 = make_branch()

        # Reduce to 256 channels before the classifier.
        self.net2 = nn.Sequential(
            nn.Conv2d(512, 256, kernel_size=3, stride=1, bias=False),
            nn.BatchNorm2d(256),
            nn.ReLU(inplace=True),
            nn.Dropout(0.2),
        )

        # Classifier head: 256 -> 128 -> 10, no activation on the output.
        self.fc = nn.Sequential(
            nn.Linear(256, 128),
            nn.ReLU(inplace=True),
            nn.Dropout(0.2),
            nn.Linear(128, 10),
        )

    def forward(self, image):
        """Return class scores with 10 columns for a batch of 3-channel images."""
        out = self.net(image)
        # NOTE(review): the max-pool inside each branch shrinks its output, so
        # for 32x32 inputs this sum appears to rely on broadcasting (1x1 onto
        # 3x3) - confirm that is intended.
        for branch in (self.res1, self.res2):
            out = out + branch(out)
        out = self.net2(out)
        flat = out.view(-1, 256)
        return self.fc(flat)


#BASED ON VGG-13 with Batch Normalisation
#https://github.com/chengyangfu/pytorch-vgg-cifar10/blob/master/vgg.py , https://www.kaggle.com/vikasbhadoria/cifar10-high-accuracy-model-build-on-pytorch
"""
class CNN(nn.Module):

    def __init__(self):
        super(CNN, self).__init__()
        self.net = nn.Sequential(nn.Conv2d(3, 64, kernel_size=3, padding=1), nn.BatchNorm2d(64), nn.ReLU(inplace=True),
                                 nn.Conv2d(64, 64, kernel_size=3, padding=1), nn.BatchNorm2d(64), nn.ReLU(inplace=True),
                                 nn.MaxPool2d(kernel_size=2, stride=2),
                                 nn.Conv2d(64, 128, kernel_size=3, padding=1), nn.BatchNorm2d(128), nn.ReLU(inplace=True),
                                 nn.Conv2d(128, 128, kernel_size=3, padding=1), nn.BatchNorm2d(128), nn.ReLU(inplace=True),
                                 nn.MaxPool2d(kernel_size=2, stride=2),
                                 nn.Conv2d(128, 256, kernel_size=3, padding=1), nn.BatchNorm2d(256), nn.ReLU(inplace=True),
                                 nn.Conv2d(256, 256, kernel_size=3, padding=1), nn.BatchNorm2d(256), nn.ReLU(inplace=True),
                                 nn.MaxPool2d(kernel_size=2, stride=2),
                                 nn.Conv2d(256, 512, kernel_size=3, padding=1), nn.BatchNorm2d(512), nn.ReLU(inplace=True),
                                 nn.Conv2d(512, 512, kernel_size=3, padding=1), nn.BatchNorm2d(512), nn.ReLU(inplace=True),
                                 nn.MaxPool2d(kernel_size=2, stride=2),
                                 nn.Conv2d(512, 512, kernel_size=3, padding=1), nn.BatchNorm2d(512), nn.ReLU(inplace=True),
                                 nn.Conv2d(512, 512, kernel_size=3, padding=1), nn.BatchNorm2d(512), nn.ReLU(inplace=True),
                                 nn.MaxPool2d(kernel_size=2, stride=2),
                                )

        self.classifier = nn.Sequential(
                                        nn.Dropout(0.2),
                                        nn.Linear(512, 512),
                                        nn.ReLU(True),
                                        nn.Dropout(0.5),
                                        nn.Linear(512, 512),
                                        nn.ReLU(True),
                                        nn.Linear(512, 10),
                                       )

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
                m.bias.data.zero_()

    def forward(self, x):
        x = self.net(x)
        x = x.view(x.size(0), -1)
        x = self.classifier(x)
        return x
"""
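# VGG16 (disabled: kept below as a string literal), after https://arxiv.org/pdf/1409.1556.pdf
# NOTE: super(VGG, self) refers to an undefined name (the class is VGG16) and layer_sizes is never used; fix both before re-enabling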
"""
class VGG16(nn.Module):

    def __init__(self, layer_sizes, use_softmax=False):
        super(VGG, self).__init__()
        self.network = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=3, padding=1), nn.BatchNorm2d(64), nn.ReLU(inplace=True),
            nn.Conv2d(64, 64, kernel_size=3, padding=1), nn.BatchNorm2d(64), nn.ReLU(inplace=True),
            
            nn.MaxPool2d(kernel_size=2, stride=2),

            nn.Conv2d(64, 128, kernel_size=3, padding=1), nn.BatchNorm2d(128), nn.ReLU(inplace=True),
            nn.Conv2d(128, 128, kernel_size=3, padding=1), nn.BatchNorm2d(128), nn.ReLU(inplace=True),
            
            nn.MaxPool2d(kernel_size=2, stride=2),
            
            nn.Conv2d(128, 256, kernel_size=3, padding=1), nn.BatchNorm2d(256), nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1), nn.BatchNorm2d(256), nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1), nn.BatchNorm2d(256), nn.ReLU(inplace=True),
            
            nn.MaxPool2d(kernel_size=2, stride=2),
            
            nn.Conv2d(256, 512, kernel_size=3, padding=1), nn.BatchNorm2d(512), nn.ReLU(inplace=True),
            nn.Conv2d(512, 512, kernel_size=3, padding=1), nn.BatchNorm2d(512), nn.ReLU(inplace=True),
            nn.Conv2d(512, 512, kernel_size=3, padding=1), nn.BatchNorm2d(512), nn.ReLU(inplace=True),
            
            nn.MaxPool2d(kernel_size=2, stride=2),
            
            nn.Conv2d(512, 512, kernel_size=3, padding=1), nn.BatchNorm2d(512), nn.ReLU(inplace=True),
            nn.Conv2d(512, 512, kernel_size=3, padding=1), nn.BatchNorm2d(512), nn.ReLU(inplace=True),
            nn.Conv2d(512, 512, kernel_size=3, padding=1), nn.BatchNorm2d(512), nn.ReLU(inplace=True),
            
            nn.MaxPool2d(kernel_size=2, stride=2)
            )
        
        self.classifier = nn.Sequential(
                  nn.Linear(512, 4096),
                  nn.ReLU(True),
                  nn.Dropout(),
                  nn.Linear(4096, 4096),
                  nn.ReLU(True),
                  nn.Dropout(),
                  nn.Linear(4096, 10)
                  )
        self.use_softmax = use_softmax
  
    def forward(self, x):
        x = self.network(x)
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        if self.use_softmax:
            x = F.log_softmax(x, dim=1)
        return x
"""

'\nclass VGG16(nn.Module):\n\n    def __init__(self, layer_sizes, use_softmax=False):\n        super(VGG, self).__init__()\n        self.network = nn.Sequential(\n            nn.Conv2d(3, 64, kernel_size=3, padding=1), nn.BatchNorm2d(64), nn.ReLU(inplace=True),\n            nn.Conv2d(64, 64, kernel_size=3, padding=1), nn.BatchNorm2d(64), nn.ReLU(inplace=True),\n            \n            nn.MaxPool2d(kernel_size=2, stride=2),\n\n            nn.Conv2d(64, 128, kernel_size=3, padding=1), nn.BatchNorm2d(128), nn.ReLU(inplace=True),\n            nn.Conv2d(128, 128, kernel_size=3, padding=1), nn.BatchNorm2d(128), nn.ReLU(inplace=True),\n            \n            nn.MaxPool2d(kernel_size=2, stride=2),\n            \n            nn.Conv2d(128, 256, kernel_size=3, padding=1), nn.BatchNorm2d(256), nn.ReLU(inplace=True),\n            nn.Conv2d(256, 256, kernel_size=3, padding=1), nn.BatchNorm2d(256), nn.ReLU(inplace=True),\n            nn.Conv2d(256, 256, kernel_size=3, padding=1), nn.BatchNorm2d

In [None]:
# Network under test; the commented lines are the alternatives that were tried,
# each followed by its recorded results.
model = V1()
model = model.to(device)
#model = NET().to(device)
#77.44% with batch_sz = 32, OptimiserC, epoch = 132 #OUTDATED
#% with batch_sz = 32, OptimiserA, epoch = 
#model = NET2().to(device)
#% with batch_sz = 32, OptimiserC, epoch = 
#% with batch_sz = 32, OptimiserA, epoch = 
#model = CNN().to(device)
#% with batch_sz = , OptimiserE, epoch = 
#% with batch_sz = , OptimiserC, epoch = 

# Loss function, moved to the same device as the model.
criterion = nn.CrossEntropyLoss()
criterion = criterion.to(device)

# Exactly one optimiser is active at a time; the letter is the label used in
# the result notes above.
#optimiser = optim.Adam(model.parameters(), lr=0.005, weight_decay=0.002)       #OptimiserA
#optimiser = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)              #OptimiserB
optimiser = optim.SGD(params=model.parameters(), lr=0.01, momentum=0.9)        #OptimiserC
#optimiser = optim.Adam(model.parameters(), lr=0.0001, weight_decay=0.002)      #OptimiserD
#optimiser = torch.optim.Adam(model.parameters(), lr=0.001)                     #OptimiserE (used in CNN)

In [None]:
#OLD TEST
# Train for a fixed number of epochs and evaluate on the test loader after
# each one. `acc[i]` is the test accuracy measured after epoch i.
acc = []
for epoch in range(1000):
    # The evaluation at the end of the previous epoch left the model in eval
    # mode; switch back so dropout / batch-norm act as training layers again.
    model.train()
    for inputs, target in training_data:
        inputs, target = inputs.to(device), target.to(device)
        optimiser.zero_grad()
        output = model(inputs)
        loss = criterion(output, target)
        loss.backward()
        optimiser.step()
    #print(epoch, loss.item())

    #test
    model.eval()
    with torch.no_grad():
        total_correct = 0
        total_num = 0
        for inputs, target in test_data:
            inputs, target = inputs.to(device), target.to(device)
            output = model(inputs)
            pred = output.argmax(dim=1)
            total_correct += torch.eq(pred, target).sum().item()
            total_num += target.size(0)

    acc.append(total_correct / total_num)
    if epoch % 5 == 0:
        # Columns: current epoch, epoch with the best accuracy so far, that accuracy.
        print(epoch, np.argmax(acc), max(acc))

    #print(epoch, total_correct / total_num)

#torch.save(model.state_dict(), "cifar-net.pth")

0 0 0.3364
5 5 0.5226
10 10 0.577
15 15 0.5883
20 20 0.6044
25 25 0.6207
30 25 0.6207
35 35 0.6241
40 39 0.6295
45 39 0.6295
50 49 0.6325
55 49 0.6325
60 60 0.6374
65 63 0.6392
70 69 0.6398
75 69 0.6398
80 69 0.6398
85 69 0.6398
90 90 0.6446
95 90 0.6446
100 90 0.6446
105 104 0.6462
110 104 0.6462
115 115 0.6487
120 115 0.6487
125 115 0.6487
130 115 0.6487
135 115 0.6487
140 115 0.6487
145 143 0.6555
150 143 0.6555
155 143 0.6555
160 143 0.6555
165 163 0.6579
170 163 0.6579
175 163 0.6579
180 177 0.6627
185 177 0.6627
190 177 0.6627
195 177 0.6627
200 177 0.6627
205 177 0.6627
210 177 0.6627
215 177 0.6627
220 177 0.6627
225 177 0.6627
230 177 0.6627
235 177 0.6627
240 177 0.6627
245 177 0.6627
250 177 0.6627
255 177 0.6627
260 177 0.6627
265 177 0.6627
270 177 0.6627
275 273 0.6638
280 273 0.6638
285 273 0.6638
290 273 0.6638
295 273 0.6638
300 273 0.6638
305 303 0.6668
310 303 0.6668
315 303 0.6668
320 303 0.6668
325 322 0.6672
330 322 0.6672
335 322 0.6672
340 322 0.6672
345 322 0.6

In [None]:
#VERBOSE TEST
# Train and evaluate once per epoch, recording mean loss per batch and
# accuracy per sample for both splits.

# The loaders built in the data cell are called training_data / test_data;
# bind the names this cell uses to them.
train_loader, test_loader = training_data, test_data

epochs = 1000
running_loss_history = []
running_corrects_history = []
test_running_loss_history = []
test_running_corrects_history = []

for epoch in range(epochs):
    running_loss = 0.0
    running_corrects = 0
    train_seen = 0

    # Training pass: dropout / batch-norm in training mode.
    model.train()
    for inputs, labels in train_loader:
        inputs = inputs.to(device)
        labels = labels.to(device)
        outputs = model(inputs)
        loss = criterion(outputs, labels)

        optimiser.zero_grad()
        loss.backward()
        optimiser.step()

        predictions = outputs.argmax(dim=1)
        running_loss += loss.item()
        running_corrects += (predictions == labels).sum().item()
        train_seen += labels.size(0)

    test_running_loss = 0.0
    test_running_corrects = 0
    test_seen = 0

    # Evaluation pass: eval mode and no gradient tracking.
    model.eval()
    with torch.no_grad():
        for test_inputs, test_labels in test_loader:
            test_inputs = test_inputs.to(device)
            test_labels = test_labels.to(device)
            test_outputs = model(test_inputs)

            test_loss = criterion(test_outputs, test_labels)

            test_predictions = test_outputs.argmax(dim=1)
            test_running_loss += test_loss.item()
            test_running_corrects += (test_predictions == test_labels).sum().item()
            test_seen += test_labels.size(0)

    # Loss is averaged over batches; accuracy is the fraction of samples
    # classified correctly (dividing by the batch count would not be a fraction).
    epoch_loss = running_loss / len(train_loader)
    epoch_accuracy = running_corrects / train_seen
    running_loss_history.append(epoch_loss)
    running_corrects_history.append(epoch_accuracy)

    test_epoch_loss = test_running_loss / len(test_loader)
    test_epoch_accuracy = test_running_corrects / test_seen
    test_running_loss_history.append(test_epoch_loss)
    test_running_corrects_history.append(test_epoch_accuracy)

    print(f"Epoch {epoch+1}", "|",
          f"Training Loss: {epoch_loss:.4f}, Accuracy: {epoch_accuracy:.4f}", "|",
          f"Testing Loss: {test_epoch_loss:.4f}, Accuracy: {test_epoch_accuracy:.4f}")


