## Authors:
#### Daniel Stöckein (5018039), Alexander Triol (5018451)

In [1]:
import pandas as pd
import numpy as np
import torch
from torch import nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader, TensorDataset
from torchvision import datasets, transforms
from torchvision.datasets import ImageFolder
from matplotlib import pyplot as plt
%matplotlib inline
import time
import cv2
import os
from PIL import Image

In [2]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

## 1. Load & Explore Data

In [33]:
root_dir = '../datasets/MixedMNIST/'
images_dir = '../datasets/MixedMNIST/images/'

## 1.1 Load train images
- ``ImageFolder`` loads images from a directory where each subdirectory will be interpreted as a class/label
https://pytorch.org/vision/stable/datasets.html
```
images_train
└───0
│   │   0_003008.png
│   │   1_231516.png
│   │   ...
.
.
.
└───9
│   │   0_031433.png
│   │   1_208115.png
│   │   ...
```

In [4]:
mnist_train = ImageFolder(
    images_dir + 'train', 
    transform=transforms.Compose(
        [
            transforms.Grayscale(num_output_channels=1),
            transforms.ToTensor()
        ]
    )
)

Show recognized classes

In [36]:
print(mnist_train.class_to_idx)

{'0': 0, '1': 1, '2': 2, '3': 3, '4': 4, '5': 5, '6': 6, '7': 7, '8': 8, '9': 9}


``ImageFolder`` stores the full path to the image (among other informations)

In [6]:
img_names = [x[0] for x in mnist_train.imgs]
img_names

['../datasets/MixedMNIST/images/train\\0\\0_000013.png',
 '../datasets/MixedMNIST/images/train\\0\\0_000035.png',
 '../datasets/MixedMNIST/images/train\\0\\0_000044.png',
 '../datasets/MixedMNIST/images/train\\0\\0_000060.png',
 '../datasets/MixedMNIST/images/train\\0\\0_000149.png',
 '../datasets/MixedMNIST/images/train\\0\\0_000165.png',
 '../datasets/MixedMNIST/images/train\\0\\0_000175.png',
 '../datasets/MixedMNIST/images/train\\0\\0_000232.png',
 '../datasets/MixedMNIST/images/train\\0\\0_000251.png',
 '../datasets/MixedMNIST/images/train\\0\\0_000260.png',
 '../datasets/MixedMNIST/images/train\\0\\0_000326.png',
 '../datasets/MixedMNIST/images/train\\0\\0_000390.png',
 '../datasets/MixedMNIST/images/train\\0\\0_000417.png',
 '../datasets/MixedMNIST/images/train\\0\\0_000481.png',
 '../datasets/MixedMNIST/images/train\\0\\0_000554.png',
 '../datasets/MixedMNIST/images/train\\0\\0_000606.png',
 '../datasets/MixedMNIST/images/train\\0\\0_000677.png',
 '../datasets/MixedMNIST/images

Show some additional information of the dataset

In [7]:
mnist_train[0][0].shape

torch.Size([1, 28, 28])

In [8]:
print(len(mnist_train))

240000


## 1.2 Load test images
- Since the test images are not structured in subfolders and don't have any indication of their class, we can not use ``ImageFolder``
```
images_test
│   003008.png
│   231516.png
│   069583.png
...
```
- We rather need to create a custom dataset as shown here https://pytorch.org/tutorials/beginner/data_loading_tutorial.html
- A custom dataset inherits the PyTorch class ``Dataset`` and requires to implement three functions ``__init__``, ``__len__`` and ``__getitem__``
- ``__init__``: Instantiating the Dataset object. 
- - We additionaly load the ``test.csv`` file to assign the image_id for each sample of the dataset
- ``__len__``: Simply returns the length of the dataset
- ``__getitem__``: Loads and returns a sample from the dataset at the given index. Based on this index, it indentifies the images location on disk and performs some defined transformations.
- **return**: ``tensor: image``, ``int: image_id`` 

In [42]:
class MNIST_Test(Dataset):
    def __init__(self, csv_file, root_dir, transform=None):
        self.annotations = pd.read_csv(csv_file)
        self.image_id = self.annotations.id
        self.root_dir = root_dir
        self.transform = transform
        
    def __len__(self):
        return len(self.annotations)
    
    def __getitem__(self, index):
        img_path = os.path.join(self.root_dir, self.annotations.iloc[index, 1]) # 'image' column
        image = Image.open(img_path)
        image_id = self.image_id[index]
        if self.transform is not None:
            image = self.transform(image)
            
        return image, image_id

Create a test dataset object and perform grayscale + ToTensor transformation.

In [43]:
mnist_test = MNIST_Test(
    csv_file = root_dir + 'test.csv', 
    root_dir = images_dir + 'test', 
    transform = transforms.Compose(
        [
            transforms.Grayscale(num_output_channels = 1),
            transforms.ToTensor()
        ]
    )
)

In [44]:
mnist_test.annotations.image

0        240000.png
1        240001.png
2        240002.png
3        240003.png
4        240004.png
            ...    
39995    279995.png
39996    279996.png
39997    279997.png
39998    279998.png
39999    279999.png
Name: image, Length: 40000, dtype: object

Let's show some additional information. Shape should be same as in train set

In [45]:
mnist_test[0][0].shape

torch.Size([1, 28, 28])

In [48]:
len(mnist_test)

40000

We can simply retrieve the images id like so

In [46]:
mnist_test[0][1]

240000

## 2. Preparing DataLoader
- Shuffle the training data to improve generalization and to avoid overfitting
- It is important to **not** shuffle the test dataset

In [16]:
def dloaders(batch_size):
    train_loader = DataLoader(mnist_train, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(mnist_test, batch_size=batch_size, shuffle=False)
    return train_loader, test_loader

## 3. Model definition
- We start with a DenseNet
- We ignore the fact that the images come from different datasources, since we are only interested in the class label

In [17]:
class DenseLayer(nn.Module):
    
    def __init__(self, input_channels, output_channels):
        super(DenseLayer, self).__init__()
        
        self.layer = nn.Sequential(
            nn.BatchNorm2d(input_channels),
            nn.ReLU(),
            nn.Conv2d(input_channels, output_channels, kernel_size=3, padding=1)
        )
        
    def forward(self, x):
        out = self.layer(x)
        out = torch.cat((out, x), dim=1)
        return out

In [18]:
class DenseBlock(nn.Module):
    
    def __init__(self, input_channels, output_channels, num_layers):
        super(DenseBlock, self).__init__()
        
        layers = []
        for layer_index in range(num_layers): # e.g 5 repitions when num_layers = 5
            layers.append(
                DenseLayer(
                    input_channels + layer_index * output_channels,
                    output_channels
                )
            )
            
        # https://discuss.pytorch.org/t/append-for-nn-sequential-or-directly-converting-nn-modulelist-to-nn-sequential/7104
        self.block = nn.Sequential(*layers) 
        
    def forward(self, x):
        out = self.block(x)
        return out

In [19]:
class TransitionLayer(nn.Module):
    
    def __init__(self, input_channels, output_channels):
        super(TransitionLayer, self).__init__()

        self.transition = nn.Sequential(
            nn.BatchNorm2d(input_channels),
            nn.ReLU(),
            nn.Conv2d(input_channels, output_channels, kernel_size=1),
            nn.AvgPool2d(kernel_size=2, stride=2)
        )      
        
    def forward(self, x):
        out = self.transition(x)
        return out

In [20]:
class DenseNet(nn.Module):
    def __init__(self, input_channels, num_classes):
        super(DenseNet, self).__init__()
        
        output_channels = 4
        growth_rate = 32
        num_layers_per_block = [3, 3] # 3 layers and 2 blocks
        
        self.net_input = nn.Sequential(
            nn.Conv2d(in_channels=input_channels, out_channels=output_channels, kernel_size=3),
            nn.AvgPool2d(kernel_size=3, stride=2)
        )
        
        blocks = []
        for block_index, num_layers in enumerate(num_layers_per_block):
            blocks.append(
                DenseBlock(output_channels, growth_rate, num_layers)
            )
            output_channels += num_layers * growth_rate 
            
            if block_index < len(num_layers_per_block) - 1:
                blocks.append(
                    TransitionLayer(output_channels, 4)
                )
                output_channels = 4
                
        self.blocks = nn.Sequential(*blocks)
        
        self.net_output = nn.Sequential(
            nn.BatchNorm2d(output_channels),
            nn.ReLU(),
            nn.AdaptiveMaxPool2d((1, 1)),
            nn.Flatten(),
            nn.Linear(output_channels, num_classes)
        )
        
    def forward(self, x):  
        out = self.net_input(x)
        out = self.blocks(out)
        out = self.net_output(out)
        return out

## 4. Training procedure
- Just a standard training procedure
- However, since we dont have labeled test data, we can only compute the accuracy on the training data

In [49]:
def fit(model, train_loader, epochs, learning_rate, loss_func=nn.CrossEntropyLoss(), opt_func=torch.optim.SGD):
    
    optimizer = opt_func(model.parameters(), learning_rate) # objective function
    
    model = model.to(device)
    
    start = time.time() # measure time
    
    for epoch in range(epochs):
        
        model = model.train()
              
        for batch_index, (features, labels) in enumerate(train_loader):
            
            # gpu usage if possible
            features = features.to(device)
            labels = labels.to(device)
            
            # 1. forward
            logits = model(features)

            # 2. compute objective function (softmax, cross entropy)
            cost = loss_func(logits, labels)
            
            # 3. cleaning gradients
            optimizer.zero_grad() 

            # 4. accumulate partial derivatives
            cost.backward() 

            # 5. step in the opposite direction of the gradient
            optimizer.step() 
            
            if not batch_index % 250:
                print ('Epoch: {}/{} | Batch {}/{} | Cost: {:.4f}'.format(
                    epoch+1,
                    epochs,
                    batch_index,
                    len(train_loader),
                    cost
                ))
        
        correct, wrong, accuracy = comp_accuracy(model, train_loader)
        print ('Training: Correct[{:.0f}] | Wrong[{:.0f}] | Accuracy[{:.2f}%]'.format(
            correct,
            wrong,
            accuracy
        ), '\n')
         
    end = time.time()
    print('Training time: {:.2f} seconds on {}'.format(
        end - start, 
        device
    ))    

## 5. Metrics
- Since this is a classification problem, we are interested in the accuracy

In [50]:
def comp_accuracy(model, data_loader):
    correct = 0
    wrong = 0
    num_examples = 0
    
    # turn on eval mode if model Inherits from nn.Module
    if isinstance(model, nn.Module):
        model.eval()

    with torch.no_grad():
        for batch_index, (features, labels) in enumerate(data_loader):
            features = features.to(device)
            labels = labels.to(device)

            logits = model(features)
            _, predictions = torch.max(logits, dim=1) # single class with highest probability. simply retain indices
            
            num_examples += labels.size(0)

            correct += (predictions == labels).sum().float()
            wrong += (predictions != labels).sum().float()
            
        accuracy = correct / num_examples * 100      
        
    return correct, wrong, accuracy

## 6. Playground

In [51]:
batch_size = 100
epochs = 20
learning_rate = 0.01

train_loader, test_loader = dloaders(batch_size=batch_size) # data iters

In [32]:
model = DenseNet(1, 10)
fit(model, train_loader, epochs, learning_rate) # training

Epoch: 1/20 | Batch 0/2400 | Cost: 2.7694
Epoch: 1/20 | Batch 250/2400 | Cost: 0.6329
Epoch: 1/20 | Batch 500/2400 | Cost: 0.5016
Epoch: 1/20 | Batch 750/2400 | Cost: 0.3836
Epoch: 1/20 | Batch 1000/2400 | Cost: 0.2647
Epoch: 1/20 | Batch 1250/2400 | Cost: 0.4046
Epoch: 1/20 | Batch 1500/2400 | Cost: 0.4508
Epoch: 1/20 | Batch 1750/2400 | Cost: 0.5278
Epoch: 1/20 | Batch 2000/2400 | Cost: 0.3259
Epoch: 1/20 | Batch 2250/2400 | Cost: 0.6139
Training: Correct[208560] | Wrong[31440] | Accuracy[86.90%] 

Epoch: 2/20 | Batch 0/2400 | Cost: 0.2628
Epoch: 2/20 | Batch 250/2400 | Cost: 0.2605
Epoch: 2/20 | Batch 500/2400 | Cost: 0.2311
Epoch: 2/20 | Batch 750/2400 | Cost: 0.3349
Epoch: 2/20 | Batch 1000/2400 | Cost: 0.2033
Epoch: 2/20 | Batch 1250/2400 | Cost: 0.2025
Epoch: 2/20 | Batch 1500/2400 | Cost: 0.1860
Epoch: 2/20 | Batch 1750/2400 | Cost: 0.3482
Epoch: 2/20 | Batch 2000/2400 | Cost: 0.3923
Epoch: 2/20 | Batch 2250/2400 | Cost: 0.3017
Training: Correct[216364] | Wrong[23636] | Accurac

Epoch: 17/20 | Batch 250/2400 | Cost: 0.1085
Epoch: 17/20 | Batch 500/2400 | Cost: 0.1182
Epoch: 17/20 | Batch 750/2400 | Cost: 0.1171
Epoch: 17/20 | Batch 1000/2400 | Cost: 0.0751
Epoch: 17/20 | Batch 1250/2400 | Cost: 0.1842
Epoch: 17/20 | Batch 1500/2400 | Cost: 0.1649
Epoch: 17/20 | Batch 1750/2400 | Cost: 0.2401
Epoch: 17/20 | Batch 2000/2400 | Cost: 0.1253
Epoch: 17/20 | Batch 2250/2400 | Cost: 0.3238
Training: Correct[226662] | Wrong[13338] | Accuracy[94.44%] 

Epoch: 18/20 | Batch 0/2400 | Cost: 0.2487
Epoch: 18/20 | Batch 250/2400 | Cost: 0.2578
Epoch: 18/20 | Batch 500/2400 | Cost: 0.1050
Epoch: 18/20 | Batch 750/2400 | Cost: 0.2651
Epoch: 18/20 | Batch 1000/2400 | Cost: 0.1393
Epoch: 18/20 | Batch 1250/2400 | Cost: 0.2263
Epoch: 18/20 | Batch 1500/2400 | Cost: 0.1348
Epoch: 18/20 | Batch 1750/2400 | Cost: 0.2463
Epoch: 18/20 | Batch 2000/2400 | Cost: 0.1465
Epoch: 18/20 | Batch 2250/2400 | Cost: 0.1823
Training: Correct[227336] | Wrong[12664] | Accuracy[94.72%] 

Epoch: 19/2

## 7. Evaluation
- Now we have a trained model
- Let's predict the classes for our test dataset and store the results in a list
- - We expect a list of lists with the structure ``[[image_id_1, prediction_1], [image_id_2, prediction_2], ...]``

In [52]:
results = []

with torch.no_grad():
    for batch_index, (features, image_id) in enumerate(test_loader):
        features = features.to(device)

        logits = model(features)
        _, predictions = torch.max(logits, dim=1) # single class with highest probability. simply retain indices
            
        for i, features in enumerate(features): # now iterate over each element of the current batch
            results.append(
                [image_id[i].detach().numpy(), predictions[i].cpu().numpy()]
            )

In [53]:
results

[[array(240000, dtype=int64), array(6, dtype=int64)],
 [array(240001, dtype=int64), array(9, dtype=int64)],
 [array(240002, dtype=int64), array(0, dtype=int64)],
 [array(240003, dtype=int64), array(8, dtype=int64)],
 [array(240004, dtype=int64), array(2, dtype=int64)],
 [array(240005, dtype=int64), array(9, dtype=int64)],
 [array(240006, dtype=int64), array(6, dtype=int64)],
 [array(240007, dtype=int64), array(0, dtype=int64)],
 [array(240008, dtype=int64), array(2, dtype=int64)],
 [array(240009, dtype=int64), array(4, dtype=int64)],
 [array(240010, dtype=int64), array(2, dtype=int64)],
 [array(240011, dtype=int64), array(3, dtype=int64)],
 [array(240012, dtype=int64), array(2, dtype=int64)],
 [array(240013, dtype=int64), array(9, dtype=int64)],
 [array(240014, dtype=int64), array(7, dtype=int64)],
 [array(240015, dtype=int64), array(1, dtype=int64)],
 [array(240016, dtype=int64), array(1, dtype=int64)],
 [array(240017, dtype=int64), array(0, dtype=int64)],
 [array(240018, dtype=int64)

Finally, create a dataframe of the ``results`` list and save it as .csv

In [54]:
df = pd.DataFrame(results, columns =['id', 'classification'])
df.head()

Unnamed: 0,id,classification
0,240000,6
1,240001,9
2,240002,0
3,240003,8
4,240004,2


In [55]:
df.to_csv("submission.csv", index=False, sep=",")