In [1]:
import pandas as pd
import numpy as np
import torch
from torch import nn
import torch.nn.functional as F
import torchvision
from torch.utils.data import Dataset, DataLoader, TensorDataset
from torchvision import datasets, transforms
from torchvision.datasets import ImageFolder
from matplotlib import pyplot as plt
%matplotlib inline
import time
import cv2
import os
from PIL import Image

In [2]:
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [3]:
# Dataset root, and the images sub-folder that lives directly beneath it.
root_dir = '../datasets/MixedMNIST/'
images_dir = root_dir + 'images/'

# MNIST Train

In [4]:
# Labelled training images, read from the 'train' folder under `images_dir`.
# Every image is reduced to a single grayscale channel and converted to a tensor.
mnist_train = ImageFolder(
    root=images_dir + 'train',
    transform=transforms.Compose([
        transforms.Grayscale(num_output_channels=1),
        transforms.ToTensor(),
    ]),
)

# MNIST Test

In [5]:
class MNIST_Test(Dataset):
    """Unlabelled test images listed in a CSV file.

    The CSV must have an ``id`` column, and its second column must hold each
    image's file name relative to ``root_dir``. Indexing the dataset yields
    ``(image, image_id)`` pairs.
    """

    def __init__(self, csv_file, root_dir, transform=None):
        """Read the annotation CSV and remember where the images live.

        Args:
            csv_file: path of the CSV listing the test images.
            root_dir: directory that the file names in the CSV are relative to.
            transform: optional callable applied to every opened image.
        """
        self.annotations = pd.read_csv(csv_file)
        self.image_id = self.annotations.id
        self.root_dir = root_dir
        self.transform = transform

    def __len__(self):
        """Return the number of rows in the annotation CSV."""
        return len(self.annotations)

    def __getitem__(self, index):
        """Return ``(image, image_id)`` for the row at position ``index``."""
        # Second CSV column ('image') holds the file name.
        img_path = os.path.join(self.root_dir, self.annotations.iloc[index, 1])
        image = Image.open(img_path)
        # Look the id up by position as well, so it always refers to the same row
        # as the image path (label-based `[]` breaks for negative indices or a
        # non-default index).
        image_id = self.image_id.iloc[index]
        if self.transform is not None:
            image = self.transform(image)

        return image, image_id

In [6]:
# Unlabelled test images: file names come from test.csv under `root_dir`, the
# files themselves from the 'test' folder under `images_dir`. Same grayscale +
# tensor conversion as the training data.
mnist_test = MNIST_Test(
    csv_file=root_dir + 'test.csv',
    root_dir=images_dir + 'test',
    transform=transforms.Compose([
        transforms.Grayscale(num_output_channels=1),
        transforms.ToTensor(),
    ]),
)

# Model

In [35]:
class block(nn.Module):
    """Bottleneck residual block: 1x1 -> 3x3 -> 1x1 convolutions plus a skip connection.

    Each convolution is followed by batch normalisation. The block outputs
    ``intermediate_channels * expansion`` channels.

    Args:
        in_channels: number of channels of the input tensor.
        intermediate_channels: width of the two inner convolutions.
        identity_downsample: optional module applied to the input before it is
            added to the output; needed whenever the input and output shapes differ.
        stride: stride of the 3x3 convolution.
    """

    # Ratio between the block's output channels and `intermediate_channels`.
    expansion = 4

    def __init__(
        self, in_channels, intermediate_channels, identity_downsample=None, stride=1
    ):
        super().__init__()
        self.conv1 = nn.Conv2d(
            in_channels, intermediate_channels, kernel_size=1, stride=1, padding=0, bias=False
        )
        self.bn1 = nn.BatchNorm2d(intermediate_channels)
        self.conv2 = nn.Conv2d(
            intermediate_channels,
            intermediate_channels,
            kernel_size=3,
            stride=stride,
            padding=1,
            bias=False
        )
        self.bn2 = nn.BatchNorm2d(intermediate_channels)
        self.conv3 = nn.Conv2d(
            intermediate_channels,
            intermediate_channels * self.expansion,
            kernel_size=1,
            stride=1,
            padding=0,
            bias=False
        )
        self.bn3 = nn.BatchNorm2d(intermediate_channels * self.expansion)
        self.relu = nn.ReLU()
        self.identity_downsample = identity_downsample
        self.stride = stride

    def forward(self, x):
        """Return ``relu(F(x) + skip(x))`` where F is the three-convolution stack."""
        # `x` is only rebound below, never modified in place, so the skip path can
        # keep a plain reference to the input instead of paying for a copy.
        identity = x

        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.conv2(x)
        x = self.bn2(x)
        x = self.relu(x)
        x = self.conv3(x)
        x = self.bn3(x)

        if self.identity_downsample is not None:
            identity = self.identity_downsample(identity)

        # In-place add targets the fresh bn3 output, not the caller's tensor.
        x += identity
        x = self.relu(x)
        return x


class ResNet(nn.Module):
    """ResNet backbone assembled from four stages of a bottleneck `block`.

    Args:
        block: block class, called as
            ``block(in_channels, intermediate_channels, identity_downsample, stride)``;
            it is expected to output ``intermediate_channels * 4`` channels.
        layers: number of blocks in each of the four stages.
        image_channels: number of channels of the input images.
        num_classes: size of the final linear layer's output.
    """

    def __init__(self, block, layers, image_channels, num_classes):
        super().__init__()
        # Channel count entering the next stage; _make_layer keeps it up to date.
        self.in_channels = 64

        # Stem: 7x7 stride-2 convolution followed by a 3x3 stride-2 max-pool.
        self.conv1 = nn.Conv2d(image_channels, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU()
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # The four residual stages (layer1..layer4) make up the body of the network.
        stage_specs = ((64, 1), (128, 2), (256, 2), (512, 2))
        for position, (width, stage_stride) in enumerate(stage_specs):
            stage = self._make_layer(
                block, layers[position], intermediate_channels=width, stride=stage_stride
            )
            setattr(self, 'layer{}'.format(position + 1), stage)

        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512 * 4, num_classes)

    def forward(self, x):
        """Return the class logits for a batch of images."""
        out = self.maxpool(self.relu(self.bn1(self.conv1(x))))

        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
            out = stage(out)

        out = self.avgpool(out)
        # Flatten everything except the batch dimension before the classifier.
        out = out.reshape(out.shape[0], -1)
        return self.fc(out)

    def _make_layer(self, block, num_residual_blocks, intermediate_channels, stride):
        """Build one stage: a first block that may reshape, then shape-preserving blocks."""
        # The expansion factor is 4 for the bottleneck ResNets (50, 101, 152).
        out_channels = intermediate_channels * 4

        # The skip connection needs a projection whenever the first block halves the
        # spatial size (stride 2) or changes the channel count; otherwise the
        # identity could not be added to the block's output.
        if stride != 1 or self.in_channels != out_channels:
            identity_downsample = nn.Sequential(
                nn.Conv2d(
                    self.in_channels,
                    out_channels,
                    kernel_size=1,
                    stride=stride,
                    bias=False
                ),
                nn.BatchNorm2d(out_channels),
            )
        else:
            identity_downsample = None

        first_block = block(self.in_channels, intermediate_channels, identity_downsample, stride)
        self.in_channels = out_channels

        # Remaining blocks map out_channels -> intermediate -> out_channels with
        # stride 1, so they need no projection on the skip path.
        remaining_blocks = [
            block(self.in_channels, intermediate_channels)
            for _ in range(num_residual_blocks - 1)
        ]

        return nn.Sequential(first_block, *remaining_blocks)

In [36]:
def ResNet50(img_channels=1, num_classes=10):
    """Build a ResNet from `block` with 3, 4, 6 and 3 blocks in its four stages."""
    stage_depths = [3, 4, 6, 3]
    return ResNet(
        block=block,
        layers=stage_depths,
        image_channels=img_channels,
        num_classes=num_classes,
    )

# Metrics

In [37]:
def comp_accuracy(model, data_loader):
    """Count correct and wrong top-1 predictions of `model` over `data_loader`.

    If `model` is an ``nn.Module`` it is switched to eval mode (and left there).
    Batches are moved to the device of the model's first parameter; when the
    model has no parameters (or is a plain callable) the notebook-level
    `device` is used instead.

    Args:
        model: callable mapping a batch of features to per-class scores.
        data_loader: iterable of ``(features, labels)`` batches.

    Returns:
        ``(correct, wrong, accuracy)`` where accuracy is a percentage. For an
        empty loader the result is ``(0, 0, 0.0)`` instead of a division by zero;
        otherwise the three values are 0-dim float tensors.
    """
    target_device = None
    if isinstance(model, nn.Module):
        model.eval()
        first_param = next(model.parameters(), None)
        if first_param is not None:
            target_device = first_param.device
    if target_device is None:
        target_device = device

    correct = 0
    num_examples = 0

    with torch.no_grad():
        for features, labels in data_loader:
            features = features.to(target_device)
            labels = labels.to(target_device)

            logits = model(features)
            # Index of the highest score per sample is the predicted class.
            predictions = logits.argmax(dim=1)

            num_examples += labels.size(0)
            correct += (predictions == labels).sum().float()

    if num_examples == 0:
        return 0, 0, 0.0

    wrong = num_examples - correct
    accuracy = correct / num_examples * 100

    return correct, wrong, accuracy

# Training

In [38]:
def fit(model, train_loader, epochs, learning_rate, loss_func=nn.CrossEntropyLoss(), opt_func=torch.optim.SGD):
    """Train `model` on `train_loader`, printing progress as it goes.

    The cost is printed every 250 batches. After each epoch the accuracy over
    `train_loader` is computed with `comp_accuracy` and printed. The total
    wall-clock time is printed at the end.

    Args:
        model: network to train; it is moved to the notebook-level `device`.
        train_loader: iterable of ``(features, labels)`` batches.
        epochs: number of passes over `train_loader`.
        learning_rate: passed to `opt_func` as its second positional argument.
        loss_func: callable ``(logits, labels) -> scalar loss``.
        opt_func: optimizer factory called as ``opt_func(parameters, learning_rate)``.
    """
    # Move the model first so the optimizer is built over the parameters as they
    # exist on the training device.
    model = model.to(device)
    optimizer = opt_func(model.parameters(), learning_rate)

    start = time.time()  # measure time

    for epoch in range(epochs):

        # The per-epoch accuracy pass may leave the model in eval mode, so switch
        # back to training mode at the start of every epoch.
        model = model.train()

        for batch_index, (features, labels) in enumerate(train_loader):

            # gpu usage if possible
            features = features.to(device)
            labels = labels.to(device)

            # 1. forward
            logits = model(features)

            # 2. compute objective function (softmax, cross entropy)
            cost = loss_func(logits, labels)

            # 3. clear gradients left over from the previous step
            optimizer.zero_grad()

            # 4. accumulate partial derivatives
            cost.backward()

            # 5. step in the opposite direction of the gradient
            optimizer.step()

            if batch_index % 250 == 0:
                print('Epoch: {}/{} | Batch {}/{} | Cost: {:.4f}'.format(
                    epoch + 1,
                    epochs,
                    batch_index,
                    len(train_loader),
                    cost.item()  # plain float for formatting
                ))

        correct, wrong, accuracy = comp_accuracy(model, train_loader)
        print('Training: Correct[{:.0f}] | Wrong[{:.0f}] | Accuracy[{:.2f}%]'.format(
            correct,
            wrong,
            accuracy
        ), '\n')

    end = time.time()
    print('Training time: {:.2f} seconds on {}'.format(
        end - start,
        device
    ))

# Fit

In [39]:
# Training hyper-parameters.
batch_size = 100
epochs = 60
learning_rate = 0.08

# Training batches are reshuffled every epoch; test batches keep the CSV order.
train_loader = DataLoader(dataset=mnist_train, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=mnist_test, batch_size=batch_size, shuffle=False)

In [None]:
# Build the network with its default arguments and train it.
net = ResNet50()
fit(model=net, train_loader=train_loader, epochs=epochs, learning_rate=learning_rate)

Epoch: 1/60 | Batch 0/2400 | Cost: 2.5593
Epoch: 1/60 | Batch 250/2400 | Cost: 1.7124
Epoch: 1/60 | Batch 500/2400 | Cost: 1.9342
Epoch: 1/60 | Batch 750/2400 | Cost: 1.9231
Epoch: 1/60 | Batch 1000/2400 | Cost: 0.6802
Epoch: 1/60 | Batch 1250/2400 | Cost: 0.3942
Epoch: 1/60 | Batch 1500/2400 | Cost: 0.6664
Epoch: 1/60 | Batch 1750/2400 | Cost: 0.4926
Epoch: 1/60 | Batch 2000/2400 | Cost: 0.2220
Epoch: 1/60 | Batch 2250/2400 | Cost: 0.5275
Training: Correct[212259] | Wrong[27741] | Accuracy[88.44%] 

Epoch: 2/60 | Batch 0/2400 | Cost: 0.4333
Epoch: 2/60 | Batch 250/2400 | Cost: 0.2671
Epoch: 2/60 | Batch 500/2400 | Cost: 0.2100
Epoch: 2/60 | Batch 750/2400 | Cost: 0.2532
Epoch: 2/60 | Batch 1000/2400 | Cost: 0.2757
Epoch: 2/60 | Batch 1250/2400 | Cost: 0.3298
Epoch: 2/60 | Batch 1500/2400 | Cost: 0.4880
Epoch: 2/60 | Batch 1750/2400 | Cost: 0.1902
Epoch: 2/60 | Batch 2000/2400 | Cost: 0.3657
Epoch: 2/60 | Batch 2250/2400 | Cost: 0.2855
Training: Correct[219501] | Wrong[20499] | Accurac

# Test

In [None]:
# Inference must run in eval mode (BatchNorm uses its running statistics); set it
# explicitly instead of relying on whatever mode an earlier call left the network in.
net.eval()

results = []

with torch.no_grad():
    for features, image_id in test_loader:
        features = features.to(device)

        logits = net(features)
        # Index of the highest score per sample is the predicted class.
        predictions = logits.argmax(dim=1).cpu().tolist()

        # Accept ids delivered either as a tensor or as a plain sequence.
        ids = image_id.tolist() if torch.is_tensor(image_id) else list(image_id)

        # One [id, predicted class] row per sample, stored as plain Python values.
        results.extend([sample_id, label] for sample_id, label in zip(ids, predictions))

In [None]:
# Tabulate the collected [id, predicted class] rows and preview the first few.
df = pd.DataFrame(data=results, columns=['id', 'classification'])
df.head()

In [None]:
# Write the predictions as a comma-separated file, without the DataFrame index.
df.to_csv(path_or_buf="submission.csv", sep=",", index=False)