# List the input files

In [145]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load in 

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os

def show_input_dir(root='../input'):
    """Print the path of every file found under ``root``.

    Args:
        root: Directory that is walked recursively. Defaults to the
            notebook's input directory, ``'../input'``.
    """
    for dirname, _, filenames in os.walk(root):
        for filename in filenames:
            print(os.path.join(dirname, filename))

show_input_dir()

# Any results you write to the current directory are saved as output.

../input/digit-recognizer/test.csv
../input/digit-recognizer/train.csv
../input/digit-recognizer/sample_submission.csv


# Import required libraries

In [146]:
import pandas as pd

In [147]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from torch.utils.data import sampler
import torch.nn.functional as F

In [148]:
import matplotlib.pyplot as plt

%matplotlib inline

# Declare common parameters

In [149]:
# Shape of a single input image as (channels, height, width).
image_shape = (1, 28, 28)
# Tensor type that input batches and model outputs are cast to.
# This is a CUDA tensor type, so the notebook needs a GPU to run.
dtype = torch.cuda.FloatTensor
# Default number of samples per batch for the loaders and the training loop.
batch_size = 100

# Declare all the functions we need

## Reading the data, sampling, data loaders

In [150]:

def read_input_data(data_dir="../input/digit-recognizer"):
    """Load the train and test tables of the digit-recognizer dataset.

    Args:
        data_dir: Directory that contains ``train.csv`` and ``test.csv``.
            Defaults to the input directory used by this notebook.

    Returns:
        A ``(train, test)`` tuple of pandas DataFrames.
    """
    # sample_submission.csv is intentionally not read: the previous version
    # loaded it into a local that was never used or returned.
    train = pd.read_csv(os.path.join(data_dir, "train.csv"))
    test = pd.read_csv(os.path.join(data_dir, "test.csv"))
    return train, test

In [151]:
class ChunkSampler(sampler.Sampler):
    """Sampler that yields a contiguous run of dataset indices, in order.

    Args:
        samples_count: How many indices to yield.
        offset: The first index to yield.
    """

    def __init__(self, samples_count, offset=0):
        self.samples_count = samples_count
        self.offset = offset

    def __len__(self):
        return self.samples_count

    def __iter__(self):
        first = self.offset
        stop = first + self.samples_count
        return iter(range(first, stop))

In [152]:
def get_loader(data, num_values, batch_size=batch_size, start=0):
    """Build a DataLoader over a contiguous slice of a labelled table.

    Args:
        data: DataFrame with a 'label' column; all other columns are used
            as the image values.
        num_values: Number of consecutive rows the loader iterates over.
        batch_size: Samples per batch.
        start: Index of the first row to serve.

    Returns:
        A DataLoader yielding ``(images, labels)`` batches, where the image
        values have been divided by 255.
    """
    targets = data['label'].values
    pixels = data.drop('label', axis=1).values

    dataset = TensorDataset(torch.tensor(pixels / 255), torch.tensor(targets))
    chunk = ChunkSampler(num_values, start)
    return DataLoader(dataset, batch_size=batch_size, sampler=chunk)

In [153]:
def get_test_loader(data, num_values, batch_size=batch_size, start=0):
    """Build a DataLoader over a contiguous slice of an unlabelled table.

    Args:
        data: DataFrame whose columns are all used as image values.
        num_values: Number of consecutive rows the loader iterates over.
        batch_size: Samples per batch.
        start: Index of the first row to serve.

    Returns:
        A DataLoader yielding one-element ``(images,)`` batches, where the
        image values have been divided by 255.
    """
    scaled = torch.tensor(data.values / 255)
    dataset = TensorDataset(scaled)
    chunk = ChunkSampler(num_values, start)
    return DataLoader(dataset, batch_size=batch_size, sampler=chunk)

## Declare custom layers to flatten and unflatten the data

In [154]:
class Flatten(nn.Module):
    """Layer that collapses all non-batch dimensions of a 4-D input into one."""

    def forward(self, x):
        # Unpacking four sizes means the input must have exactly four dimensions.
        batch, _channels, _height, _width = x.size()
        flat = x.view(batch, -1)
        return flat
    
class Unflatten(nn.Module):
    """Layer that views its input as a tensor of shape (N, C, H, W).

    Args:
        N: Size of the first dimension; -1 lets ``view`` infer it.
        C: Size of the second dimension.
        H: Size of the third dimension.
        W: Size of the fourth dimension.
    """

    def __init__(self, N=-1, C=128, H=7, W=7):
        super().__init__()
        self.N, self.C, self.H, self.W = N, C, H, W

    def forward(self, x):
        target_shape = (self.N, self.C, self.H, self.W)
        return x.view(*target_shape)

## Define network model

In [155]:
class Alex(nn.Module):
    """Convolutional classifier whose layer layout follows the AlexNet pattern.

    The input is first viewed as ``(-1, *image_shape)``, passed through five
    convolutions with ReLU activations and three max-pool layers, flattened,
    and classified by three fully connected layers.

    Args:
        image_shape: ``(channels, height, width)`` used to un-flatten each
            input row. The first convolution expects one input channel, and
            ``lin1`` expects 256 features after flattening, which is what a
            1x28x28 input produces with these kernel/stride settings.
        hidden_units: Width of the two hidden fully connected layers. The
            default of 4069 is kept so existing behaviour and parameter
            shapes do not change; it is probably a typo for the 4096 used by
            AlexNet, which callers can now request explicitly.
        num_classes: Number of output scores per sample.
    """

    def __init__(self, image_shape, hidden_units=4069, num_classes=10):
        super(Alex, self).__init__()
        self.unflat = Unflatten(-1, *image_shape)
        self.conv1 = nn.Conv2d(1, 64, kernel_size=11, stride=4, padding=2)
        self.relu1 = nn.ReLU(inplace=True)
        self.mp1 = nn.MaxPool2d(kernel_size=1)
        self.conv2 = nn.Conv2d(64, 192, kernel_size=5, padding=2)
        self.relu2 = nn.ReLU(inplace=True)
        self.mp2 = nn.MaxPool2d(kernel_size=5, stride=2)
        self.conv3 = nn.Conv2d(192, 384, kernel_size=3, padding=1)
        # Previously assigned to `relu4`, which was then overwritten two
        # lines later; ReLU holds no parameters, so the rename does not
        # change the state dict or the computed values.
        self.relu3 = nn.ReLU(inplace=True)
        self.conv4 = nn.Conv2d(384, 256, kernel_size=3, padding=1)
        self.relu4 = nn.ReLU(inplace=True)
        self.conv5 = nn.Conv2d(256, 256, kernel_size=3, padding=1)
        self.relu5 = nn.ReLU(inplace=True)
        self.mp3 = nn.MaxPool2d(kernel_size=1)
        self.flat = Flatten()
        self.do1 = nn.Dropout()
        self.lin1 = nn.Linear(256, hidden_units)
        self.relu6 = nn.ReLU(inplace=True)
        self.do2 = nn.Dropout()
        self.lin2 = nn.Linear(hidden_units, hidden_units)
        self.relu7 = nn.ReLU(inplace=True)
        self.lin3 = nn.Linear(hidden_units, num_classes)

    def forward(self, x):
        """Return the raw class scores for a batch of flattened images."""
        x = self.unflat(x)
        x = self.conv1(x)
        x = self.relu1(x)
        x = self.mp1(x)
        x = self.conv2(x)
        x = self.relu2(x)
        x = self.mp2(x)
        x = self.conv3(x)
        x = self.relu3(x)
        x = self.conv4(x)
        x = self.relu4(x)
        x = self.conv5(x)
        x = self.relu5(x)
        x = self.mp3(x)
        x = self.flat(x)
        x = self.do1(x)
        x = self.lin1(x)
        x = self.relu6(x)
        x = self.do2(x)
        x = self.lin2(x)
        x = self.relu7(x)
        x = self.lin3(x)
        return x

## Declare functions to calculate accuracy and loss

In [156]:
def evaluate_accuracy(predicted, labels):
    """Compare predictions with labels element-wise.

    Args:
        predicted: Tensor of predicted class indices.
        labels: Tensor of true class indices, comparable with ``predicted``.

    Returns:
        ``(accuracy, correct)``: the fraction of matching entries and the
        number of matching entries.
    """
    matches = predicted == labels
    correct = matches.sum().item()
    return correct / len(labels), correct

In [157]:
def evaluate_loss(criterion, outputs, labels):
    """Apply the criterion to a batch.

    Args:
        criterion: Callable taking ``(outputs, labels)`` and returning a
            scalar loss tensor.
        outputs: Model outputs for the batch.
        labels: Targets for the batch.

    Returns:
        ``(value, loss)``: the loss as a Python number (for logging) and the
        loss tensor itself (for back-propagation).
    """
    loss = criterion(outputs, labels)
    return loss.data.item(), loss
    

## Declare MetricCalculator

MetricCalculator accumulates a per-batch metric (accuracy, loss, etc.) and records its average for each epoch.

In [158]:
class MetricCalculator:
    """Running per-epoch average of a metric that is reported batch by batch."""

    def __init__(self):
        # One averaged value per submitted epoch.
        self.data = []
        # Sum and count of the values added since the last submit().
        self.cache = 0
        self.batches_count = 0

    def add_batch(self, item):
        """Add one batch's metric value to the current epoch."""
        self.cache += item
        self.batches_count += 1

    def get_current(self):
        """Return the mean of the values added since the last submit()."""
        total, count = self.cache, self.batches_count
        return total / count

    def submit(self):
        """Store the current mean as a finished epoch and reset the accumulator."""
        epoch_mean = self.get_current()
        self.data.append(epoch_mean)
        self.cache, self.batches_count = 0, 0

    def average(self):
        """Return the mean of all submitted epoch values."""
        epochs = len(self.data)
        return np.sum(self.data) / epochs

    def get(self):
        """Return the list of submitted epoch values."""
        return self.data

Predict the labels for a single batch.

In [159]:
def predict_batch(model, images):
    """Run the model on one batch and pick the highest-scoring class per row.

    Args:
        model: Callable network that maps a batch to per-class scores.
        images: Batch tensor; it is cast to the module-level ``dtype``
            before being passed to the model.

    Returns:
        ``(predicted, outputs)``: the index of the largest score along
        dimension 1 for each row, and the model outputs cast to ``dtype``.
    """
    outputs = model(images.type(dtype)).type(dtype)
    # torch.max over a dimension returns (values, indices); keep the indices.
    predicted = torch.max(outputs.data, 1)[1]
    return predicted, outputs

Make predictions for all the data.

In [160]:
def predict(model, loader):
    """Predict a class for every sample served by ``loader``.

    The model is switched to evaluation mode for the duration of the call so
    that dropout is disabled (the notebook calls this right after a training
    run, which leaves the model in training mode). Gradient tracking is
    turned off as well, since no backward pass happens here. The model's
    previous training/evaluation mode is restored before returning.

    Args:
        model: Network to run.
        loader: DataLoader yielding one-element ``(images,)`` batches.

    Returns:
        A flat list with one predicted class index per sample, in loader order.
    """
    was_training = model.training
    model.eval()
    result = []
    try:
        with torch.no_grad():
            # The trailing comma unpacks the single-element batch tuple.
            for images, in loader:
                predicted, _ = predict_batch(model, images)
                result.extend(predicted.cpu().detach().numpy())
    finally:
        # Leave the model in whatever mode the caller had it in.
        model.train(was_training)

    return result
    

Train the network and calculate metrics. Setting the `learn` flag to `False` skips the weight updates, so the same function can also be used to only validate the network.

In [161]:
def run_cnn(model, optimizer, criterion, loader, batch_size=batch_size, print_each=250, num_epochs=10, learn=True):
    """Run the model over ``loader`` for several epochs and collect metrics.

    Args:
        model: Network to train or evaluate.
        optimizer: Optimizer stepped after every batch when ``learn`` is True.
        criterion: Loss callable taking ``(outputs, labels)``.
        loader: DataLoader yielding ``(images, labels)`` batches.
        batch_size: Kept for backward compatibility. The batch total shown in
            the progress line is now taken from ``len(loader)``, so it stays
            correct even if this value differs from the loader's batch size.
        print_each: Print a progress line every this many batches.
        num_epochs: Number of passes over ``loader``.
        learn: When True the model is put in training mode and its weights
            are updated; when False it is put in evaluation mode and only
            the metrics are computed.

    Returns:
        ``(losses, accuracies)``: two lists with one averaged value per epoch.
    """
    if learn:
        model.train()
    else:
        model.eval()

    batches_count = len(loader)
    loss_calculator = MetricCalculator()
    accuracy_calculator = MetricCalculator()

    # Gradients are only needed when the weights are actually updated;
    # disabling them for evaluation avoids building the autograd graph.
    with torch.set_grad_enabled(learn):
        for epoch in range(num_epochs):
            # Count batches from 1 so the progress line shows the number of
            # batches processed (the old code printed one more than that).
            for iteration, (images, labels) in enumerate(loader, start=1):
                labels = labels.type(torch.cuda.LongTensor)

                if learn:
                    optimizer.zero_grad()

                predicted, outputs = predict_batch(model, images)
                accuracy, _ = evaluate_accuracy(predicted, labels)
                loss_value, loss = evaluate_loss(criterion, outputs, labels)

                accuracy_calculator.add_batch(accuracy)
                loss_calculator.add_batch(loss_value)

                if learn:
                    loss.backward()
                    optimizer.step()

                if iteration % print_each == 0:
                    print ('Epoch [%d/%d], Iter [%d/%d] Loss: %.4f,  Accuracy: %.4f,' %(epoch+1, num_epochs, iteration, batches_count, loss_calculator.get_current(), accuracy_calculator.get_current()))
            accuracy_calculator.submit()
            loss_calculator.submit()

    return loss_calculator.get(), accuracy_calculator.get()
                

## Define PlotBuilder, which lets us quickly generate plots.

In [162]:
class PlotBuilder:
    """Small chainable wrapper around the pyplot interface.

    Args:
        xlabel: Label for the x axis.
        ylabel: Label for the y axis.
    """

    def __init__(self, xlabel='Epoch', ylabel='Loss'):
        plt.xlabel(xlabel)
        plt.ylabel(ylabel)

    def add_line(self, values, label, color='r'):
        """Plot ``values`` against their positions 0..len-1 and return self."""
        positions = range(len(values))
        plt.plot(positions, values, color, label=label)
        return self

    def title(self, title):
        """Set the plot title and return self."""
        plt.title(title)
        return self

    def legend(self):
        """Draw the legend and return self."""
        plt.legend()
        return self

    def show(self):
        """Display the plot."""
        plt.show()

Write the predictions to a submission file.

In [163]:
def make_submission(predictions, filename="my_submissions.csv"):
    """Write predictions to a CSV file with ImageId and Label columns.

    Args:
        predictions: Sequence of predicted labels; row ``i`` (0-based) is
            written with ImageId ``i + 1``.
        filename: Path of the CSV file to write.
    """
    image_ids = list(range(1, len(predictions) + 1))
    columns = {"ImageId": image_ids, "Label": predictions}
    pd.DataFrame(columns).to_csv(filename, index=False, header=True)

# All the work

Read the data.

In [164]:
# Load the train and test tables as pandas DataFrames.
train, test = read_input_data()

Define the train and validation dataset sizes.

In [165]:
# Number of rows used for training; every remaining row of `train` is
# counted as validation data.
train_num = 30000
validation_num = len(train) - train_num
print('Train items count', train_num)
print('Validation items count', validation_num)

Train items count 30000
Validation items count 12000


Define dataloaders.

In [166]:
# Training data: the first `train_num` rows of the training table.
train_loader = get_loader(train, train_num)
# Validation data: the `validation_num` rows that follow the training rows.
validation_loader = get_loader(train, validation_num, start=train_num)
# Test data: every row of the unlabelled test table, in order.
test_loader = get_test_loader(test, len(test))

Define the model and make it use GPU.

In [167]:
# Build the network for the configured image shape.
model = Alex(image_shape)
# Move the model's parameters to the GPU.
model.cuda()

Define optimizer.

In [168]:
# Adam over all model parameters, with learning rate 1e-3 and beta1 set to 0.5.
optimizer = optim.Adam(model.parameters(), lr=1e-3, betas=(0.5, 0.999))

Define loss.

In [169]:
# Cross-entropy loss applied to the model's raw class scores.
criterion = nn.CrossEntropyLoss().cuda()

Define number of epochs.

In [170]:
# Number of passes over the loader made by each run_cnn call below.
num_epochs=12

Train the network.

In [None]:
# Train on the training loader; each returned list holds one averaged value per epoch.
train_loss, train_accuracy = run_cnn(model, optimizer, criterion, train_loader, num_epochs=num_epochs)

Validate the network.

In [None]:
# Evaluate on the validation loader without updating the weights (learn=False).
# NOTE(review): the weights do not change between these epochs, so every epoch
# evaluates the same trained model on the same rows. These lists are therefore
# not validation metrics per training epoch; confirm this is intended before
# comparing them with the training curves.
validation_loss, validation_accuracy = run_cnn(model, optimizer, criterion, validation_loader, learn=False, num_epochs=num_epochs)

Generate plots of the training and validation loss and accuracy.

In [None]:
# legend() has to be called after the lines are added: it only lists lines
# that already exist, so calling it first produced an empty legend.
# The "Accauracy" typo in the legend labels is corrected as well.
PlotBuilder().add_line(train_loss, 'Train Loss', color='r').add_line(validation_loss, 'Validation Loss', color='b').legend().show()
PlotBuilder(ylabel='Accuracy').add_line(train_accuracy, 'Train Accuracy', color='r').add_line(validation_accuracy, 'Validation Accuracy', color='b').legend().show()

Train the network further on the validation dataset.

In [None]:
# Continue training on the validation rows. The trailing semicolon keeps the
# notebook from echoing the returned metric lists.
run_cnn(model, optimizer, criterion, validation_loader, learn=True, num_epochs=num_epochs);

Make predictions for the test dataset.

In [None]:
# One predicted class index per test row, in the order served by the loader.
predictions = predict(model, test_loader)

In [None]:
# Write the predictions to the default output file, my_submissions.csv.
make_submission(predictions)