## 0. Import Libraries

In [1]:
import numpy as np
import pandas as pd 
import matplotlib.pyplot as plt

import torch
import torchvision
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data
import torch.optim as optim

from keras.utils import to_categorical
from keras.utils import plot_model

from sklearn.model_selection import train_test_split

## 1. Import Dataset

In [2]:
# Read the three competition CSVs (train, test, sample submission) from the
# local "digit-recognizer" directory into DataFrames.
Train, Test, Submission = (
    pd.read_csv(csv_path)
    for csv_path in (
        "digit-recognizer/train.csv",
        "digit-recognizer/test.csv",
        "digit-recognizer/sample_submission.csv",
    )
)

print("OK!")

OK!


In [3]:
# Report (rows, columns) for each loaded frame, one per line.
print(
    "Train shape: {}".format(Train.shape),
    "Test shape: {}".format(Test.shape),
    "Submission shape: {}".format(Submission.shape),
    sep="\n",
)

Train shape: (42000, 785)
Test shape: (28000, 784)
Submission shape: (28000, 2)


## 2. Data Modifications

In [4]:
# Drop labels from the training set to obtain the pixel features.
# `columns=` is used instead of a positional axis argument: pandas 2.0 no longer
# accepts `axis` positionally in DataFrame.drop and raises a TypeError.
X = Train.drop(columns=['label']).values
# Create labels
y = Train['label'].values

# Test data (the test CSV has no label column, so every column is a feature)
X_Test = Test.values


In [5]:
# Hold out 20% of the labelled rows for validation; the fixed random_state
# makes the shuffled split repeatable.
(training_images, validation_images,
 training_labels, validation_labels) = train_test_split(
    X,
    y,
    test_size=0.2,
    shuffle=True,
    random_state=0,
)

In [6]:
# Turn each flat pixel row into a 28x28 image, keeping the row count as the
# leading dimension.
training_images = training_images.reshape(len(training_images), 28, 28)
validation_images = validation_images.reshape(len(validation_images), 28, 28)
test_images = X_Test.reshape(len(X_Test), 28, 28)

# Show the resulting shapes: train, validation, test.
for image_array in (training_images, validation_images, test_images):
    print(image_array.shape)

(33600, 28, 28)
(8400, 28, 28)
(28000, 28, 28)


In [7]:
# Images are divided by 255.0 (presumably mapping 0-255 pixel intensities into
# [0, 1] -- confirm against the CSV contents); labels are converted as-is.

# Training split: image/label tensors wrapped in a TensorDataset
training_labels_tensor = torch.tensor(training_labels)
training_images_tensor = torch.tensor(training_images).div(255.0)
training_tensor = torch.utils.data.TensorDataset(
    training_images_tensor,
    training_labels_tensor,
)

# Validation split: same treatment as the training split
validation_labels_tensor = torch.tensor(validation_labels)
validation_images_tensor = torch.tensor(validation_images).div(255.0)
validation_tensor = torch.utils.data.TensorDataset(
    validation_images_tensor,
    validation_labels_tensor,
)

# Test split: images only, there are no labels to pair them with
test_images_tensor = torch.tensor(test_images).div(255.0)

In [8]:
# Batch iterators for train / validation / test.
# All three use the same batch size and worker count; the test loader is the
# only one that is not shuffled, so its predictions stay in the original row order.
loader_options = dict(batch_size=16, num_workers=2)

train_loader = torch.utils.data.DataLoader(training_tensor, shuffle=True, **loader_options)
val_loader = torch.utils.data.DataLoader(validation_tensor, shuffle=True, **loader_options)
test_loader = torch.utils.data.DataLoader(test_images_tensor, shuffle=False, **loader_options)

In [9]:
# Detect CUDA once; later cells read `train_on_gpu` to decide where the model lives.
train_on_gpu = torch.cuda.is_available()

print('Training on GPU...' if train_on_gpu else 'Training on CPU...')

Training on GPU...


## 3. Training

In [10]:
class Net(nn.Module):
    """Convolutional classifier mapping 1-channel images to 10 class logits.

    The network has two stages:

    * ``conv_block``: three 3x3, stride-1, padding-1 convolutions
      (1 -> 32 -> 64 -> 128 channels), each followed by batch norm and ReLU,
      with a 2x2 max-pool after the second and third convolution.
    * ``linear_block``: dropout + fully connected layers
      (128*7*7 -> 128 -> 64 -> 10) with batch norm and ReLU between them.

    The first linear layer expects 128*7*7 features, which corresponds to a
    28x28 input after the two 2x2 poolings.
    """

    def __init__(self):
        super().__init__()

        # Convolutional feature extractor. Layers are listed in the order they
        # are applied; the position of each layer in the Sequential is kept so
        # state_dict keys (conv_block.<idx>.*) stay the same.
        conv_layers = [
            nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(inplace=True),
            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
        ]
        self.conv_block = nn.Sequential(*conv_layers)

        # Classifier head with increasing dropout (0.2, 0.25, 0.5) ahead of
        # each fully connected layer; the last layer emits raw logits.
        linear_layers = [
            nn.Dropout(p=0.2),
            nn.Linear(128*7*7, 128),
            nn.BatchNorm1d(128),
            nn.ReLU(inplace=True),
            nn.Dropout(0.25),
            nn.Linear(128, 64),
            nn.BatchNorm1d(64),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(64, 10),
        ]
        self.linear_block = nn.Sequential(*linear_layers)

    def forward(self, x):
        """Return class logits of shape (batch, 10) for a (batch, 1, H, W) input."""
        features = self.conv_block(x)
        # Collapse channel and spatial dimensions into one feature vector per sample.
        flattened = features.view(features.size(0), -1)
        return self.linear_block(flattened)


# Build the network
conv_model = Net()

# Use GPU if available (Module.cuda() moves the parameters and returns the module itself)
if train_on_gpu:
    conv_model = conv_model.cuda()

In [11]:
# Loss function: cross-entropy on the raw logits produced by the model
criterion = nn.CrossEntropyLoss()

# Optimizer: Adam over every model parameter, initial learning rate 0.003
optimizer = optim.Adam(conv_model.parameters(), lr=0.003)

# Learning-rate schedule: multiply the learning rate by 0.1 every 7 scheduler steps
exp_lr_scheduler = optim.lr_scheduler.StepLR(
    optimizer,
    step_size=7,
    gamma=0.1,
)

In [12]:
# Containers for the loss curves filled in by train_model() and evaluate()
train_losses, train_counter = [], []
test_losses = []

# x-axis positions (cumulative training samples) for one evaluation per epoch.
# NOTE(review): the range is hard-coded to 25 + 1 entries, while the training
# cell below runs 100 epochs -- confirm which one is intended before plotting
# test_losses against test_counter.
samples_per_epoch = len(train_loader.dataset)
test_counter = [samples_per_epoch * epoch_idx for epoch_idx in range(25 + 1)]

In [13]:
def train_model(epoch):
    """Train ``conv_model`` for one pass over ``train_loader``.

    Uses the module-level ``optimizer`` and ``criterion``. Every 100 batches it
    prints a progress line and appends the current batch loss to
    ``train_losses`` and the cumulative number of training samples seen so far
    to ``train_counter``.

    Parameters
    ----------
    epoch : int
        Zero-based epoch index (the driver loop passes ``range(num_epochs)``
        values); it is printed as ``epoch + 1``.

    Returns
    -------
    None
    """
    conv_model.train()

    # Follow the model's device instead of calling .cuda() unconditionally, so
    # the loop also works when the notebook falls back to CPU.
    device = next(conv_model.parameters()).device
    samples_per_epoch = len(train_loader.dataset)

    for batch_idx, (data, target) in enumerate(train_loader):
        # Insert the channel dimension expected by Conv2d: (N, H, W) -> (N, 1, H, W)
        data = data.unsqueeze(1).to(device)
        target = target.to(device)

        optimizer.zero_grad()
        output = conv_model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()

        if (batch_idx + 1) % 100 == 0:
            batch_loss = loss.item()
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch+1, (batch_idx + 1) * len(data), samples_per_epoch,
                100. * (batch_idx + 1) / len(train_loader), batch_loss))
            train_losses.append(batch_loss)
            # Cumulative samples seen: derived from the loader's real batch size
            # and the zero-based epoch index (previously a stale `64` and
            # `epoch - 1`, which produced negative counts in the first epoch).
            train_counter.append(
                (batch_idx + 1) * train_loader.batch_size + epoch * samples_per_epoch
            )
            
def evaluate(data_loader):
    """Compute average loss and accuracy of ``conv_model`` on ``data_loader``.

    The model is switched to eval mode and gradients are disabled. The average
    per-sample loss is appended to the module-level ``test_losses`` list and a
    summary line is printed.

    Parameters
    ----------
    data_loader : torch.utils.data.DataLoader
        Loader yielding ``(images, labels)`` batches; images are given a
        channel dimension before being fed to the model.

    Returns
    -------
    None
    """
    conv_model.eval()
    # Follow the model's device instead of calling .cuda() unconditionally.
    device = next(conv_model.parameters()).device
    loss = 0.0
    correct = 0
    with torch.no_grad():
        for data, target in data_loader:
            data = data.unsqueeze(1).to(device)
            target = target.to(device)

            output = conv_model(data)

            # Sum (not mean) per batch so dividing by the dataset size below
            # gives the true per-sample average even with a ragged last batch.
            # `reduction='sum'` replaces the deprecated `size_average=False`.
            loss += F.cross_entropy(output, target, reduction='sum').item()

            pred = output.max(1, keepdim=True)[1]
            # .item() keeps `correct` a plain Python int rather than a device tensor.
            correct += pred.eq(target.view_as(pred)).sum().item()

    num_samples = len(data_loader.dataset)
    loss /= num_samples
    test_losses.append(loss)
    print('\nAverage Val Loss: {:.4f}, Val Accuracy: {}/{} ({:.3f}%)\n'.format(
        loss, correct, num_samples,
        100. * correct / num_samples))

In [14]:
# Number of full passes over the training set
num_epochs = 100

# Per epoch, in this order: train, score on the validation loader, then advance
# the learning-rate schedule by one step.
for epoch in range(num_epochs):
    train_model(epoch)
    evaluate(val_loader)
    exp_lr_scheduler.step()






Average Val Loss: 0.0498, Val Accuracy: 8273/8400 (98.488%)


Average Val Loss: 0.0401, Val Accuracy: 8303/8400 (98.845%)


Average Val Loss: 0.0361, Val Accuracy: 8309/8400 (98.917%)


Average Val Loss: 0.0325, Val Accuracy: 8315/8400 (98.988%)


Average Val Loss: 0.0312, Val Accuracy: 8316/8400 (99.000%)


Average Val Loss: 0.0264, Val Accuracy: 8325/8400 (99.107%)


Average Val Loss: 0.0253, Val Accuracy: 8335/8400 (99.226%)


Average Val Loss: 0.0207, Val Accuracy: 8347/8400 (99.369%)


Average Val Loss: 0.0173, Val Accuracy: 8361/8400 (99.536%)


Average Val Loss: 0.0176, Val Accuracy: 8361/8400 (99.536%)


Average Val Loss: 0.0189, Val Accuracy: 8354/8400 (99.452%)


Average Val Loss: 0.0193, Val Accuracy: 8352/8400 (99.429%)


Average Val Loss: 0.0185, Val Accuracy: 8360/8400 (99.524%)


Average Val Loss: 0.0181, Val Accuracy: 8358/8400 (99.500%)


Average Val Loss: 0.0184, Val Accuracy: 8356/8400 (99.476%)


Average Val Loss: 0.0171, Val Accuracy: 8360/8400 (99.524%)


Average

In [16]:
def make_predictions(data_loader):
    """Predict a class index for every sample yielded by ``data_loader``.

    Parameters
    ----------
    data_loader : torch.utils.data.DataLoader
        Loader yielding image batches without labels; a channel dimension is
        added to each batch before it is fed to ``conv_model``.

    Returns
    -------
    torch.LongTensor
        CPU tensor of shape ``(num_samples, 1)`` holding the argmax class of
        each sample, in loader order. An empty ``LongTensor`` is returned when
        the loader yields no batches.
    """
    conv_model.eval()
    # Run on the device the model actually lives on.
    device = next(conv_model.parameters()).device

    batch_preds = []
    # Inference only: disabling autograd avoids building graphs and saves memory.
    with torch.no_grad():
        for data in data_loader:
            data = data.unsqueeze(1).to(device)
            output = conv_model(data)
            batch_preds.append(output.max(1, keepdim=True)[1].cpu())

    if not batch_preds:
        return torch.LongTensor()
    # Concatenate once at the end instead of re-allocating on every batch.
    return torch.cat(batch_preds, dim=0)

In [17]:
# Predict a class index for every test image, in the (unshuffled) test loader order
test_set_preds = make_predictions(test_loader)

In [18]:
# Load the sample submission as the output template (the same file was read into `Submission` earlier)
submission_df = pd.read_csv("digit-recognizer/sample_submission.csv")

In [19]:
# Convert the (N, 1) prediction tensor to NumPy, drop the singleton axis and
# store the result as the Label column; then preview the first rows.
predicted_labels = test_set_preds.numpy().squeeze()
submission_df['Label'] = predicted_labels
submission_df.head()

Unnamed: 0,ImageId,Label
0,1,2
1,2,0
2,3,9
3,4,0
4,5,3


In [20]:
# Write the submission file; index=False keeps the DataFrame index out of the CSV
submission_df.to_csv('submission.csv', index=False)