# Starting point
In this notebook I will for the first time try out PyTorch. I will stick to methods I have already tries out, but using PyTorch instead of Keras for deeplerning.


# The Data

In [1]:
import pandas as pd
import numpy as np
import torch

import matplotlib.pyplot as plt
import seaborn as sns
sns.set()
%matplotlib inline

## Training set

In [2]:
data_df = pd.read_csv('train.csv')
digits = data_df.iloc[:, 1:].values
labels = data_df['label'].values

## Test set

In [3]:
test_df = pd.read_csv('test.csv')
digits_test = test_df.values

## Modify the data

Reshape the data.

In [6]:
img_dimensions = (28, 28, 1)

digits = digits.reshape(-1, *img_dimensions)
digits_test = digits_test.reshape(-1, *img_dimensions)

Scale the pixels.

In [7]:
digits_scaled = digits / 255
digits_test_scaled = digits_test / 255

## Validation set

In [189]:
from sklearn.model_selection import train_test_split
X, X_val, y, y_val = train_test_split(digits_scaled, labels, test_size = 8000, stratify = labels_one_hot.toarray(), random_state = 0)

## Convert to Tensors

In [190]:
# Training
X = torch.from_numpy(X).float()
X_val = torch.from_numpy(X_val).float()
y = torch.from_numpy(y).long()
y_val = torch.from_numpy(y_val).long()

# Test
X_test = torch.from_numpy(digits_test_scaled).float()

# Fully Connected Network
Let's start out basic with a fully connected network with two hidden layers.

In [191]:
import torch.nn.functional as F
import torch.nn as nn

In [192]:
class FullyConnected(nn.Module):
    
    def __init__(self):
        
        super(FullyConnected, self).__init__()
        
        self.fc1 = nn.Linear(28*28, 120)
        self.fc2 = nn.Linear(120, 120)
        self.output = nn.Linear(120, 10)
    
    def forward(self, x):
        x = F.elu(self.fc1(x))
        x = F.elu(self.fc2(x))
        x = F.softmax(self.output(x), dim=0)
        
        return x
        
        

In [193]:
fc_net = FullyConnected()

In [194]:
import torch.optim as optim

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(fc_net.parameters())

In [195]:
X.reshape(-1, 28*28).shape

torch.Size([34000, 784])

In [196]:
import torch.utils.data

train_dataset = torch.utils.data.TensorDataset(X.reshape(-1, 28*28), y)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=64)

validation_dataset = torch.utils.data.TensorDataset(X_val.reshape(-1, 28*28), y_val)
validation_loader = torch.utils.data.DataLoader(validation_dataset, batch_size=64)



In [211]:
# Function inspired by https://pytorch.org/tutorials/beginner/transfer_learning_tutorial.html
def train_model(model, train_loader, optimizer, criterion, validation_loader = None, epochs = 2):
    
    # Only enter the validation state if there is a validation_loader
    phases = ['train']
    data_set_loaders = {'train' : train_loader, 'val' : validation_loader} 
    if validation_loader:
        phases.append('val')
        
    for epoch in range(epochs):
        
        print('Epoch {}/{}'.format(epoch, epochs - 1))
        print('-' * 10)

        for phase in phases:
            
            data_set_loader = data_set_loaders[phase]
            
            # Only update model weights based on the training data
            if phase == 'train':
                model.train()
            else:
                model.eval()
                
            running_loss = 0.0
            running_corrects = 0
            
            for i, batch in enumerate(data_set_loader):
                inputs, labels = batch
                
                #labels = torch.autograd.Variable(labels).type(torch.LongTensor)

                optimizer.zero_grad()
                
                # Only track history during training
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)
                    loss = criterion(outputs, labels)
                    predictions = torch.argmax(outputs, dim=1)
                    
                    # Only perform backpropagation during training
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()
                    
                # Save statistics
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(predictions == labels.data)
                
            epoch_loss = running_loss / len(data_set_loader.dataset)
            epoch_acc = running_corrects.double() / len(data_set_loader.dataset)

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(
                phase, epoch_loss, epoch_acc))
                
            

In [214]:
train_model(fc_net, train_loader, optimizer, criterion, validation_loader=validation_loader, epochs=20)

Epoch 0/19
----------
train Loss: 2.1652 Acc: 0.9111
val Loss: 2.1660 Acc: 0.9117
Epoch 1/19
----------
train Loss: 2.1648 Acc: 0.9192
val Loss: 2.1658 Acc: 0.9167
Epoch 2/19
----------
train Loss: 2.1647 Acc: 0.9231
val Loss: 2.1656 Acc: 0.9236
Epoch 3/19
----------
train Loss: 2.1645 Acc: 0.9281
val Loss: 2.1655 Acc: 0.9261
Epoch 4/19
----------
train Loss: 2.1645 Acc: 0.9307
val Loss: 2.1655 Acc: 0.9286
Epoch 5/19
----------
train Loss: 2.1643 Acc: 0.9359
val Loss: 2.1653 Acc: 0.9336
Epoch 6/19
----------
train Loss: 2.1642 Acc: 0.9395
val Loss: 2.1652 Acc: 0.9357
Epoch 7/19
----------
train Loss: 2.1641 Acc: 0.9419
val Loss: 2.1652 Acc: 0.9377
Epoch 8/19
----------
train Loss: 2.1640 Acc: 0.9438
val Loss: 2.1651 Acc: 0.9395
Epoch 9/19
----------
train Loss: 2.1640 Acc: 0.9469
val Loss: 2.1652 Acc: 0.9417
Epoch 10/19
----------
train Loss: 2.1639 Acc: 0.9487
val Loss: 2.1651 Acc: 0.9464
Epoch 11/19
----------
train Loss: 2.1639 Acc: 0.9513
val Loss: 2.1650 Acc: 0.9471
Epoch 12/19
--

Seems like it works okay!

I'm not interested in doing hyper parameter tuning, so let's just check that it works okay on the test set as well.

In [239]:
fc_net.eval()

FullyConnected(
  (fc1): Linear(in_features=784, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=120, bias=True)
  (output): Linear(in_features=120, out_features=10, bias=True)
)

In [240]:
test_dataset = torch.utils.data.TensorDataset(X_test.reshape(-1, 28*28))
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=64)

In [241]:
len(test_dataset)

28000

In [242]:
predictions = []
for batch in test_loader:
    batch = batch[0]
    predictions += list(torch.argmax(fc_net(batch), dim=1))

In [251]:
submission_df = pd.DataFrame(list(zip(np.arange(1, 28001), map(lambda x: x.item(), predictions))), columns = ['ImageID', 'Label'])
submission_df.set_index('ImageID').to_csv('Submissions/pytorch_fc_1.csv')

Submission accuracy of 95.6% accuracy, which is similar to my validation accuracy. Great!

# CNN
Now let's build a Convolutional Network instead. I have done this once before using Keras, but this time I will opt to build a better understanding of how it works.