## Google Colabs Setup

In [0]:
# http://pytorch.org/
from os import path
from wheel.pep425tags import get_abbr_impl, get_impl_ver, get_abi_tag
platform = '{}{}-{}'.format(get_abbr_impl(), get_impl_ver(), get_abi_tag())

accelerator = 'cu80'

#!pip uninstall torch -y
!pip install -q http://download.pytorch.org/whl/{accelerator}/torch-0.3.0.post4-{platform}-linux_x86_64.whl torchvision
import torch

In [0]:
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.autograd import Variable
from torch.utils.data import DataLoader
from torch.utils.data.dataset import Dataset
import numpy as np
import pandas as pd

In [3]:
torch.cuda.is_available()

True

## Import Torch Data

In [0]:
import random
from math import floor

def train_valid_split(dataset, test_size = 0.25, shuffle = False, random_seed = 0):
    """ Return a list of splitted indices from a DataSet.
    Indices can be used with DataLoader to build a train and validation set.
    
    Arguments:
        A Dataset
        A test_size, as a float between 0 and 1 (percentage split) or as an int (fixed number split)
        Shuffling True or False
        Random seed
    """
    length = dataset.__len__()
    indices = list(range(1,length))
    
    if shuffle == True:
        random.seed(random_seed)
        random.shuffle(indices)
    
    if type(test_size) is float:
        split = floor(test_size * length)
    elif type(test_size) is int:
        split = test_size
    else:
        raise ValueError('%s should be an int or a float' % str)
    return indices[split:], indices[:split]

In [5]:
# dtype = torch.cuda.FloatTensor
dtype = torch.FloatTensor
bs = 64

train_transforms = transform=transforms.Compose([
                       transforms.ToTensor(),
                       transforms.Normalize((0.1307,), (0.3081,))
                   ])

train_data = datasets.MNIST('../data/mnist', train=True, download=True,
                  transform=train_transforms)

valid_data = datasets.MNIST('../data/mnist', train=True, download=True,
                  transform=train_transforms)

test_data = datasets.MNIST('../data/mnist', train=False, download=True,
                  transform=train_transforms)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Processing...
Done!


In [0]:
from torch.utils.data.sampler import SubsetRandomSampler

train_idx, valid_idx = train_valid_split(train_data, shuffle=True)
train_sampler = SubsetRandomSampler(train_idx)
valid_sampler = SubsetRandomSampler(valid_idx)

In [0]:
kwargs = {'num_workers': 1, 'pin_memory': True}
bs=64
# train_loader = torch.utils.data.DataLoader(train_dataset, 
#                                         batch_size=bs, sampler=train_sampler, **kwargs)
train_loader2 = DataLoader(train_data,
        batch_size=bs, sampler=train_sampler, **kwargs)

valid_loader2 = DataLoader(valid_data,
        batch_size=bs, sampler=valid_sampler, **kwargs)

test_loader2 = DataLoader(train_data,
        batch_size=bs, **kwargs)

## Import Kaggle Data

In [0]:
from sklearn.model_selection import train_test_split

#dtype = torch.cuda.FloatTensor
dtype = torch.FloatTensor

class DigitDataset(Dataset):
    
    def __init__(self, csv_path, dtype, mode):
        train_data = pd.read_csv(csv_path)
        self.dtype = dtype
        self.mode = mode
        if(mode == "train" or mode == "val"):
            labels = train_data.label.values.astype("int")
            pixels = train_data.drop('label', axis=1).values.astype('float32')
            X_train, X_valid, y_train, y_valid = train_test_split(pixels,
                                                                  labels, shuffle=False,
                                                                  test_size=0.001)
            self.N = X_train.shape[0]
            self.V = X_valid.shape[0]
            self.X_train = X_train.reshape([self.N, 1, 28, 28])
            self.y_train = y_train.reshape([self.N, 1])
            self.X_valid = X_valid.reshape([self.V, 1, 28, 28])
            self.y_valid = y_valid.reshape([self.V, 1])
#             print(y_train.shape, y_valid.shape)
        if(mode == "test"):
            test_data = pd.read_csv(csv_path)
            pixels = test_data.values.astype("int")
            self.T = test_data.shape[0]
            self.X_test = pixels.reshape([self.T, 1, 28, 28])
            
    def __getitem__(self, index):
        if (self.mode == "train" or self.mode == "val"):
            label = torch.from_numpy(self.y_train[index]).type(self.dtype)
            img = torch.from_numpy(self.X_train[index]).type(self.dtype)
            return img, label
        
#         if (self.mode == "val"):
#             label = torch.from_numpy(self.y_train[index]).type(self.dtype)
#             img = torch.from_numpy(self.X_train[index]).type(self.dtype)
#             return img, label
        
        if (self.mode == "test"):
            img = torch.from_numpy(self.X_test[index]).type(self.dtype)
            return img, 0
    
    def __len__(self):
        if (self.mode == "train"):
            return self.N
        
        if (self.mode == "val"):
            return self.V
        
        if (self.mode == "test"):
            return self.T
        

In [40]:
!pip install kaggle

from google.colab import files

uploaded = files.upload()



Saving kaggle.json to kaggle.json


In [41]:
!mv kaggle.json .kaggle
!chmod 600 .kaggle/kaggle.json
!kaggle competitions download -c digit-recognizer

train.csv: Downloaded 73MB of 73MB
test.csv: Downloaded 49MB of 49MB
sample_submission.csv: Downloaded 235KB of 235KB


In [45]:
PATH="/content/.kaggle/competitions/digit-recognizer/"

kaggle_train_data = DigitDataset(PATH+"train.csv", dtype, "train")
kaggle_valid_data = DigitDataset(PATH+"train.csv", dtype, "train")
kaggle_test_data = DigitDataset(PATH+"test.csv", dtype, "test")

print(len(kaggle_valid_data))

41958


In [0]:
kaggle_train_idx, kaggle_valid_idx = train_valid_split(kaggle_train_data, shuffle=True)
kaggle_train_sampler = SubsetRandomSampler(kaggle_train_idx)
kaggle_valid_sampler = SubsetRandomSampler(kaggle_valid_idx)

In [0]:
bs = 64

train_transforms = transform=transforms.Compose([
                       transforms.ToPILImage(),
                       transforms.RandomRotation(20),
                       transforms.ToTensor(),
                       transforms.Normalize((0.1307,), (0.3081,)),
                       
])

kwargs = {'num_workers': 1, 'pin_memory': True}

train_loader = DataLoader(kaggle_train_data, sampler=kaggle_train_sampler,
        batch_size=bs, **kwargs)

valid_loader = DataLoader(kaggle_valid_data, sampler=kaggle_valid_sampler,
        batch_size=bs, **kwargs)

test_loader = DataLoader(kaggle_test_data,
        batch_size=bs, **kwargs)

## Set up Model

In [0]:
class Flatten(nn.Module):
    def forward(self, x):
        N, C, H, W = x.size()
        return x.view(-1, 320)

class SimpleConv(nn.Module):
    def __init__(self):
        super(SimpleConv, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3)
        self.conv1_bn = nn.BatchNorm2d(32)
        self.conv2 = nn.Conv2d(32, 32, kernel_size=3)
        self.conv2_bn = nn.BatchNorm2d(32)
        self.conv2_drop = nn.Dropout2d()
        self.conv3 = nn.Conv2d(32, 64, kernel_size=3)
        self.conv3_bn = nn.BatchNorm2d(64)
        self.conv4 = nn.Conv2d(64, 64, kernel_size=3)
        self.conv4_bn = nn.BatchNorm2d(64)
        self.conv4_drop = nn.Dropout2d()
        self.dense1 = nn.Linear(4096, 512)
        self.dense1_bn = nn.BatchNorm1d(512)
        self.dense2 = nn.Linear(512, 512)
        self.dense2_bn = nn.BatchNorm1d(512)
        self.dense3 = nn.Linear(512, 10)
    
    def forward(self, x):
        x = F.relu(self.conv1_bn(self.conv1(x)))
        x = F.relu(F.max_pool2d(self.conv2_bn(self.conv2_drop(self.conv2(x))), 2))
        x = F.relu(self.conv3_bn(self.conv3(x)))
        x = F.relu(self.conv4_bn(self.conv4_drop(self.conv4(x))))
        x = x.view(x.size(0), -1)
        x = F.relu(self.dense1_bn(F.dropout(self.dense1(x), training=self.training)))
        x = F.relu(self.dense2_bn(F.dropout(self.dense2(x), training=self.training)))
        x = self.dense3(x)
#         print(x.size())
        return F.log_softmax(x, dim=1)
    
#   def train(self, x, y, optimizer, epochs)

In [0]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
        self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
        self.conv2_drop = nn.Dropout2d()
        self.fc1 = nn.Linear(320, 50)
        self.fc2 = nn.Linear(50, 10)

    def forward(self, x):
        x = F.relu(F.max_pool2d(self.conv1(x), 2))
        x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))
        x = x.view(-1, 320)
        x = F.relu(self.fc1(x))
        x = F.dropout(x, training=self.training)
        x = self.fc2(x)
        return F.log_softmax(x, dim=1)



In [80]:
# model = simpleConv()
model = SimpleConv()
model.cuda()

SimpleConv(
  (conv1): Conv2d (1, 32, kernel_size=(3, 3), stride=(1, 1))
  (conv1_bn): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True)
  (conv2): Conv2d (32, 32, kernel_size=(3, 3), stride=(1, 1))
  (conv2_bn): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True)
  (conv2_drop): Dropout2d(p=0.5)
  (conv3): Conv2d (32, 64, kernel_size=(3, 3), stride=(1, 1))
  (conv3_bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True)
  (conv4): Conv2d (64, 64, kernel_size=(3, 3), stride=(1, 1))
  (conv4_bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True)
  (conv4_drop): Dropout2d(p=0.5)
  (dense1): Linear(in_features=4096, out_features=512)
  (dense1_bn): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True)
  (dense2): Linear(in_features=512, out_features=512)
  (dense2_bn): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True)
  (dense3): Linear(in_features=512, out_features=10)
)

In [0]:
optimizer = optim.Adam(model.parameters(), lr=.003)
exp_lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)
criterion = nn.CrossEntropyLoss()

def train(epochs=1):
    for epoch in range(epochs):
        model.train()
        
        exp_lr_scheduler.step()
        
        for batch_idx, (data, target) in enumerate(train_loader):
            data = Variable(data.cuda())
            target = Variable(target.cuda()).type(torch.cuda.LongTensor).view(len(target))
            optimizer.zero_grad()
            pred = model(data)
#             if batch_idx == 0: print(target, pred)
            loss = criterion(pred, target)
            loss.backward()
            optimizer.step()
      
            log_interval = 200
            if batch_idx % log_interval == 0:
                print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                    epoch+1, batch_idx * len(data), len(train_loader.dataset),
                    100. * batch_idx / len(train_loader), loss.data[0]))

def eval(data_loader):
  running_val_loss = 0
  running_val_correct = 0
  num_elements = 0
  
  model.eval()
  for batch_idx, (data, target) in enumerate(data_loader):
      data = Variable(data.cuda())
      target = Variable(target.cuda()).type(torch.cuda.LongTensor).view(len(target))
      pred = model(data)
      _, pred_max = torch.max(pred.data, 1)

      loss = F.cross_entropy(pred, target, size_average=False).data[0]
      running_val_loss += loss
      running_val_correct += torch.sum(pred_max == target.data)
      num_elements += len(target)

  print("Log loss:", "{:.2e}".format(running_val_loss / num_elements), "Accuracy:", round((running_val_correct / num_elements), 4), "out of", num_elements, "\n")


In [103]:
train(4)



In [123]:
print("Train:")
eval(train_loader)

print("Validation:")
eval(valid_loader)

Train:
Log loss: 2.80e-03 Accuracy: 0.9992 out of 31468 

Validation:
Log loss: 2.07e-02 Accuracy: 0.9926 out of 10489 



In [115]:
train(20)



## Make Predictions

In [118]:
entries = []
i = 1
for batch_idx, (data, target) in enumerate(test_loader):
    data = Variable(data.cuda(), volatile=True)
    preds = model(data)
    for pred in preds:
#         print(pred)
        pred = pred.max(0)[1].data[0]
        entry = { 'ImageId': i, 'Label': pred }
        entries.append(entry)
        i += 1
    
print(entries[0])

{'ImageId': 1, 'Label': 2}


In [119]:
submit = pd.DataFrame(data=entries)
submit.to_csv('mnist_torch_submit.csv', index=False)
submit.head()

Unnamed: 0,ImageId,Label
0,1,2
1,2,0
2,3,9
3,4,9
4,5,3


In [120]:
!kaggle competitions submit -c digit-recognizer -f mnist_torch_submit.csv -m "PyTorch, now with more epochs and rotations!"

Traceback (most recent call last):
  File "/usr/local/bin/kaggle", line 11, in <module>
    sys.exit(main())
  File "/usr/local/lib/python3.6/dist-packages/kaggle/cli.py", line 18, in main
    out = args.func(**command_args)
  File "/usr/local/lib/python3.6/dist-packages/kaggle/api/kaggle_api_extended.py", line 70, in competitionSubmit
    submitResult = self.competitions_submissions_submit(id = competition, blob_file_tokens = uploadResultToken, submission_description = message)
  File "/usr/local/lib/python3.6/dist-packages/kaggle/api/kaggle_api.py", line 436, in competitions_submissions_submit
    (data) = self.competitions_submissions_submit_with_http_info(blob_file_tokens, submission_description, id, **kwargs)  # noqa: E501
  File "/usr/local/lib/python3.6/dist-packages/kaggle/api/kaggle_api.py", line 527, in competitions_submissions_submit_with_http_info
    collection_formats=collection_formats)
  File "/usr/local/lib/python3.6/dist-packages/kaggle/api_client.py", line