In [1]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torch.autograd import Variable
from sklearn.model_selection import train_test_split

In [2]:
train = pd.read_csv("../input/digit-recognizer/train.csv")
test = pd.read_csv("../input/digit-recognizer/test.csv")

In [3]:
"""
#the most basic data preprocessing
class MNIST(Dataset):
    def __init__(self, X, y=None):
        self.X = X
        self.y = y
        
    def __len__(self):
        return len(self.X) #or self.data
    
    def __getitem__(self,idx):
        if self.y is not None: #basically means if self.train is true, this diffs btw training and test dsets
            return self.X[idx], self.y[idx]
        self.X[idx]
"""

class MNIST(Dataset):
    def __init__(self, dataframe, train=True):
        self.train = train
        if train:
            self.labels = torch.tensor(dataframe['label'].values.astype(np.int64))
            dataframe.drop('label', axis = 1, inplace=True)
        self.data = torch.reshape(torch.tensor(dataframe.values.astype(np.float32)), (-1, 1, 28, 28))/255

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        input_data = self.data[index]
        if self.train:
            label = self.labels[index]
            return input_data, label
        return input_data

In [4]:
train_dataset = MNIST(train)
train_set, val_set = train_test_split(train_dataset, test_size=0.3, random_state = 42)

In [5]:
val_iter = iter(val_set)
image, label = next(val_iter)
image.size()

torch.Size([1, 28, 28])

In [6]:
train_loader = DataLoader(train_set, batch_size = 64, shuffle=True)
val_loader = DataLoader(val_set, batch_size = 64)

In [7]:
class CNN(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1,8, kernel_size=(3,3),stride=2, padding=1) #8*14*14
        self.act1 = nn.ReLU()
        self.conv2 = nn.Conv2d(8,16,kernel_size=(3,3),stride=2, padding=1)#16*7*7
        self.act2 = nn.ReLU()
        self.conv3 = nn.Conv2d(16, 32,kernel_size=(3,3),stride=2, padding=1)#32*4*4
        self.act3 = nn.ReLU()
        self.conv4 = nn.Conv2d(32,64,kernel_size=(3,3),stride=2, padding=1)#64*2*2
        self.act4 = nn.ReLU()
        #self.pool2 = nn.MaxPool2d(kernel_size=(2,2)) #max pooling downsamples by factors of 2
        self.flat = nn.Flatten()
        self.fc1 = nn.Linear(256, 128)
        self.act5 = nn.ReLU()
        self.fc2 = nn.Linear(128, 10)
        
        
        
    def forward(self, x):
        x = self.conv1(x)
        x = self.act1(x)
        x = self.conv2(x)
        x = self.act2(x)
        x = self.conv3(x)
        x = self.act3(x)
        x = self.conv4(x)
        x = self.act4(x)
        x = self.flat(x)
        x = self.fc1(x)
        x = self.act5(x)
        x = self.fc2(x)
        return x

In [8]:
model = CNN()

In [9]:
#define loss fn and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

In [10]:
#write training loop
epochs = 20
steps = 0
train_losses, val_losses = [], []
running_loss = 0 
val_loss = 0 

for epoch in range(epochs):
    model.train()
    for batch_idx, (images, labels) in enumerate(train_loader):
        steps += 1
        images = Variable(images.view(-1,1,28,28))
        labels = Variable(labels)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        running_loss += loss.item()
        loss.backward()
        optimizer.step()
        
        total_train= 0
        correct_train = 0
        
        _,predicted = torch.max(outputs.data, 1)
        total_train += labels.size(0)
        correct_train += (predicted == labels).sum().item()
        train_acc = (correct_train/total_train) * 100
        

    with torch.no_grad():
        model.eval() #can also be model.inference_mode or something
        for data in val_loader:
            images, labels = data[0], data[1]
            outputs = model(images)
            loss = criterion(outputs, labels)
            val_loss += loss.item()
            
        correct_val = 0
        total_val = 0
        
        _,predicted = torch.max(outputs.data, 1)
        total_val += labels.size(0)
        correct_val += (predicted == labels).sum().item()
        val_acc = (correct_val/total_val) * 100
        
    train_losses.append(running_loss/total_train)
    val_losses.append(val_loss/total_val)
        
    print(f"Epoch [{epoch + 1}/{epochs}], Train loss: {running_loss/steps}, Train acc: {train_acc:.4f}, Val_loss: {val_loss/len(val_loader)} , Val_acc: {val_acc :.4f}")

Epoch [1/20], Train loss: 2.303763382849486, Train acc: 4.1667, Val_loss: 2.302177651884592 , Val_acc: 5.3571
Epoch [2/20], Train loss: 2.302377348360808, Train acc: 12.5000, Val_loss: 4.602199529028181 , Val_acc: 5.3571
Epoch [3/20], Train loss: 2.3013834951580434, Train acc: 8.3333, Val_loss: 6.900500753809353 , Val_acc: 5.3571
Epoch [4/20], Train loss: 2.3004404680884405, Train acc: 8.3333, Val_loss: 9.196494246497371 , Val_acc: 5.3571
Epoch [5/20], Train loss: 2.2992504523111426, Train acc: 16.6667, Val_loss: 11.4880774263198 , Val_acc: 5.3571
Epoch [6/20], Train loss: 2.29726691643397, Train acc: 4.1667, Val_loss: 13.768025538643 , Val_acc: 5.3571
Epoch [7/20], Train loss: 2.2920789750466435, Train acc: 25.0000, Val_loss: 15.990484419207888 , Val_acc: 16.0714
Epoch [8/20], Train loss: 2.2397011281355566, Train acc: 54.1667, Val_loss: 17.0664014967565 , Val_acc: 64.2857
Epoch [9/20], Train loss: 2.0635358190190964, Train acc: 83.3333, Val_loss: 17.54489662766759 , Val_acc: 73.2143


In [12]:
test = pd.read_csv("../input/digit-recognizer/test.csv")
test_dataset = MNIST(test, train=False)
test_loader = DataLoader(test_dataset, batch_size=64)

In [14]:
y_test = []

with torch.no_grad():
    for batch in test_loader:
        preds = model(batch).cpu()
        _,preds = torch.max(preds, 1)
        for pred in preds.numpy():
            y_test.append(pred)

In [16]:
submission = pd.read_csv("../input/digit-recognizer/sample_submission.csv")
submission["Label"] = pd.Series(y_test)

In [17]:
submission.to_csv('submission.csv', index=False)