### Analyse MNIST dataset

In [1]:
import pandas as pd

train_data = pd.read_csv("train.csv")
test_data = pd.read_csv("test.csv")

print("Train dataset shape: {}".format(train_data.shape))
print("Test dataset shape: {}".format(test_data.shape))

Train dataset shape: (42000, 785)
Test dataset shape: (28000, 784)


In [2]:
train_data.head()

Unnamed: 0,label,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


#### Split training set into validation 

In [11]:
import torch

y_train = train_data["label"].to_numpy().astype('float32')
x_train = train_data.drop(columns=["label"]).to_numpy().astype('float32')

from sklearn.model_selection import train_test_split
X_train, X_valid, y_train, y_valid = train_test_split(x_train, y_train, test_size = 0.3, random_state = 0)

X_train, X_valid, y_train, y_valid = map(torch.tensor, (X_train, X_valid, y_train, y_valid))
y_train = y_train.type(torch.LongTensor) #pytorch need for labels floats
y_valid = y_valid.type(torch.LongTensor)

#### Load data to TensorDataset and DataLoader

In [4]:
batch_size = 64
learning_rate = 1e-3
epochs = 10

In [5]:
from torch.utils.data import TensorDataset
from torch.utils.data import DataLoader

train_ds = TensorDataset(X_train, y_train)
valid_ds = TensorDataset(X_valid, y_valid)


def get_data_loader(train_ds, valid_ds, batch_size):
    return (
        DataLoader(train_ds, batch_size=batch_size, shuffle=True),
        DataLoader(valid_ds, batch_size=batch_size * 2),
    )

#### Define model simple linear model

In [6]:
from torch import nn

class SimpleNet(nn.Module):
    def __init__(self):
        super(SimpleNet, self).__init__()
        self.lin = nn.Linear(784, 10)
        
    def forward(self, xb):
        return self.lin(xb)

#### Define loss function and optimizer

In [7]:
import torch.nn.functional as F
from torch import optim

loss_func = F.cross_entropy

def get_model():
    model = SimpleNet()
    return model, optim.Adam(model.parameters(), lr=learning_rate)

model, opt = get_model()
train_dl, valid_dl = get_data_loader(train_ds, valid_ds, batch_size)

#### Training model

In [16]:
for epoch in range(epochs):
    model.train()
    for xb, yb in train_dl:
        pred = model(xb)
        loss = loss_func(pred, yb)
        
        loss.backward()
        opt.step()
        opt.zero_grad()
        
    print("Epoch: {}  Loss: {}".format(epoch+1, loss))
    
    model.eval()
    with torch.no_grad():
        valid_loss = sum(loss_func(model(xb), yb) for xb, yb in valid_dl)
        
        

print("Training loss: {}".format(loss_func(model(xb),yb).detach().numpy()))
print("Validation loss: {}".format(valid_loss / len(valid_ds)))

Epoch: 1  Loss: 3.8689167499542236
Epoch: 2  Loss: 2.2042083740234375
Epoch: 3  Loss: 39.36684799194336
Epoch: 4  Loss: 39.55915832519531
Epoch: 5  Loss: 0.7501220703125
Epoch: 6  Loss: 7.398874759674072
Epoch: 7  Loss: 44.97210693359375
Epoch: 8  Loss: 38.88969802856445
Epoch: 9  Loss: 139.79930114746094
Epoch: 10  Loss: 46.45380783081055
Training loss: 30.18180274963379
Validation loss: 0.49650129675865173


#### CNN Model

In [26]:
class Mnist_CNN(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 16, kernel_size=3, stride=2, padding=1)
        self.conv2 = nn.Conv2d(16, 16, kernel_size=3, stride=2, padding=1)
        self.conv3 = nn.Conv2d(16, 10, kernel_size=3, stride=2, padding=1)

    def forward(self, xb):
        xb = xb.view(-1, 1, 28, 28) #pytorch version of numpy (view) and reshaping
        xb = F.relu(self.conv1(xb))
        xb = F.relu(self.conv2(xb))
        xb = F.relu(self.conv3(xb))
        xb = F.avg_pool2d(xb, 4)
        return xb.view(-1, xb.size(1))

In [27]:
def get_model():
    model = Mnist_CNN()
    return model, optim.Adam(model.parameters(), lr=learning_rate)

model, opt = get_model()
train_dl, valid_dl = get_data_loader(train_ds, valid_ds, batch_size)

In [30]:
for epoch in range(epochs):
    model.train()
    for xb, yb in train_dl:
        pred = model(xb)
        loss = loss_func(pred, yb)
        
        loss.backward()
        opt.step()
        opt.zero_grad()
        
    print("Epoch: {}  Loss: {}".format(epoch+1, loss))
    
    model.eval()
    with torch.no_grad():
        valid_loss = sum(loss_func(model(xb), yb) for xb, yb in valid_dl)
    
        
print("Training loss: {}".format(loss_func(model(xb),yb).detach().numpy()))
print("Validation loss: {}".format(valid_loss / len(valid_ds)))

Epoch: 1  Loss: 0.049665581434965134
Epoch: 2  Loss: 0.06621337682008743
Epoch: 3  Loss: 0.1308954358100891
Epoch: 4  Loss: 0.13152199983596802
Epoch: 5  Loss: 0.5564932227134705
Epoch: 6  Loss: 0.05504919961094856
Epoch: 7  Loss: 0.2766396701335907
Epoch: 8  Loss: 0.12230592966079712
Epoch: 9  Loss: 0.0747942253947258
Epoch: 10  Loss: 0.048705603927373886
Training loss: 0.052238304167985916 Accuracy: 1.0
Validation loss: 0.0025319592095911503
