In [2]:
import pandas as pd
import numpy as np
import torch
from torch import nn
import torch.nn.functional as F
import torchvision
from torch.utils.data import Dataset, DataLoader, TensorDataset
from torchvision import datasets, transforms
from torchvision.datasets import ImageFolder
from matplotlib import pyplot as plt
%matplotlib inline
import time
import cv2
import os
from PIL import Image

In [3]:
# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [4]:
# Dataset root (relative to the notebook) and the image folder beneath it.
root_dir = '../datasets/MixedMNIST/'
images_dir = root_dir + 'images/'

# MNIST Train

In [5]:
# Training images read through ImageFolder and converted to single-channel
# tensors.
mnist_train = ImageFolder(
    root=images_dir + 'train',
    transform=transforms.Compose([
        transforms.Grayscale(num_output_channels=1),
        transforms.ToTensor(),
    ]),
)

# MNIST Train Datasource

In [6]:
class MNIST_Train_Datasource(Dataset):
    """Training images labelled with the `datasource` column of a CSV file.

    Every CSV row describes one image, which is opened from
    ``<img_folder>/<classification>/<image>``.

    Args:
        csv_file: Path of a CSV file providing the columns `id`, `image`,
            `classification` and `datasource`.
        img_folder: Directory containing one sub-folder per `classification`
            value.
        transform: Optional callable applied to the opened PIL image.
    """

    def __init__(self, csv_file, img_folder, transform=None):
        self.csv = pd.read_csv(csv_file)
        self.image_id = self.csv.id

        self.image_names = self.csv.image
        self.classification = self.csv.classification
        self.labels = self.csv.datasource

        self.img_folder = img_folder
        self.transform = transform

    def __len__(self):
        """Number of rows (samples) in the CSV file."""
        return len(self.csv)

    def __getitem__(self, index):
        """Return ``(image, datasource_label)`` for the row at position `index`."""
        # Columns are addressed by name and rows by position, so the lookup
        # does not depend on the column order of the CSV file.
        img_path = os.path.join(
            self.img_folder,
            str(self.classification.iloc[index]),
            self.image_names.iloc[index],
        )
        image = Image.open(img_path)

        if self.transform is not None:
            image = self.transform(image)

        return image, self.labels.iloc[index]

In [7]:
# Same training images, but labelled with the CSV `datasource` column.
mnist_train_datasource = MNIST_Train_Datasource(
    csv_file=root_dir + 'train_processed.csv',
    img_folder=images_dir + 'train',
    transform=transforms.Compose([
        transforms.Grayscale(num_output_channels=1),
        transforms.ToTensor(),
    ]),
)

# MNIST Test

In [8]:
class MNIST_Test(Dataset):
    """Unlabelled test images listed in a CSV file.

    Indexing yields ``(image, image_id)`` so predictions can be matched back
    to the `id` column of the CSV file.
    """

    def __init__(self, csv_file, root_dir, transform=None):
        frame = pd.read_csv(csv_file)
        self.annotations = frame
        self.image_id = frame.id
        self.root_dir = root_dir
        self.transform = transform

    def __len__(self):
        return len(self.annotations)

    def __getitem__(self, index):
        # The file name is taken from the second CSV column.
        file_name = self.annotations.iloc[index, 1]
        picture = Image.open(os.path.join(self.root_dir, file_name))
        sample_id = self.image_id[index]

        if self.transform is None:
            return picture, sample_id
        return self.transform(picture), sample_id

In [9]:
# Test set: images from the `test` folder, paired with their CSV ids.
mnist_test = MNIST_Test(
    csv_file=root_dir + 'test.csv',
    root_dir=images_dir + 'test',
    transform=transforms.Compose([
        transforms.Grayscale(num_output_channels=1),
        transforms.ToTensor(),
    ]),
)

# Model

In [10]:
class Residual(nn.Module):
    """Fully connected residual block.

    Main path: BatchNorm -> ReLU -> Dropout(0.5) -> Linear -> BatchNorm ->
    ReLU -> Linear. The skip path is a Linear projection of the input, so the
    two paths can be added even when the input and output widths differ.

    Args:
        input_channels: Number of input features.
        fc_output1: Width of the hidden layer on the main path.
        fc_output2: Number of output features (both paths).
    """

    def __init__(self, input_channels, fc_output1, fc_output2):
        super(Residual, self).__init__()
        self.lin1 = nn.Linear(input_channels, fc_output1)
        self.lin2 = nn.Linear(fc_output1, fc_output2)
        self.lin3 = nn.Linear(input_channels, fc_output2)  # skip-path projection

        self.rel1 = nn.ReLU()
        self.rel2 = nn.ReLU()

        self.bn1 = nn.BatchNorm1d(input_channels)
        self.bn2 = nn.BatchNorm1d(fc_output1)

    def forward(self, X):
        """Map a ``(batch, input_channels)`` tensor to ``(batch, fc_output2)``."""
        Y = self.bn1(X)
        Y = self.rel1(Y)
        # F.dropout defaults to training=True; tie it to the module mode so
        # dropout is switched off by model.eval().
        Y = F.dropout(Y, p=0.5, training=self.training)
        Y = self.lin1(Y)
        Y = self.bn2(Y)
        Y = self.rel2(Y)
        Y = self.lin2(Y)
        Y += self.lin3(X)
        return Y

In [11]:
class _MLPResidual(nn.Module):
    """Fully connected residual block used by MLP.

    Main path: BatchNorm -> ReLU -> Dropout(0.5) -> Linear -> BatchNorm ->
    ReLU -> Linear; the skip path is a Linear projection of the input.
    """

    def __init__(self, input_channels, fc_output1, fc_output2):
        super().__init__()
        self.lin1 = nn.Linear(input_channels, fc_output1)
        self.lin2 = nn.Linear(fc_output1, fc_output2)
        self.lin3 = nn.Linear(input_channels, fc_output2)

        self.rel1 = nn.ReLU()
        self.rel2 = nn.ReLU()

        self.bn1 = nn.BatchNorm1d(input_channels)
        self.bn2 = nn.BatchNorm1d(fc_output1)

    def forward(self, X):
        Y = self.rel1(self.bn1(X))
        # Dropout follows the module mode, so it is inactive after eval().
        Y = F.dropout(Y, p=0.5, training=self.training)
        Y = self.rel2(self.bn2(self.lin1(Y)))
        Y = self.lin2(Y)
        return Y + self.lin3(X)


class MLP(nn.Module):
    """Multi-layer perceptron built from two fully connected residual blocks.

    The model owns its residual block (`_MLPResidual`) instead of looking up
    the module-level name `Residual` at construction time. That name is
    rebound further down to a convolutional block, which would otherwise be
    picked up by every MLP created afterwards.

    Args:
        input_features: Number of features per sample after flattening.
        fc_output1: Hidden width inside each residual block.
        fc_output2: Output width of each residual block.
        outputs: Number of output logits.
    """

    def __init__(self, input_features, fc_output1, fc_output2, outputs):
        super(MLP, self).__init__()
        self.net = nn.Sequential(nn.Flatten(),
                          nn.Linear(input_features, 256),
                          _MLPResidual(256, fc_output1, fc_output2),
                          nn.BatchNorm1d(fc_output2),
                          nn.ReLU(),
                          nn.Linear(fc_output2, 64),
                          _MLPResidual(64, fc_output1, fc_output2),
                          nn.BatchNorm1d(fc_output2),
                          nn.ReLU(),
                          nn.Linear(fc_output2, outputs),
                          )

    def forward(self, X):
        """Return the logits for a batch; inputs are flattened per sample."""
        Y = self.net(X)
        return Y

# ResNet

In [12]:
class Residual(nn.Module):  #@save
    """Residual unit: two 3x3 convolutions plus a skip connection.

    With `use_1x1conv` the skip path goes through a 1x1 convolution (using the
    same stride as the first 3x3 convolution); otherwise the input is added
    unchanged.
    """
    def __init__(self, input_channels, num_channels,
                 use_1x1conv=False, strides=1):
        super().__init__()
        self.conv1 = nn.Conv2d(input_channels, num_channels, kernel_size=3,
                               padding=1, stride=strides)
        self.conv2 = nn.Conv2d(num_channels, num_channels, kernel_size=3,
                               padding=1)
        self.conv3 = (
            nn.Conv2d(input_channels, num_channels, kernel_size=1,
                      stride=strides)
            if use_1x1conv else None
        )
        self.bn1 = nn.BatchNorm2d(num_channels)
        self.bn2 = nn.BatchNorm2d(num_channels)

    def forward(self, X):
        main = self.bn1(self.conv1(X))
        main = self.bn2(self.conv2(F.relu(main)))
        shortcut = X if self.conv3 is None else self.conv3(X)
        main += shortcut
        return F.relu(main)

In [13]:
# Network stem: strided 7x7 convolution on the single input channel, batch
# norm, ReLU, then a strided 3x3 max-pool.
b1 = nn.Sequential(
    nn.Conv2d(in_channels=1, out_channels=64, kernel_size=7, stride=2, padding=3),
    nn.BatchNorm2d(num_features=64),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
)

In [14]:
def resnet_block(input_channels, num_channels, num_residuals,
                 first_block=False):
    """Return a list of `num_residuals` Residual units forming one stage.

    Unless `first_block` is set, the first unit changes the channel count from
    `input_channels` to `num_channels` with stride 2 and a 1x1 skip
    convolution. All other units keep `num_channels` channels.
    """
    def build(position):
        opens_stage = position == 0 and not first_block
        if opens_stage:
            return Residual(input_channels, num_channels,
                            use_1x1conv=True, strides=2)
        return Residual(num_channels, num_channels)

    return [build(position) for position in range(num_residuals)]

In [15]:
# Four residual stages of two units each; the channel count doubles from
# stage to stage after the first one.
b2 = nn.Sequential(*resnet_block(input_channels=64, num_channels=64,
                                 num_residuals=2, first_block=True))
b3 = nn.Sequential(*resnet_block(input_channels=64, num_channels=128,
                                 num_residuals=2))
b4 = nn.Sequential(*resnet_block(input_channels=128, num_channels=256,
                                 num_residuals=2))
b5 = nn.Sequential(*resnet_block(input_channels=256, num_channels=512,
                                 num_residuals=2))

In [16]:
# Full network: stem, four residual stages, global average pooling and a
# 10-way linear classifier on the 512 pooled features.
net = nn.Sequential(
    b1, b2, b3, b4, b5,
    nn.AdaptiveAvgPool2d((1, 1)),
    nn.Flatten(),
    nn.Linear(in_features=512, out_features=10),
)

# Metrics

In [17]:
def comp_accuracy(model, data_loader):
    """Count correct and wrong top-1 predictions of `model` over `data_loader`.

    Args:
        model: Callable mapping a feature batch to per-class scores of shape
            ``(batch, classes)``. An ``nn.Module`` is switched to eval mode
            and is left in eval mode on return.
        data_loader: Iterable of ``(features, labels)`` tensor batches.

    Returns:
        ``(correct, wrong, accuracy)`` with `accuracy` in percent. For an
        empty `data_loader` the counts are 0 and the accuracy is NaN.
    """
    first_param = None
    if isinstance(model, nn.Module):
        model.eval()
        first_param = next(model.parameters(), None)

    # Feed the batches to wherever the model's parameters live; fall back to
    # the notebook-wide `device` for models without parameters.
    target_device = first_param.device if first_param is not None else device

    correct = 0
    wrong = 0
    num_examples = 0

    with torch.no_grad():
        for features, labels in data_loader:
            features = features.to(target_device)
            labels = labels.to(target_device)

            # Index of the highest score per row is the predicted class.
            predictions = model(features).argmax(dim=1)

            num_examples += labels.size(0)
            correct += (predictions == labels).sum().float()
            wrong += (predictions != labels).sum().float()

    if num_examples == 0:
        # No samples seen: the accuracy is undefined, so avoid dividing by zero.
        return correct, wrong, float("nan")

    accuracy = correct / num_examples * 100
    return correct, wrong, accuracy

# Training

In [18]:
def fit(model, train_loader, epochs, learning_rate, loss_func=nn.CrossEntropyLoss(), opt_func=torch.optim.SGD):
    """Train `model` in place on `train_loader` and print progress.

    Every 250th batch the current loss is printed. After each epoch the
    accuracy on the whole training set is computed with `comp_accuracy` and
    printed, and the total wall-clock time is printed at the end.

    Args:
        model: ``nn.Module`` to train; it is moved to the global `device`.
        train_loader: Iterable of ``(features, labels)`` batches.
        epochs: Number of passes over `train_loader`.
        learning_rate: Passed to `opt_func` as its second positional argument.
        loss_func: Callable ``loss_func(logits, labels)`` returning a scalar
            tensor.
        opt_func: Optimizer factory called as
            ``opt_func(parameters, learning_rate)``.
    """
    # Move the parameters first, so the optimizer is created for the tensors
    # that are actually trained (the order the torch.optim docs recommend).
    model = model.to(device)
    optimizer = opt_func(model.parameters(), learning_rate)

    start = time.time()  # measure time

    for epoch in range(epochs):

        # comp_accuracy() below switches the model to eval mode, so training
        # mode has to be re-enabled at the start of every epoch.
        model.train()

        for batch_index, (features, labels) in enumerate(train_loader):

            # gpu usage if possible
            features = features.to(device)
            labels = labels.to(device)

            # 1. forward
            logits = model(features)

            # 2. compute objective function
            cost = loss_func(logits, labels)

            # 3. clear gradients left over from the previous step
            optimizer.zero_grad()

            # 4. back-propagate
            cost.backward()

            # 5. update the parameters
            optimizer.step()

            if not batch_index % 250:
                print ('Epoch: {}/{} | Batch {}/{} | Cost: {:.4f}'.format(
                    epoch+1,
                    epochs,
                    batch_index,
                    len(train_loader),
                    cost.item()  # plain float instead of a graph-attached tensor
                ))

        correct, wrong, accuracy = comp_accuracy(model, train_loader)
        print ('Training: Correct[{:.0f}] | Wrong[{:.0f}] | Accuracy[{:.2f}%]'.format(
            correct,
            wrong,
            accuracy
        ), '\n')

    end = time.time()
    print('Training time: {:.2f} seconds on {}'.format(
        end - start,
        device
    ))

# Datasource

In [19]:
# MLP for the datasource task: 784 flattened input features, 10 output logits.
model_datasource = MLP(input_features=784, fc_output1=200, fc_output2=100, outputs=10)

In [20]:
# Hyper-parameters for the datasource classifier.
batch_size, epochs, learning_rate = 50, 5, 0.05

# Batches are reshuffled every epoch.
train_loader_datasource = DataLoader(
    dataset=mnist_train_datasource,
    batch_size=batch_size,
    shuffle=True,
)

In [15]:
#fit(model_datasource, train_loader_datasource, epochs, learning_rate) 

Epoch: 1/5 | Batch 0/4800 | Cost: 2.2643
Epoch: 1/5 | Batch 250/4800 | Cost: 0.0099
Epoch: 1/5 | Batch 500/4800 | Cost: 0.0130
Epoch: 1/5 | Batch 750/4800 | Cost: 0.0025
Epoch: 1/5 | Batch 1000/4800 | Cost: 0.0016
Epoch: 1/5 | Batch 1250/4800 | Cost: 0.0073
Epoch: 1/5 | Batch 1500/4800 | Cost: 0.0218
Epoch: 1/5 | Batch 1750/4800 | Cost: 0.0048
Epoch: 1/5 | Batch 2000/4800 | Cost: 0.0017
Epoch: 1/5 | Batch 2250/4800 | Cost: 0.0071
Epoch: 1/5 | Batch 2500/4800 | Cost: 0.0299
Epoch: 1/5 | Batch 2750/4800 | Cost: 0.0019
Epoch: 1/5 | Batch 3000/4800 | Cost: 0.0015
Epoch: 1/5 | Batch 3250/4800 | Cost: 0.0621
Epoch: 1/5 | Batch 3500/4800 | Cost: 0.0006
Epoch: 1/5 | Batch 3750/4800 | Cost: 0.0010
Epoch: 1/5 | Batch 4000/4800 | Cost: 0.0018
Epoch: 1/5 | Batch 4250/4800 | Cost: 0.0004
Epoch: 1/5 | Batch 4500/4800 | Cost: 0.0009
Epoch: 1/5 | Batch 4750/4800 | Cost: 0.0009
Training: Correct[239800] | Wrong[200] | Accuracy[99.92%] 

Epoch: 2/5 | Batch 0/4800 | Cost: 0.0020
Epoch: 2/5 | Batch 250/48

# Class

In [21]:
# MLP for the digit-classification task: 784 flattened input features, 10 output logits.
model_class = MLP(input_features=784, fc_output1=200, fc_output2=100, outputs=10)

In [22]:
# Hyper-parameters for the digit-classification runs below.
batch_size, epochs, learning_rate = 100, 60, 0.08

# Only the training data is shuffled; the test loader keeps the CSV order.
train_loader = DataLoader(dataset=mnist_train, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=mnist_test, batch_size=batch_size, shuffle=False)

In [23]:
# Train the ResNet with fit()'s defaults (cross-entropy loss, SGD).
fit(model=net, train_loader=train_loader, epochs=epochs, learning_rate=learning_rate)

Epoch: 1/60 | Batch 0/2400 | Cost: 2.3747
Epoch: 1/60 | Batch 250/2400 | Cost: 0.4858
Epoch: 1/60 | Batch 500/2400 | Cost: 0.3699
Epoch: 1/60 | Batch 750/2400 | Cost: 0.2748
Epoch: 1/60 | Batch 1000/2400 | Cost: 0.2772
Epoch: 1/60 | Batch 1250/2400 | Cost: 0.3591
Epoch: 1/60 | Batch 1500/2400 | Cost: 0.4153
Epoch: 1/60 | Batch 1750/2400 | Cost: 0.3212
Epoch: 1/60 | Batch 2000/2400 | Cost: 0.1604
Epoch: 1/60 | Batch 2250/2400 | Cost: 0.2614
Training: Correct[220734] | Wrong[19266] | Accuracy[91.97%] 

Epoch: 2/60 | Batch 0/2400 | Cost: 0.2671
Epoch: 2/60 | Batch 250/2400 | Cost: 0.1979
Epoch: 2/60 | Batch 500/2400 | Cost: 0.2483
Epoch: 2/60 | Batch 750/2400 | Cost: 0.2530
Epoch: 2/60 | Batch 1000/2400 | Cost: 0.2854
Epoch: 2/60 | Batch 1250/2400 | Cost: 0.1979
Epoch: 2/60 | Batch 1500/2400 | Cost: 0.1703
Epoch: 2/60 | Batch 1750/2400 | Cost: 0.3445
Epoch: 2/60 | Batch 2000/2400 | Cost: 0.3316
Epoch: 2/60 | Batch 2250/2400 | Cost: 0.2381
Training: Correct[223522] | Wrong[16478] | Accurac

Epoch: 17/60 | Batch 250/2400 | Cost: 0.0218
Epoch: 17/60 | Batch 500/2400 | Cost: 0.0353
Epoch: 17/60 | Batch 750/2400 | Cost: 0.0284
Epoch: 17/60 | Batch 1000/2400 | Cost: 0.1575
Epoch: 17/60 | Batch 1250/2400 | Cost: 0.0475
Epoch: 17/60 | Batch 1500/2400 | Cost: 0.1268
Epoch: 17/60 | Batch 1750/2400 | Cost: 0.0156
Epoch: 17/60 | Batch 2000/2400 | Cost: 0.0480
Epoch: 17/60 | Batch 2250/2400 | Cost: 0.0361
Training: Correct[236613] | Wrong[3387] | Accuracy[98.59%] 

Epoch: 18/60 | Batch 0/2400 | Cost: 0.0153
Epoch: 18/60 | Batch 250/2400 | Cost: 0.0149
Epoch: 18/60 | Batch 500/2400 | Cost: 0.0728
Epoch: 18/60 | Batch 750/2400 | Cost: 0.0203
Epoch: 18/60 | Batch 1000/2400 | Cost: 0.0660
Epoch: 18/60 | Batch 1250/2400 | Cost: 0.1426
Epoch: 18/60 | Batch 1500/2400 | Cost: 0.0591
Epoch: 18/60 | Batch 1750/2400 | Cost: 0.0456
Epoch: 18/60 | Batch 2000/2400 | Cost: 0.0421
Epoch: 18/60 | Batch 2250/2400 | Cost: 0.0918
Training: Correct[236595] | Wrong[3405] | Accuracy[98.58%] 

Epoch: 19/60 

Epoch: 33/60 | Batch 250/2400 | Cost: 0.0165
Epoch: 33/60 | Batch 500/2400 | Cost: 0.0273
Epoch: 33/60 | Batch 750/2400 | Cost: 0.0148
Epoch: 33/60 | Batch 1000/2400 | Cost: 0.0005
Epoch: 33/60 | Batch 1250/2400 | Cost: 0.0223
Epoch: 33/60 | Batch 1500/2400 | Cost: 0.0222
Epoch: 33/60 | Batch 1750/2400 | Cost: 0.0074
Epoch: 33/60 | Batch 2000/2400 | Cost: 0.0339
Epoch: 33/60 | Batch 2250/2400 | Cost: 0.0024
Training: Correct[238735] | Wrong[1265] | Accuracy[99.47%] 

Epoch: 34/60 | Batch 0/2400 | Cost: 0.0262
Epoch: 34/60 | Batch 250/2400 | Cost: 0.0022
Epoch: 34/60 | Batch 500/2400 | Cost: 0.0035
Epoch: 34/60 | Batch 750/2400 | Cost: 0.0027
Epoch: 34/60 | Batch 1000/2400 | Cost: 0.0014
Epoch: 34/60 | Batch 1250/2400 | Cost: 0.0023
Epoch: 34/60 | Batch 1500/2400 | Cost: 0.0068
Epoch: 34/60 | Batch 1750/2400 | Cost: 0.0041
Epoch: 34/60 | Batch 2000/2400 | Cost: 0.0034
Epoch: 34/60 | Batch 2250/2400 | Cost: 0.0211
Training: Correct[238949] | Wrong[1051] | Accuracy[99.56%] 

Epoch: 35/60 

Epoch: 49/60 | Batch 250/2400 | Cost: 0.0014
Epoch: 49/60 | Batch 500/2400 | Cost: 0.0026
Epoch: 49/60 | Batch 750/2400 | Cost: 0.0021
Epoch: 49/60 | Batch 1000/2400 | Cost: 0.0004
Epoch: 49/60 | Batch 1250/2400 | Cost: 0.0007
Epoch: 49/60 | Batch 1500/2400 | Cost: 0.0002
Epoch: 49/60 | Batch 1750/2400 | Cost: 0.0006
Epoch: 49/60 | Batch 2000/2400 | Cost: 0.0008
Epoch: 49/60 | Batch 2250/2400 | Cost: 0.0093
Training: Correct[239394] | Wrong[606] | Accuracy[99.75%] 

Epoch: 50/60 | Batch 0/2400 | Cost: 0.0424
Epoch: 50/60 | Batch 250/2400 | Cost: 0.0005
Epoch: 50/60 | Batch 500/2400 | Cost: 0.0002
Epoch: 50/60 | Batch 750/2400 | Cost: 0.0021
Epoch: 50/60 | Batch 1000/2400 | Cost: 0.0048
Epoch: 50/60 | Batch 1250/2400 | Cost: 0.0001
Epoch: 50/60 | Batch 1500/2400 | Cost: 0.0084
Epoch: 50/60 | Batch 1750/2400 | Cost: 0.0012
Epoch: 50/60 | Batch 2000/2400 | Cost: 0.0070
Epoch: 50/60 | Batch 2250/2400 | Cost: 0.0086
Training: Correct[239592] | Wrong[408] | Accuracy[99.83%] 

Epoch: 51/60 | 

In [18]:
#fit(model_class, train_loader, epochs, learning_rate)

Epoch: 1/30 | Batch 0/4800 | Cost: 2.3032
Epoch: 1/30 | Batch 250/4800 | Cost: 0.9592
Epoch: 1/30 | Batch 500/4800 | Cost: 0.7006
Epoch: 1/30 | Batch 750/4800 | Cost: 0.7484
Epoch: 1/30 | Batch 1000/4800 | Cost: 0.5136
Epoch: 1/30 | Batch 1250/4800 | Cost: 0.4053
Epoch: 1/30 | Batch 1500/4800 | Cost: 0.3880
Epoch: 1/30 | Batch 1750/4800 | Cost: 0.3627
Epoch: 1/30 | Batch 2000/4800 | Cost: 0.3981
Epoch: 1/30 | Batch 2250/4800 | Cost: 0.4449
Epoch: 1/30 | Batch 2500/4800 | Cost: 0.3490
Epoch: 1/30 | Batch 2750/4800 | Cost: 0.2230
Epoch: 1/30 | Batch 3000/4800 | Cost: 0.3808
Epoch: 1/30 | Batch 3250/4800 | Cost: 0.3997
Epoch: 1/30 | Batch 3500/4800 | Cost: 0.1927
Epoch: 1/30 | Batch 3750/4800 | Cost: 0.3967
Epoch: 1/30 | Batch 4000/4800 | Cost: 0.4604
Epoch: 1/30 | Batch 4250/4800 | Cost: 0.4535
Epoch: 1/30 | Batch 4500/4800 | Cost: 0.3809
Epoch: 1/30 | Batch 4750/4800 | Cost: 0.4499
Training: Correct[214481] | Wrong[25519] | Accuracy[89.37%] 

Epoch: 2/30 | Batch 0/4800 | Cost: 0.2557
Ep

Epoch: 9/30 | Batch 3250/4800 | Cost: 0.1647
Epoch: 9/30 | Batch 3500/4800 | Cost: 0.2368
Epoch: 9/30 | Batch 3750/4800 | Cost: 0.2820
Epoch: 9/30 | Batch 4000/4800 | Cost: 0.1946
Epoch: 9/30 | Batch 4250/4800 | Cost: 0.2193
Epoch: 9/30 | Batch 4500/4800 | Cost: 0.4005
Epoch: 9/30 | Batch 4750/4800 | Cost: 0.0938
Training: Correct[224469] | Wrong[15531] | Accuracy[93.53%] 

Epoch: 10/30 | Batch 0/4800 | Cost: 0.2915
Epoch: 10/30 | Batch 250/4800 | Cost: 0.1376
Epoch: 10/30 | Batch 500/4800 | Cost: 0.1723
Epoch: 10/30 | Batch 750/4800 | Cost: 0.2051
Epoch: 10/30 | Batch 1000/4800 | Cost: 0.3097
Epoch: 10/30 | Batch 1250/4800 | Cost: 0.1144
Epoch: 10/30 | Batch 1500/4800 | Cost: 0.2970
Epoch: 10/30 | Batch 1750/4800 | Cost: 0.1659
Epoch: 10/30 | Batch 2000/4800 | Cost: 0.1674
Epoch: 10/30 | Batch 2250/4800 | Cost: 0.0507
Epoch: 10/30 | Batch 2500/4800 | Cost: 0.1837
Epoch: 10/30 | Batch 2750/4800 | Cost: 0.2552
Epoch: 10/30 | Batch 3000/4800 | Cost: 0.1909
Epoch: 10/30 | Batch 3250/4800 

Epoch: 18/30 | Batch 250/4800 | Cost: 0.1449
Epoch: 18/30 | Batch 500/4800 | Cost: 0.1923
Epoch: 18/30 | Batch 750/4800 | Cost: 0.1929
Epoch: 18/30 | Batch 1000/4800 | Cost: 0.1407
Epoch: 18/30 | Batch 1250/4800 | Cost: 0.1138
Epoch: 18/30 | Batch 1500/4800 | Cost: 0.2157
Epoch: 18/30 | Batch 1750/4800 | Cost: 0.2262
Epoch: 18/30 | Batch 2000/4800 | Cost: 0.1742
Epoch: 18/30 | Batch 2250/4800 | Cost: 0.0555
Epoch: 18/30 | Batch 2500/4800 | Cost: 0.1733
Epoch: 18/30 | Batch 2750/4800 | Cost: 0.2251
Epoch: 18/30 | Batch 3000/4800 | Cost: 0.0744
Epoch: 18/30 | Batch 3250/4800 | Cost: 0.1766
Epoch: 18/30 | Batch 3500/4800 | Cost: 0.2431
Epoch: 18/30 | Batch 3750/4800 | Cost: 0.2216
Epoch: 18/30 | Batch 4000/4800 | Cost: 0.0554
Epoch: 18/30 | Batch 4250/4800 | Cost: 0.1842
Epoch: 18/30 | Batch 4500/4800 | Cost: 0.2640
Epoch: 18/30 | Batch 4750/4800 | Cost: 0.2081
Training: Correct[228075] | Wrong[11925] | Accuracy[95.03%] 

Epoch: 19/30 | Batch 0/4800 | Cost: 0.0815
Epoch: 19/30 | Batch 250

Epoch: 26/30 | Batch 2500/4800 | Cost: 0.0202
Epoch: 26/30 | Batch 2750/4800 | Cost: 0.1054
Epoch: 26/30 | Batch 3000/4800 | Cost: 0.1402
Epoch: 26/30 | Batch 3250/4800 | Cost: 0.0940
Epoch: 26/30 | Batch 3500/4800 | Cost: 0.2179
Epoch: 26/30 | Batch 3750/4800 | Cost: 0.0442
Epoch: 26/30 | Batch 4000/4800 | Cost: 0.1451
Epoch: 26/30 | Batch 4250/4800 | Cost: 0.1144
Epoch: 26/30 | Batch 4500/4800 | Cost: 0.0976
Epoch: 26/30 | Batch 4750/4800 | Cost: 0.1035
Training: Correct[230192] | Wrong[9808] | Accuracy[95.91%] 

Epoch: 27/30 | Batch 0/4800 | Cost: 0.0869
Epoch: 27/30 | Batch 250/4800 | Cost: 0.1206
Epoch: 27/30 | Batch 500/4800 | Cost: 0.0848
Epoch: 27/30 | Batch 750/4800 | Cost: 0.1114
Epoch: 27/30 | Batch 1000/4800 | Cost: 0.0841
Epoch: 27/30 | Batch 1250/4800 | Cost: 0.1580
Epoch: 27/30 | Batch 1500/4800 | Cost: 0.1143
Epoch: 27/30 | Batch 1750/4800 | Cost: 0.1050
Epoch: 27/30 | Batch 2000/4800 | Cost: 0.1117
Epoch: 27/30 | Batch 2250/4800 | Cost: 0.1187
Epoch: 27/30 | Batch 2500

In [24]:
results = []

# Put the network in eval mode explicitly, so BatchNorm uses its running
# statistics no matter which mode an earlier cell left the model in.
net.eval()

with torch.no_grad():
    for features, image_id in test_loader:
        logits = net(features.to(device))
        predictions = logits.argmax(dim=1)  # index of the highest score = predicted class

        # One [id, predicted class] row per sample, stored as plain Python ints.
        results.extend(
            [sample_id, predicted]
            for sample_id, predicted in zip(image_id.tolist(), predictions.tolist())
        )

In [25]:
# Collect the [id, predicted class] rows into a table and preview the first rows.
df = pd.DataFrame(data=results, columns=['id', 'classification'])
df.head()

Unnamed: 0,id,classification
0,240000,6
1,240001,9
2,240002,0
3,240003,8
4,240004,2


In [26]:
# Write the predictions without the DataFrame index column.
df.to_csv("submission.csv", sep=",", index=False)

In [23]:
class Model(nn.Module):
    """Plain CNN with three convolutional stages and a linear classifier.

    Every stage is a run of Conv2d(3x3, padding=1) -> ReLU -> BatchNorm2d
    units followed by 2x2 max-pooling and 25% dropout. The classifier expects
    128 flattened features (e.g. a 1x28x28 input is reduced to 128x1x1), and
    `forward` returns log-probabilities over 10 classes.
    """

    @staticmethod
    def _stage(conv_specs):
        """Build one stage from ``(in_channels, out_channels, stride)`` triples."""
        layers = []
        for in_channels, out_channels, stride in conv_specs:
            layers.append(nn.Conv2d(in_channels, out_channels, 3, stride=stride, padding=1))
            layers.append(nn.ReLU())
            layers.append(nn.BatchNorm2d(out_channels))
        layers.append(nn.MaxPool2d(2, 2))
        layers.append(nn.Dropout(0.25))
        return nn.Sequential(*layers)

    def __init__(self):
        super().__init__()

        # The last convolution of the first two stages uses stride 2.
        self.conv1 = self._stage([(1, 32, 1), (32, 32, 1), (32, 32, 2)])
        self.conv2 = self._stage([(32, 64, 1), (64, 64, 1), (64, 64, 2)])
        self.conv3 = self._stage([(64, 128, 1)])

        self.fc = nn.Sequential(
            nn.Linear(128, 10)
        )

    def forward(self, x):
        features = self.conv3(self.conv2(self.conv1(x)))
        flat = features.view(features.size(0), -1)
        return F.log_softmax(self.fc(flat), dim=1)

In [None]:
# Model ends in log_softmax, so it is trained with NLLLoss rather than
# fit()'s default CrossEntropyLoss.
# NOTE(review): `learning_rate` is reused from the earlier hyper-parameter cell
# (0.08 there); that looks high for Adam - confirm it is intended.
model_test = Model()
fit(
    model_test,
    train_loader,
    epochs,
    learning_rate,
    loss_func=nn.NLLLoss(),
    opt_func=torch.optim.Adam,
)

Epoch: 1/60 | Batch 0/2400 | Cost: 2.6923
Epoch: 1/60 | Batch 250/2400 | Cost: 1.4882
Epoch: 1/60 | Batch 500/2400 | Cost: 0.7814
Epoch: 1/60 | Batch 750/2400 | Cost: 0.7930
Epoch: 1/60 | Batch 1000/2400 | Cost: 0.5620
Epoch: 1/60 | Batch 1250/2400 | Cost: 0.6009
Epoch: 1/60 | Batch 1500/2400 | Cost: 0.5152
Epoch: 1/60 | Batch 1750/2400 | Cost: 0.6882
Epoch: 1/60 | Batch 2000/2400 | Cost: 0.4901
Epoch: 1/60 | Batch 2250/2400 | Cost: 0.4616
Training: Correct[209185] | Wrong[30815] | Accuracy[87.16%] 

Epoch: 2/60 | Batch 0/2400 | Cost: 0.5687
Epoch: 2/60 | Batch 250/2400 | Cost: 0.2883
Epoch: 2/60 | Batch 500/2400 | Cost: 0.2899
Epoch: 2/60 | Batch 750/2400 | Cost: 0.2329
Epoch: 2/60 | Batch 1000/2400 | Cost: 0.5192
Epoch: 2/60 | Batch 1250/2400 | Cost: 0.5179
Epoch: 2/60 | Batch 1500/2400 | Cost: 0.4140
Epoch: 2/60 | Batch 1750/2400 | Cost: 0.4922
Epoch: 2/60 | Batch 2000/2400 | Cost: 0.3890
Epoch: 2/60 | Batch 2250/2400 | Cost: 0.6413
Training: Correct[211951] | Wrong[28049] | Accurac

Epoch: 17/60 | Batch 250/2400 | Cost: 0.2324
Epoch: 17/60 | Batch 500/2400 | Cost: 0.2590
Epoch: 17/60 | Batch 750/2400 | Cost: 0.3600
Epoch: 17/60 | Batch 1000/2400 | Cost: 0.2443
Epoch: 17/60 | Batch 1250/2400 | Cost: 0.4070
Epoch: 17/60 | Batch 1500/2400 | Cost: 0.1824
Epoch: 17/60 | Batch 1750/2400 | Cost: 0.2736
Epoch: 17/60 | Batch 2000/2400 | Cost: 0.1240
Epoch: 17/60 | Batch 2250/2400 | Cost: 0.1149
Training: Correct[224177] | Wrong[15823] | Accuracy[93.41%] 

Epoch: 18/60 | Batch 0/2400 | Cost: 0.3992
Epoch: 18/60 | Batch 250/2400 | Cost: 0.2837
Epoch: 18/60 | Batch 500/2400 | Cost: 0.4381
Epoch: 18/60 | Batch 750/2400 | Cost: 0.4999
Epoch: 18/60 | Batch 1000/2400 | Cost: 0.3099
Epoch: 18/60 | Batch 1250/2400 | Cost: 0.3116
Epoch: 18/60 | Batch 1500/2400 | Cost: 0.2637
Epoch: 18/60 | Batch 1750/2400 | Cost: 0.1893
Epoch: 18/60 | Batch 2000/2400 | Cost: 0.2768
Epoch: 18/60 | Batch 2250/2400 | Cost: 0.4490
Training: Correct[219727] | Wrong[20273] | Accuracy[91.55%] 

Epoch: 19/6

Epoch: 33/60 | Batch 250/2400 | Cost: 0.2256
Epoch: 33/60 | Batch 500/2400 | Cost: 0.1465
Epoch: 33/60 | Batch 750/2400 | Cost: 0.4307
Epoch: 33/60 | Batch 1000/2400 | Cost: 0.2683
Epoch: 33/60 | Batch 1250/2400 | Cost: 0.2272
Epoch: 33/60 | Batch 1500/2400 | Cost: 0.2907
Epoch: 33/60 | Batch 1750/2400 | Cost: 0.1754
Epoch: 33/60 | Batch 2000/2400 | Cost: 0.3562
Epoch: 33/60 | Batch 2250/2400 | Cost: 0.2329
Training: Correct[227812] | Wrong[12188] | Accuracy[94.92%] 

Epoch: 34/60 | Batch 0/2400 | Cost: 0.1584
Epoch: 34/60 | Batch 250/2400 | Cost: 0.3096
Epoch: 34/60 | Batch 500/2400 | Cost: 0.3960
Epoch: 34/60 | Batch 750/2400 | Cost: 0.3197
Epoch: 34/60 | Batch 1000/2400 | Cost: 0.3560
Epoch: 34/60 | Batch 1250/2400 | Cost: 0.1384
Epoch: 34/60 | Batch 1500/2400 | Cost: 0.2901
Epoch: 34/60 | Batch 1750/2400 | Cost: 0.1560
Epoch: 34/60 | Batch 2000/2400 | Cost: 0.1100
Epoch: 34/60 | Batch 2250/2400 | Cost: 0.2548
Training: Correct[226773] | Wrong[13227] | Accuracy[94.49%] 

Epoch: 35/6