In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

import torch
from torch import nn
from torch.utils.data import DataLoader
from torch.utils.data import Dataset
import torchvision

import jax
import jax.numpy as jnp

from sklearn.model_selection import train_test_split

In [2]:
# Read the labelled training table and the unlabelled competition test table.
train_df, test_df = (
    pd.read_csv(csv_path)
    for csv_path in (
        "/kaggle/input/digit-recognizer/train.csv",
        "/kaggle/input/digit-recognizer/test.csv",
    )
)

In [3]:
# Number of rows in the training table.
train_df.shape[0]

42000

In [4]:
# Separate the "label" column from the remaining feature columns, working on a
# copy so train_df itself is left untouched.
X=train_df.copy()
y=X.pop("label")
# Hold out 20% of the rows for validation.
# random_state pins the shuffle so the same rows land in each split on every run;
# stratify=y keeps the per-label proportions the same in both splits.
X_train,X_val,y_train,y_val=train_test_split(
    X,y,test_size=0.2,random_state=42,stratify=y
)

In [5]:
class DigitRecognizer(Dataset):
    """Dataset over the training or validation split built earlier in the notebook.

    The split is taken from the module-level ``X_train``/``y_train`` when ``train``
    is truthy, otherwise from ``X_val``/``y_val``. Both are copied at construction
    time, so later changes to those globals do not affect an existing dataset.

    Args:
        train: selects the training split (truthy) or the validation split (falsy).
        transform: optional callable applied to each image tensor.
        target_transform: optional callable applied to each label.
    """
    def __init__(self, train,transform=None,target_transform=None):
        if train:
            self.df=X_train.copy()
            self.labels=y_train.copy()
        else:
            self.df=X_val.copy()
            self.labels=y_val.copy()
        # Convert the whole split to a single float32 tensor once. Doing the pandas
        # row lookup and tensor conversion inside __getitem__ repeats that work for
        # every sample of every epoch.
        self.images=torch.tensor(self.df.to_numpy()).type(torch.float)
        self.transform=transform
        self.target_transform=target_transform

    def __len__(self):
        """Return the number of samples in the wrapped split."""
        return len(self.labels)

    def __getitem__(self,idx):
        """Return ``(image, label)`` for position ``idx`` within the split.

        ``image`` is a float32 tensor holding that row's feature values; ``label``
        is the corresponding entry of the label series. Each is passed through its
        transform when one was supplied.
        """
        label=self.labels.iloc[idx]
        # clone() hands out an independent tensor, so a transform that modifies its
        # input in place cannot alter the cached data.
        image=self.images[idx].clone()
        # Compare against None rather than relying on truthiness: a callable object
        # may define __len__/__bool__ and evaluate as False.
        if self.transform is not None:
            image=self.transform(image)
        if self.target_transform is not None:
            label=self.target_transform(label)
        return image, label


In [6]:
# Training batches are reshuffled every epoch.
train_loader=DataLoader(DigitRecognizer(train=True), batch_size=64, shuffle=True)
# Validation batches keep a fixed order. Shuffling adds nothing to evaluation and
# makes the per-batch averaged loss depend on which samples fall into the final,
# shorter batch.
test_loader=DataLoader(DigitRecognizer(train=False), batch_size=64, shuffle=False)

In [7]:
# Use the GPU when torch can see one; otherwise run on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

In [8]:
class MnistModel1(nn.Module):
    """Small fully connected classifier: three linear layers with ReLU in between.

    Args:
        in_features: width of each input vector (default 784).
        hidden_features: width of both hidden layers (default 10).
        num_classes: number of output scores per sample (default 10).

    With the defaults the layer sizes and ``state_dict`` keys are exactly those of
    the original fixed-size model.
    """
    def __init__(self, in_features=784, hidden_features=10, num_classes=10):
        super().__init__()
        self.linear_stack=nn.Sequential(
            nn.Linear(in_features,hidden_features),
            nn.ReLU(),
            nn.Linear(hidden_features,hidden_features),
            nn.ReLU(),
            nn.Linear(hidden_features,num_classes)
        )

    def forward(self,x):
        """Return the raw output of the final linear layer (no softmax applied)."""
        return self.linear_stack(x)


# Seed torch's global RNG before the weights are drawn so that re-running this
# cell gives the same initial parameters. When the notebook runs top to bottom,
# the DataLoader shuffle order that follows is drawn from the same seeded stream.
torch.manual_seed(42)
model=MnistModel1().to(device)
model

MnistModel1(
  (linear_stack): Sequential(
    (0): Linear(in_features=784, out_features=10, bias=True)
    (1): ReLU()
    (2): Linear(in_features=10, out_features=10, bias=True)
    (3): ReLU()
    (4): Linear(in_features=10, out_features=10, bias=True)
  )
)

In [9]:
# Plain SGD over all model parameters, trained against cross-entropy on the
# model's raw outputs.
optimizer = torch.optim.SGD(params=model.parameters(), lr=1e-3)
loss_fn = nn.CrossEntropyLoss()

In [10]:
def train(dataloader,model,loss_fn,optimizer):
    """Run one optimisation pass over ``dataloader``, updating ``model`` in place.

    Args:
        dataloader: iterable yielding ``(inputs, targets)`` batches.
        model: module to optimise; it is switched to training mode first.
        loss_fn: callable ``loss_fn(predictions, targets)`` returning a scalar tensor.
        optimizer: optimiser whose ``step()`` applies the update.

    Each batch is moved to the module-level ``device`` before the forward pass.
    The loss of every 100th batch (starting with batch 0) is printed.
    """
    model.train()
    for batch_idx,(inputs,targets) in enumerate(dataloader):
        inputs=inputs.to(device)
        targets=targets.to(device)
        # Clear gradients before computing new ones. Zeroing only after step()
        # lets gradients left behind by an interrupted run, or by any earlier
        # backward() call, be added into the first update.
        optimizer.zero_grad()
        loss=loss_fn(model(inputs),targets)
        loss.backward()
        optimizer.step()
        if batch_idx%100==0:
            print(f"Batch {batch_idx}, Loss {loss.item()}")
    
    
def test(dataloader, model, loss_fn):
    size=len(dataloader.dataset)
    num_batches=len(dataloader)
    model.eval()
    test_loss, correct=0,0
    with torch.no_grad():
        for X,y in dataloader:
            X=X.to(device)
            y=y.to(device)
            pred=model(X)
            test_loss+=loss_fn(pred,y).item()
            correct+= (pred.argmax(1)==y).type(torch.float).sum().item()
    test_loss/=num_batches
    correct/=size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

In [11]:
# Alternate one training pass and one validation pass for each epoch.
epochs=50
for epoch in range(epochs):
    print(f"Epoch {epoch}")
    train(dataloader=train_loader, model=model, loss_fn=loss_fn, optimizer=optimizer)
    test(dataloader=test_loader, model=model, loss_fn=loss_fn)

Epoch 0
Batch 0, Loss 13.489460945129395
Batch 100, Loss 1.649893879890442
Batch 200, Loss 1.4109688997268677
Batch 300, Loss 1.352682113647461
Batch 400, Loss 1.1536303758621216
Batch 500, Loss 0.8712546825408936
Test Error: 
 Accuracy: 70.8%, Avg loss: 1.072456 

Epoch 1
Batch 0, Loss 1.2207920551300049
Batch 100, Loss 1.075443983078003
Batch 200, Loss 0.9196873903274536
Batch 300, Loss 0.6816635131835938
Batch 400, Loss 1.0095579624176025
Batch 500, Loss 0.8226373195648193
Test Error: 
 Accuracy: 74.2%, Avg loss: 0.921234 

Epoch 2
Batch 0, Loss 0.6789654493331909
Batch 100, Loss 1.1001297235488892
Batch 200, Loss 0.7210519313812256
Batch 300, Loss 0.48322418332099915
Batch 400, Loss 0.5225216150283813
Batch 500, Loss 0.7411543726921082
Test Error: 
 Accuracy: 78.8%, Avg loss: 0.683123 

Epoch 3
Batch 0, Loss 0.6714595556259155
Batch 100, Loss 0.690265417098999
Batch 200, Loss 0.6972518563270569
Batch 300, Loss 0.5277186632156372
Batch 400, Loss 0.4449980556964874
Batch 500, Loss 0.

In [12]:
# Predict a class for every row of the competition test table (test_df was
# already loaded from test.csv at the top of the notebook, so it is not re-read).
X_test=torch.tensor(test_df.to_numpy()).type(torch.float).to(device)
# Inference only: eval mode plus no_grad avoids building an autograd graph over
# the whole test set.
model.eval()
with torch.no_grad():
    test_logits=model(X_test)
# argmax over the raw scores picks the same class as argmax over their softmax,
# so the softmax step is unnecessary. The result is stored under its own name
# rather than `y`, which holds the training labels.
preds=test_logits.argmax(dim=1).to("cpu").numpy()

In [13]:
# Start from the sample submission (indexed by ImageId), replace its Label
# column with the predictions, and write the result to the working directory.
submission = (
    pd.read_csv("/kaggle/input/digit-recognizer/sample_submission.csv", index_col="ImageId")
    .assign(Label=preds)
)
submission.to_csv("/kaggle/working/submission.csv")