In [1]:
import torch
import torch.nn as nn 
import torch.optim as optim 
import torch.nn.functional as F
import pandas as pd 
import numpy as np 
import matplotlib.pyplot as plt
import seaborn as sns
from torchvision import transforms
from torch.utils.data import DataLoader, Dataset

In [2]:
train = pd.read_csv('train.csv')
test = pd.read_csv('test.csv')

In [3]:
X_train = train.iloc[:, 1:].values
y_train = train.iloc[:, 0].values

In [4]:
X_train = X_train.reshape(-1, 28, 28).astype('float32') / 255.0

In [5]:
X_test = test.values.astype('float32') / 255.0
X_test = X_test.reshape(-1, 28, 28)

In [6]:
class DigitData(Dataset):
    def __init__(self, images, labels=None, transform=None):
        self.images = images
        self.labels = labels
        self.transform = transform 
        
    def __len__(self):
        return len(self.images)
    
    def __getitem__(self, idx):
        image = self.images[idx]
        if self.transform:
            image = self.transform(image)
            
        if self.labels is not None:
            label = self.labels[idx]
            return image, label
        else:
            return image

In [7]:
transform = transforms.Compose([
    transforms.ToTensor(),  
    transforms.Normalize((0.1307,), (0.3081,))
])

In [8]:
train_dataset = DigitData(X_train, y_train, transform=transform)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_dataset = DigitData(X_test, transform=transform) 
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

In [9]:
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.dropout1 = nn.Dropout(0.25)
        self.dropout2 = nn.Dropout(0.5)
        self.fc1 = nn.Linear(12544, 128)
        self.fc2 = nn.Linear(128, 10)
        
    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = F.max_pool2d(x, 2)
        x = self.dropout1(x)
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        x = self.dropout1(x)
        x = self.fc2(x)
        return x

In [10]:
model = CNN()

In [11]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [12]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

CNN(
  (conv1): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (dropout1): Dropout(p=0.25, inplace=False)
  (dropout2): Dropout(p=0.5, inplace=False)
  (fc1): Linear(in_features=12544, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=10, bias=True)
)

In [13]:
epochs = 20
for epoch in range(epochs):
    model.train()
    running_loss = 0.0
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
    print(f'Epoch [{epoch+1}/{epochs}], Loss: {running_loss/len(train_loader):.4f}')

Epoch [1/22], Loss: 0.1679
Epoch [2/22], Loss: 0.0602
Epoch [3/22], Loss: 0.0424
Epoch [4/22], Loss: 0.0311
Epoch [5/22], Loss: 0.0260
Epoch [6/22], Loss: 0.0215
Epoch [7/22], Loss: 0.0198
Epoch [8/22], Loss: 0.0154
Epoch [9/22], Loss: 0.0140
Epoch [10/22], Loss: 0.0127
Epoch [11/22], Loss: 0.0143
Epoch [12/22], Loss: 0.0131
Epoch [13/22], Loss: 0.0112
Epoch [14/22], Loss: 0.0097
Epoch [15/22], Loss: 0.0110
Epoch [16/22], Loss: 0.0081
Epoch [17/22], Loss: 0.0096
Epoch [18/22], Loss: 0.0076
Epoch [19/22], Loss: 0.0072
Epoch [20/22], Loss: 0.0086
Epoch [21/22], Loss: 0.0081
Epoch [22/22], Loss: 0.0068


In [14]:
predictions = []
model.eval()
with torch.no_grad():
    for images in test_loader:
        images = images.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)  
        predictions.extend(predicted.cpu().numpy())

In [15]:
submission = pd.DataFrame({'ImageId': range(1, len(predictions) + 1), 'Label': predictions})
submission.to_csv('submission.csv', index=False)

### Kaggle score: 0.99007