In [209]:
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from tqdm import tqdm

In [None]:
# --- Reproducibility ---------------------------------------------------------
# Seed PyTorch's global RNG so that weight initialisation, dropout masks and
# the shuffling order of `train_loader` repeat under "Restart & Run All".
# (Bit-exact results on a GPU may additionally require deterministic kernels.)
seed = 42
torch.manual_seed(seed)

# --- Hyperparameters ---------------------------------------------------------
input_size = 784        # features per sample after flattening (MNIST: 28 * 28)
num_classes = 10        # one output score per digit class
learning_rate = 0.001   # step size handed to the optimizer
batch_size = 64         # samples per batch for both loaders
num_epochs = 10         # full passes over the training split

# --- Data --------------------------------------------------------------------
# ToTensor converts each image into a float tensor scaled to [0, 1].
transform = transforms.Compose([
    transforms.ToTensor()
])

# Both splits are stored under ./data and downloaded on first use.
train_dataset = datasets.MNIST('data', train=True, transform=transform, download=True)
test_dataset = datasets.MNIST('data', train=False, transform=transform, download=True)

# Only the training split is shuffled; the test order stays fixed so that
# positional sample IDs derived from it are stable between runs.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [211]:
class NN(nn.Module):
    """Fully connected classifier with three hidden layers.

    Every hidden layer is ``Linear -> ReLU -> Dropout``. The last linear layer
    returns raw class scores (logits); no softmax is applied here.

    Args:
        input_size: Number of features in the last dimension of the input.
        num_classes: Number of scores produced per sample.
        hidden_sizes: Widths of the three hidden layers. Only the first three
            entries are read; a shorter sequence raises ``IndexError``.
        dropout_p: Dropout probability applied after each hidden activation.
    """

    def __init__(self, input_size, num_classes, hidden_sizes=(256, 128, 64), dropout_p=0.2):
        super().__init__()

        # The default is an immutable tuple rather than a list so the default
        # object can never be mutated and shared between instances.
        # Layers are created in a fixed order (fc1..fc4) so that seeded
        # initialisation and state_dict keys stay stable.
        self.fc1 = nn.Linear(input_size, hidden_sizes[0])
        self.fc2 = nn.Linear(hidden_sizes[0], hidden_sizes[1])
        self.fc3 = nn.Linear(hidden_sizes[1], hidden_sizes[2])
        self.fc4 = nn.Linear(hidden_sizes[2], num_classes)

        # Stateless modules, shared by all hidden layers.
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(dropout_p)

    def forward(self, x):
        """Map inputs whose last dimension is ``input_size`` to class logits.

        Dropout is only active while the module is in training mode.
        """
        x = self.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.relu(self.fc2(x))
        x = self.dropout(x)
        x = self.relu(self.fc3(x))
        x = self.dropout(x)
        # Output layer: no activation, the loss function consumes raw logits.
        x = self.fc4(x)
        return x

In [212]:
# Prefer the GPU when PyTorch can see one; otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Build the network from the hyperparameters declared in the configuration
# cell instead of repeating the literals, so changing them there takes effect.
model = NN(input_size=input_size, num_classes=num_classes).to(device)

# CrossEntropyLoss consumes the raw logits returned by the model.
criterion = nn.CrossEntropyLoss()

# Adam with a small L2 penalty (weight_decay) on the parameters.
optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=1e-4)

In [213]:
for epoch in range(num_epochs):
    # Enter training mode explicitly so dropout is active even if an earlier
    # cell left the model in eval mode (e.g. an interrupted evaluation).
    model.train()

    # Sum of the per-batch losses, averaged over batches after the epoch.
    running_loss = 0

    for batch_idx, (data, targets) in enumerate(tqdm(train_loader)):
        # Move the batch to the same device as the model.
        data = data.to(device=device)
        targets = targets.to(device=device)

        # Flatten every sample to a vector: (batch, ...) -> (batch, features).
        data = data.reshape(data.shape[0], -1)

        # Forward pass
        scores = model(data)
        loss = criterion(scores, targets)

        # Backward pass: clear stale gradients first, then backpropagate.
        optimizer.zero_grad()
        loss.backward()

        # Gradient descent or adam step
        optimizer.step()

        # .item() extracts a Python float so no autograd graph is retained.
        running_loss += loss.item()

    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {running_loss/len(train_loader):.4f}")

100%|██████████| 938/938 [00:17<00:00, 52.64it/s]


Epoch 1/10, Loss: 0.3839


100%|██████████| 938/938 [00:16<00:00, 57.54it/s]


Epoch 2/10, Loss: 0.1565


100%|██████████| 938/938 [00:18<00:00, 49.56it/s]


Epoch 3/10, Loss: 0.1165


100%|██████████| 938/938 [00:18<00:00, 52.00it/s]


Epoch 4/10, Loss: 0.0969


100%|██████████| 938/938 [00:17<00:00, 54.35it/s]


Epoch 5/10, Loss: 0.0844


100%|██████████| 938/938 [00:19<00:00, 48.35it/s]


Epoch 6/10, Loss: 0.0737


100%|██████████| 938/938 [00:17<00:00, 54.69it/s]


Epoch 7/10, Loss: 0.0696


100%|██████████| 938/938 [00:16<00:00, 56.13it/s]


Epoch 8/10, Loss: 0.0658


100%|██████████| 938/938 [00:16<00:00, 56.66it/s]


Epoch 9/10, Loss: 0.0629


100%|██████████| 938/938 [00:16<00:00, 55.21it/s]

Epoch 10/10, Loss: 0.0584





In [247]:
def check_accuracy_and_save_data(loader, model):
    """Evaluate ``model`` on ``loader`` and collect its per-sample predictions.

    Batches are moved to the device that holds the model's parameters, so the
    function does not depend on any module-level ``device`` variable. The
    model is switched to eval mode for the duration of the call and its
    previous train/eval mode is restored afterwards, even if an error occurs.

    Args:
        loader: Iterable yielding ``(x, y)`` batches. ``x`` is flattened to
            ``(batch, -1)`` before the forward pass; ``y`` holds the labels
            the predictions are compared against.
        model: Module mapping a flattened batch to per-class scores of shape
            ``(batch, num_classes)``.

    Returns:
        tuple: ``(accuracy, data)``. ``accuracy`` is the fraction of correct
        predictions as a 0-dim tensor. ``data`` is a dict with the keys
        ``"ID"`` (running sample index in iteration order, starting at 0) and
        ``"target"`` (predicted class index as a Python int).

    Raises:
        ZeroDivisionError: If ``loader`` yields no samples.
    """
    # Follow the model's own device; a parameter-less model leaves the
    # batches wherever they already are.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else None

    num_correct = 0
    num_samples = 0
    data = {
        "ID": [],
        "target": [],
    }

    # Remember the caller's mode so evaluation does not silently flip it.
    was_training = model.training
    model.eval()

    try:
        with torch.no_grad():
            for x, y in loader:
                if target_device is not None:
                    x = x.to(target_device)
                    y = y.to(target_device)

                # Get to the correct shape: one flat feature vector per sample.
                x = x.reshape(x.shape[0], -1)

                # Forward pass; the predicted class is the highest score.
                scores = model(x)
                _, predictions = scores.max(1)
                batch_len = predictions.size(0)

                # One bulk transfer per batch instead of one .item() per sample.
                data["ID"].extend(range(num_samples, num_samples + batch_len))
                data["target"].extend(predictions.tolist())

                # Accumulated as a tensor, which keeps the returned accuracy a tensor.
                num_correct += (predictions == y).sum()
                num_samples += batch_len
    finally:
        model.train(was_training)

    return num_correct / num_samples, data

# Score the trained network on the held-out test split. `data` holds the
# per-sample predictions that are written to the submission file further down.
accuracy, data = check_accuracy_and_save_data(loader=test_loader, model=model)
print(f"Test Accuracy: {100 * accuracy:.4f}%")

Test Accuracy: 98.3100%


In [249]:
# Persist the predictions collected during evaluation as a two-column CSV
# ("ID", "target"); index=False keeps pandas' row index out of the file.
# pandas is imported with the other dependencies in the notebook's first cell.
df = pd.DataFrame(data)
df.to_csv("submission.csv", index=False)