In [8]:
!pip install kaggle pandas numpy torch scikit-learn -qqq

In [40]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, TensorDataset

In [41]:
# Hyperparameters
batch_size = 64
learning_rate = 0.01
epochs = 100

In [42]:
# Load Kaggle MNIST dataset
train_data = pd.read_csv('./digit-recognizer/train.csv')
test_data = pd.read_csv('./digit-recognizer/test.csv')

In [43]:
# Prepare dataset
X = train_data.iloc[:, 1:].values / 255  # Normalize pixel values
y = train_data.iloc[:, 0].values
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

In [44]:
# Convert to PyTorch tensors
X_train = torch.tensor(X_train, dtype=torch.float).view(-1, 1, 28, 28)
y_train = torch.tensor(y_train, dtype=torch.long)
X_val = torch.tensor(X_val, dtype=torch.float).view(-1, 1, 28, 28)
y_val = torch.tensor(y_val, dtype=torch.long)

In [45]:
# Create data loaders
train_loader = DataLoader(TensorDataset(X_train, y_train), batch_size=batch_size, shuffle=True)
val_loader = DataLoader(TensorDataset(X_val, y_val), batch_size=batch_size, shuffle=False)

In [46]:

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        # First convolutional layer
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, padding=1)
        # Second convolutional layer
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        # Max pooling
        self.pool = nn.MaxPool2d(2, 2)
        # Dropout layer
        self.dropout1 = nn.Dropout(0.25)
        # Fully connected layers
        self.fc1 = nn.Linear(64 * 12 * 12, 128)  # 64 channels * 12x12 image size
        self.dropout2 = nn.Dropout(0.5)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        # Apply convolutions, followed by max pooling
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        # Flatten the tensor for the fully connected layers
        x = x.view(-1, 64 * 12 * 12)
        x = self.dropout1(x)
        # Apply first fully connected layer with ReLU
        x = F.relu(self.fc1(x))
        x = self.dropout2(x)
        # Apply second fully connected layer
        x = self.fc2(x)
        return F.log_softmax(x, dim=1)


# Initialize the model, loss function, and optimizer
model = Net()
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=learning_rate)




In [47]:
# Training function
def train(epoch):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        if batch_idx % 100 == 0:
            print(f'Train Epoch: {epoch} [{batch_idx * len(data)}/{len(train_loader.dataset)} ({100. * batch_idx / len(train_loader):.0f}%)]\tLoss: {loss.item():.6f}')



In [48]:
# Validation function
def validate():
    model.eval()
    val_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in val_loader:
            output = model(data)
            val_loss += criterion(output, target).item()
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.view_as(pred)).sum().item()

    val_loss /= len(val_loader.dataset)
    print(f'\nValidation set: Average loss: {val_loss:.4f}, Accuracy: {correct}/{len(val_loader.dataset)} ({100. * correct / len(val_loader.dataset):.0f}%)\n')


In [49]:

# Run the training and validation
for epoch in range(1, epochs + 1):
    train(epoch)
    validate()

RuntimeError: shape '[-1, 9216]' is invalid for input of size 200704

In [38]:
import pandas as pd
import torch
from torch.utils.data import DataLoader, TensorDataset

# Assuming the model and other necessary imports are already done

# Load the test data
test_data = pd.read_csv('./digit-recognizer/test.csv')
X_test = test_data.values / 255  # Normalize the pixel values
X_test = torch.tensor(X_test, dtype=torch.float).view(-1, 1, 28, 28)

# Create a DataLoader for the test data
test_loader = DataLoader(TensorDataset(X_test, torch.zeros(len(X_test))), batch_size=batch_size, shuffle=False)

# Function to get predictions
def get_predictions(model, data_loader):
    model.eval()
    predictions = []
    with torch.no_grad():
        for data, _ in data_loader:
            output = model(data)
            pred = output.argmax(dim=1, keepdim=True)
            predictions.extend(pred.tolist())
    return [p[0] for p in predictions]

# Get predictions
predictions = get_predictions(model, test_loader)

# Create a submission DataFrame
submission = pd.DataFrame({
    'ImageId': range(1, len(predictions) + 1),
    'Label': predictions
})

# Save to CSV
submission.to_csv('submission.csv', index=False)