In [1]:
import pandas as pd
import seaborn as sns

In [2]:
train_df = pd.read_csv('./data/train.csv')
test_df = pd.read_csv('./data/test.csv')

In [3]:
train_df.head(3)

Unnamed: 0,label,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [4]:
test_df.head(3)

Unnamed: 0,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [5]:
train_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 42000 entries, 0 to 41999
Columns: 785 entries, label to pixel783
dtypes: int64(785)
memory usage: 251.5 MB


In [6]:
train_df.isna().sum().sum()

0

In [7]:
test_df.isna().sum().sum()

0

In [10]:
pixels = train_df.drop('label', axis=1).values
labels = train_df['label']

In [11]:
import torch
import torch.nn as nn
import torch.nn.functional as F

In [13]:
labels = torch.tensor(labels, dtype=torch.long)
pixels = torch.tensor(pixels, dtype=torch.float32)

  labels = torch.tensor(labels, dtype=torch.long)


In [14]:
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.fc1 = nn.Linear(64 * 7 * 7, 128)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(-1, 64 * 7 * 7)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

# Create an instance of the CNN
model = CNN()

# Print the model architecture
print(model)

CNN(
  (conv1): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=3136, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=10, bias=True)
)


In [15]:
# Check if CUDA is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

# Create an instance of the CNN and move it to the appropriate device
model = CNN().to(device)


cuda


In [16]:
# Reshape the pixel data to 28x28 images
pixels = pixels.reshape(-1, 1, 28, 28)

# Normalize the pixel values (assuming they're in the range 0-255)
pixels = pixels / 255.0

# Create a TensorDataset
dataset = torch.utils.data.TensorDataset(pixels, labels)

# Create a DataLoader
batch_size = 64  # You can adjust this
dataloader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=True)

# Print shapes to verify
print("Pixels shape:", pixels.shape)
print("Labels shape:", labels.shape)

# If you want to use CUDA
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
pixels = pixels.to(device)
labels = labels.to(device)

Pixels shape: torch.Size([42000, 1, 28, 28])
Labels shape: torch.Size([42000])


In [17]:
model = CNN().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

num_epochs = 10
for epoch in range(num_epochs):
    for batch_pixels, batch_labels in dataloader:
        batch_pixels, batch_labels = batch_pixels.to(device), batch_labels.to(device)
        
        # Forward pass
        outputs = model(batch_pixels)
        loss = criterion(outputs, batch_labels)
        
        # Backward pass and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

Epoch [1/10], Loss: 0.0533
Epoch [2/10], Loss: 0.0549
Epoch [3/10], Loss: 0.1881
Epoch [4/10], Loss: 0.0051
Epoch [5/10], Loss: 0.0023
Epoch [6/10], Loss: 0.0052
Epoch [7/10], Loss: 0.0003
Epoch [8/10], Loss: 0.0034
Epoch [9/10], Loss: 0.0007
Epoch [10/10], Loss: 0.0030


In [18]:
test_pixels = torch.tensor(test_df.values, dtype=torch.float32)

# Reshape the pixel data to 28x28 images
test_pixels = test_pixels.reshape(-1, 1, 28, 28)

# Normalize the pixel values (assuming they're in the range 0-255)
test_pixels = test_pixels / 255.0

# If you want to use CUDA
test_pixels = test_pixels.to(device)

# Make predictions
model.eval()
with torch.no_grad():
    predictions = model(test_pixels)
    _, predicted = torch.max(predictions, 1)
    
# Convert the predictions to a NumPy array
predicted = predicted.cpu().numpy()

# Create a DataFrame with the image IDs and their corresponding predictions
submission = pd.DataFrame({'ImageId': range(1, len(predicted) + 1), 'Label': predicted})



In [19]:
submission.to_csv('./data/submission.csv', index=False)