# Kaggle Diabetic Retinopathy - Milestone 2 (PyTorch CNN)

In this notebook, we implement a **Convolutional Neural Network (CNN)** using **PyTorch** to improve upon the Milestone 1 baseline.
We use data augmentation to handle the small dataset size and train a custom CNN architecture.



In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, random_split
import torchvision.transforms as transforms
import numpy as np
import pickle
from PIL import Image
import matplotlib.pyplot as plt
import pandas as pd
import os
# Select the compute device: CUDA when PyTorch reports it as available, otherwise the CPU
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(f"Using device: {device}")



Using device: cpu


## 1. Dataset Implementation
We define a custom `Dataset` class to load the pickle data and apply transformations.



In [2]:
class RetinopathyDataset(Dataset):
    """Dataset backed by a pickle file that holds an ``'images'`` entry and, for training, a ``'labels'`` entry.

    Args:
        pickle_path: Path of the pickle file to read. The unpickled object is
            indexed with ``'images'`` (always) and ``'labels'`` (train mode only).
        transform: Optional callable applied to each image after it has been
            converted to a PIL image.
        mode: ``'train'`` to return ``(image, label)`` pairs, ``'test'`` to
            return images only.

    Raises:
        ValueError: If ``mode`` is not ``'train'`` or ``'test'``, or if the
            number of flattened labels differs from the number of images.
    """

    _VALID_MODES = ('train', 'test')

    def __init__(self, pickle_path, transform=None, mode='train'):
        # Reject unknown modes up front: previously any other value was silently
        # treated as unlabeled data, which hides typos such as mode='Train'.
        if mode not in self._VALID_MODES:
            raise ValueError(
                f"mode must be one of {self._VALID_MODES}, got {mode!r}"
            )

        self.transform = transform
        self.mode = mode

        # NOTE(security): pickle.load can execute arbitrary code while
        # deserializing; only open files that come from a trusted source.
        with open(pickle_path, 'rb') as f:
            data = pickle.load(f)

        self.images = data['images']
        if self.mode == 'train':
            # assumes data['labels'] is array-like with a .flatten() method
            # (e.g. a NumPy array) -- TODO confirm against the data files
            self.labels = data['labels'].flatten()
            # One label per image is required, otherwise __getitem__ would pair
            # images with the wrong labels or fail late with an IndexError.
            if len(self.labels) != len(self.images):
                raise ValueError(
                    f"Found {len(self.labels)} labels for {len(self.images)} images"
                )
        else:
            self.labels = None

    def __len__(self):
        """Return the number of images in the dataset."""
        return len(self.images)

    def __getitem__(self, idx):
        """Return the sample at ``idx``.

        Returns:
            ``(image, label)`` in train mode, where ``label`` is a ``torch.long``
            scalar tensor; the image alone in test mode. The image is the PIL
            image itself when no transform is set, otherwise the transform's output.
        """
        img = self.images[idx]
        img = Image.fromarray(img)  # Convert to PIL for transforms

        if self.transform:
            img = self.transform(img)

        if self.mode == 'train':
            label = self.labels[idx]
            return img, torch.tensor(label, dtype=torch.long)
        else:
            return img



## 2. Data Augmentation & Loading
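We apply data augmentation (random horizontal flips, small rotations, brightness/contrast jitter) to the training split to reduce overfitting; the validation split is only converted to tensors and normalized.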



In [3]:
# Hyperparameters
BATCH_SIZE = 32
VAL_SPLIT = 0.2
SEED = 42
TRAIN_DATA_PATH = 'train_data.pkl'

# Transforms
# Training: random flip / rotation / brightness-contrast jitter, then tensor
# conversion and per-channel normalization with mean 0.5 and std 0.5.
train_transform = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ColorJitter(brightness=0.2, contrast=0.2),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

# Validation / test: the same tensor conversion and normalization, no augmentation.
val_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

# Load Data
# This instance already uses the training transform, so it also backs the
# training subset below instead of reading the pickle a further time.
full_dataset = RetinopathyDataset(TRAIN_DATA_PATH, transform=train_transform, mode='train')

# Split
# Seeding the global torch RNG makes the permutation (and hence the split) reproducible.
torch.manual_seed(SEED)
indices = torch.randperm(len(full_dataset)).tolist()
val_size = int(len(full_dataset) * VAL_SPLIT)
train_indices = indices[val_size:]
val_indices = indices[:val_size]

# Create Subsets with correct transforms
train_subset = torch.utils.data.Subset(full_dataset, train_indices)
# Note: the validation subset needs its own dataset instance because the
# transform is stored on the dataset, and validation must not be augmented.
val_subset = torch.utils.data.Subset(
    RetinopathyDataset(TRAIN_DATA_PATH, transform=val_transform, mode='train'),
    val_indices
)

train_loader = DataLoader(train_subset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_subset, batch_size=BATCH_SIZE, shuffle=False)

print(f"Train batches: {len(train_loader)}")
print(f"Val batches: {len(val_loader)}")



FileNotFoundError: [Errno 2] No such file or directory: 'train_data.pkl'

## 3. Model Architecture
A simple CNN with two convolutional layers (each followed by batch normalization, ReLU, and max pooling) and a fully connected classifier with dropout.



In [4]:
class SimpleCNN(nn.Module):
    """Two-block convolutional classifier for 3-channel 28x28 images.

    Each block is Conv2d -> BatchNorm2d -> ReLU -> 2x2 max pooling; the result
    is flattened and passed through a hidden linear layer with dropout and a
    final linear layer that outputs one logit per class.

    Args:
        num_classes: Number of output logits.
    """

    def __init__(self, num_classes=5):
        super().__init__()

        # Input: (3, 28, 28)
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm2d(32)
        self.pool = nn.MaxPool2d(2, 2) # Output: (32, 14, 14)

        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm2d(64)
        # Pool again -> Output: (64, 7, 7)

        # Feature count per sample after the second pooling step for 28x28 inputs
        self.flatten_dim = 64 * 7 * 7

        self.fc1 = nn.Linear(self.flatten_dim, 256)
        self.dropout = nn.Dropout(0.5)
        self.fc2 = nn.Linear(256, num_classes)

    def forward(self, x):
        """Compute class logits for a batch.

        Args:
            x: Tensor of shape ``(batch, 3, 28, 28)``.

        Returns:
            Tensor of shape ``(batch, num_classes)`` with unnormalized logits.

        Raises:
            RuntimeError: If the spatial size is not 28x28, because the
                flattened features no longer match ``fc1``.
        """
        x = self.pool(torch.relu(self.bn1(self.conv1(x))))
        x = self.pool(torch.relu(self.bn2(self.conv2(x))))
        # Flatten everything except the batch dimension. Using view(-1, flatten_dim)
        # here would silently fold extra features into the batch dimension for
        # inputs of the wrong size; this way the mismatch surfaces in fc1.
        x = torch.flatten(x, 1)
        x = torch.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.fc2(x)
        return x

# Build the five-class network, move it to the selected device, and show its layers
model = SimpleCNN(num_classes=5)
model = model.to(device)
print(model)



SimpleCNN(
  (conv1): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (fc1): Linear(in_features=3136, out_features=256, bias=True)
  (dropout): Dropout(p=0.5, inplace=False)
  (fc2): Linear(in_features=256, out_features=5, bias=True)
)


## 4. Training Loop
We train for 200 epochs and save the best model based on validation accuracy.



In [5]:
def _evaluate_accuracy(model, loader, device):
    """Return the accuracy of ``model`` over ``loader`` as a percentage.

    Switches the model to eval mode and disables gradient tracking; the caller
    is responsible for switching back to train mode afterwards.
    """
    model.eval()
    n_correct = 0
    n_seen = 0
    with torch.no_grad():
        for images, labels in loader:
            images, labels = images.to(device), labels.to(device)
            predicted = model(images).argmax(dim=1)
            n_seen += labels.size(0)
            n_correct += (predicted == labels).sum().item()
    return 100 * n_correct / n_seen


criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

EPOCHS = 200
best_val_acc = 0.0
train_losses = []
val_accs = []

print(f"Starting training for {EPOCHS} epochs...")

for epoch in range(EPOCHS):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        n_batch = labels.size(0)
        # criterion returns the batch mean; weight it by the batch size so a
        # shorter final batch does not skew the epoch average.
        running_loss += loss.item() * n_batch
        predicted = outputs.argmax(dim=1)
        total += n_batch
        correct += (predicted == labels).sum().item()

    train_acc = 100 * correct / total
    avg_loss = running_loss / total  # mean loss per training sample
    train_losses.append(avg_loss)

    # Validation
    val_acc = _evaluate_accuracy(model, val_loader, device)
    val_accs.append(val_acc)

    # Always checkpoint after the first epoch so the file on disk belongs to
    # this run even if validation accuracy never rises above 0.
    if epoch == 0 or val_acc > best_val_acc:
        best_val_acc = val_acc
        torch.save(model.state_dict(), 'best_model_notebook.pth')

    if (epoch + 1) % 10 == 0:
        print(f"Epoch [{epoch+1}/{EPOCHS}], Loss: {avg_loss:.4f}, Train Acc: {train_acc:.2f}%, Val Acc: {val_acc:.2f}%")

print(f"Best Validation Accuracy: {best_val_acc:.2f}%")



Starting training for 200 epochs...


NameError: name 'train_loader' is not defined

## 5. Visualization
Plotting the training loss and validation accuracy.



In [None]:
# One figure with two side-by-side panels: training loss (left), validation accuracy (right)
fig, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=(12, 5))

# Left panel: average training loss recorded for each epoch
loss_ax.plot(train_losses, label='Train Loss')
loss_ax.set_title('Training Loss')
loss_ax.set_xlabel('Epoch')
loss_ax.set_ylabel('Loss')
loss_ax.legend()

# Right panel: validation accuracy recorded for each epoch
acc_ax.plot(val_accs, label='Val Accuracy', color='orange')
acc_ax.set_title('Validation Accuracy')
acc_ax.set_xlabel('Epoch')
acc_ax.set_ylabel('Accuracy (%)')
acc_ax.legend()

plt.show()



## 6. Submission
Generate predictions for the test set using the best model.



In [None]:
# Load Test Data
# mode='test' makes the dataset yield images only, with the non-augmenting transform.
test_dataset = RetinopathyDataset('test_data.pkl', transform=val_transform, mode='test')
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

# Load Best Model
# map_location remaps the stored tensors onto the current device, so a
# checkpoint written on a GPU machine still loads on a CPU-only one.
model.load_state_dict(torch.load('best_model_notebook.pth', map_location=device))
model.eval()

predictions = []
with torch.no_grad():
    for images in test_loader:
        images = images.to(device)
        outputs = model(images)
        predicted = outputs.argmax(dim=1)
        predictions.extend(predicted.cpu().tolist())

# Create Submission CSV
# IDs are assigned 1..N in dataset order (shuffle=False keeps that order).
ids = np.arange(1, len(predictions) + 1)
df = pd.DataFrame({'ID': ids, 'Label': predictions})
filename = 'submission_milestone2_notebook.csv'
df.to_csv(filename, index=False)

print(f"Submission saved to {filename}")
df.head()

