# IFT3395 Competition 2 - Winning Solution (Ensemble)
**Target**: Score > 0.53

## Strategy
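1. **Hybrid Ensemble**: Combines a custom **Lightweight CNN** (PyTorch, a small ResNet-style network) with **ExtraTrees** (Scikit-Learn); their predicted class probabilities are averaged with equal (50/50) weights.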
2. **Class Imbalance Handling**: Uses `WeightedRandomSampler` to ensure the model learns rare classes (Class 4).
3. **Training from Scratch**: No pre-trained models used, complying with strict competition rules.
4. **Test Time Augmentation (TTA)**: Averages the CNN's predictions over each test image and its horizontally flipped copy to improve robustness.



In [None]:
import pickle
import numpy as np
import pandas as pd
import random
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler
from torchvision import transforms
from sklearn.model_selection import StratifiedKFold
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.preprocessing import StandardScaler
from tqdm.notebook import tqdm
from PIL import Image

# Single seed shared by every random number generator used in this notebook.
SEED = 42


def set_seed(seed):
    """Seed the Python, NumPy and PyTorch RNGs so that runs are repeatable.

    Args:
        seed: Integer seed applied to every generator.
    """
    # NOTE: Python reads PYTHONHASHSEED at interpreter start-up, so setting it
    # here is mainly useful for subprocesses launched afterwards.
    os.environ['PYTHONHASHSEED'] = str(seed)
    for seeder in (random.seed, np.random.seed, torch.manual_seed):
        seeder(seed)

    if not torch.cuda.is_available():
        return

    torch.cuda.manual_seed(seed)
    # Prefer deterministic cuDNN kernels over auto-tuned (benchmarked) ones.
    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False


set_seed(SEED)
DEVICE = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(f"Using device: {DEVICE}")



In [None]:
# Load Data
def load_data():
    """Load the pickled training and test sets.

    Files are read from ``data/`` when ``data/train_data.pkl`` exists there,
    otherwise from the current working directory.

    Returns:
        Tuple ``(train_data, test_data)`` holding the two unpickled objects.
    """
    # Prefer the 'data' sub-folder; fall back to the working directory.
    path_prefix = 'data/' if os.path.exists('data/train_data.pkl') else ''

    def _unpickle(file_name):
        # NOTE: unpickling can execute arbitrary code - only load trusted files.
        with open(f'{path_prefix}{file_name}', 'rb') as handle:
            return pickle.load(handle)

    loaded = [_unpickle(name) for name in ('train_data.pkl', 'test_data.pkl')]
    return loaded[0], loaded[1]

train_data, test_data = load_data()
train_images = train_data['images']  # (1080, 28, 28, 3)
# Flatten to 1-D and force int64: np.bincount only accepts integer input and
# PyTorch's CrossEntropyLoss expects int64 (long) class-index targets, so the
# labels are normalised once here regardless of how they were pickled.
train_labels = np.asarray(train_data['labels']).flatten().astype(np.int64)
test_images = test_data['images']    # (400, 28, 28, 3)

# Check data stats
print(f"Train Img: {train_images.shape}, Labels: {train_labels.shape}")
print(f"Test Img: {test_images.shape}")
print(f"Classes: {np.unique(train_labels)}")
class_counts = np.bincount(train_labels)
print(f"Class Counts: {class_counts}")

# Calculate Class Weights for Loss Function (Inverse Frequency)
# We want to penalize mistakes on rare classes more:
#   weight_c = n_samples / (n_classes * count_c)
# NOTE(review): a class id with zero samples would yield an infinite weight;
# this assumes every id in 0..max(label) occurs at least once - confirm.
# NOTE: train_one_fold builds nn.CrossEntropyLoss() without these weights and
# balances classes through WeightedRandomSampler instead.
total_samples = len(train_labels)
class_weights = total_samples / (len(class_counts) * class_counts)
# torch.tensor replaces the legacy torch.FloatTensor constructor.
class_weights_tensor = torch.tensor(class_weights, dtype=torch.float32, device=DEVICE)
print(f"Class Weights: {class_weights}")



In [None]:
class QuickDrawDataset(Dataset):
    """Dataset wrapping an indexable collection of images and optional labels.

    Args:
        images: Indexable collection of image arrays.
        labels: Optional indexable collection of labels aligned with ``images``.
        transform: Optional callable applied to the PIL image; when omitted the
            image is converted with ``transforms.ToTensor()``.
    """

    def __init__(self, images, labels=None, transform=None):
        self.images = images
        self.labels = labels
        self.transform = transform

    def __len__(self):
        """Return the number of images."""
        return len(self.images)

    def __getitem__(self, idx):
        """Return ``(tensor, label)`` when labels were given, else ``tensor``."""
        sample = self.images[idx]

        # PIL needs uint8 pixels: arrays whose maximum is <= 1.0 are rescaled
        # by 255 first, anything else is cast directly.
        if sample.dtype != np.uint8:
            rescaled = sample * 255 if sample.max() <= 1.0 else sample
            sample = rescaled.astype(np.uint8)

        pil_image = Image.fromarray(sample)

        convert = self.transform if self.transform else transforms.ToTensor()
        img_tensor = convert(pil_image)

        if self.labels is None:
            return img_tensor
        return img_tensor, self.labels[idx]

# Per-channel normalisation constants: (x - 0.5) / 0.5 maps ToTensor's [0, 1]
# output range onto [-1, 1].
_NORM_MEAN = (0.5, 0.5, 0.5)
_NORM_STD = (0.5, 0.5, 0.5)


def _to_normalized_tensor():
    """Return fresh ToTensor + Normalize steps shared by both pipelines."""
    return [transforms.ToTensor(), transforms.Normalize(_NORM_MEAN, _NORM_STD)]


# Training pipeline: random rotation, horizontal flip and small shift/zoom,
# followed by tensor conversion and normalisation.
_train_augmentations = [
    transforms.RandomRotation(15),
    transforms.RandomHorizontalFlip(),
    transforms.RandomAffine(degrees=0, translate=(0.1, 0.1), scale=(0.9, 1.1)),
]
train_transforms = transforms.Compose(_train_augmentations + _to_normalized_tensor())

# Validation/test pipeline: deterministic, tensor conversion + normalisation only.
val_transforms = transforms.Compose(_to_normalized_tensor())



In [None]:
class ResidualBlock(nn.Module):
    """Two 3x3 conv + batch-norm layers wrapped by an additive skip connection.

    Args:
        in_channels: Channel count of the incoming feature map.
        out_channels: Channel count produced by the block.
        stride: Stride of the first convolution (and of the projection
            shortcut, when one is needed).
    """

    def __init__(self, in_channels, out_channels, stride=1):
        super().__init__()
        conv3x3 = dict(kernel_size=3, padding=1, bias=False)
        self.conv1 = nn.Conv2d(in_channels, out_channels, stride=stride, **conv3x3)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.conv2 = nn.Conv2d(out_channels, out_channels, stride=1, **conv3x3)
        self.bn2 = nn.BatchNorm2d(out_channels)

        # The skip path is an identity (empty Sequential) unless the block
        # changes resolution or width, in which case a 1x1 conv + batch-norm
        # projects the input to the matching shape.
        needs_projection = stride != 1 or in_channels != out_channels
        if needs_projection:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, kernel_size=1,
                          stride=stride, bias=False),
                nn.BatchNorm2d(out_channels),
            )
        else:
            self.shortcut = nn.Sequential()

    def forward(self, x):
        """Return ``relu(main_path(x) + shortcut(x))``."""
        main = F.relu(self.bn1(self.conv1(x)))
        main = self.bn2(self.conv2(main))
        return F.relu(main + self.shortcut(x))

class SmallResNet(nn.Module):
    """Compact ResNet-style classifier: a stem conv followed by four residual blocks.

    Args:
        num_classes: Size of the output logit vector.
        in_channels: Number of channels of the input images (defaults to 3,
            matching the original hard-coded RGB stem).
    """

    def __init__(self, num_classes=5, in_channels=3):
        super().__init__()
        # Stem: in_channels -> 64, spatial size preserved.
        self.conv1 = nn.Conv2d(in_channels, 64, kernel_size=3, stride=1,
                               padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)

        # Residual stages; spatial sizes below are for a 28x28 input.
        self.layer1 = ResidualBlock(64, 64, stride=1)    # 28 -> 28
        self.layer2 = ResidualBlock(64, 128, stride=2)   # 28 -> 14
        self.layer3 = ResidualBlock(128, 256, stride=2)  # 14 -> 7
        # 3x3 conv, stride 2, padding 1: floor((7 + 2 - 3) / 2) + 1 = 4.
        self.layer4 = ResidualBlock(256, 512, stride=2)  # 7 -> 4

        # Classifier head: global average pooling, then a linear layer.
        self.pool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512, num_classes)

    def forward(self, x):
        """Return class logits of shape ``(batch, num_classes)``."""
        x = F.relu(self.bn1(self.conv1(x)))
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = self.pool(x)
        # flatten keeps the batch dimension and, unlike view, does not require
        # a contiguous tensor.
        x = torch.flatten(x, 1)
        return self.fc(x)

def count_parameters(model):
    """Return the total number of elements across the model's trainable parameters."""
    trainable = 0
    for param in model.parameters():
        # Frozen parameters (requires_grad=False) are excluded.
        if param.requires_grad:
            trainable += param.numel()
    return trainable

# Sanity check: build a throw-away model and report its trainable size.
model_test = SmallResNet()
_n_trainable = count_parameters(model_test)
print(f"Model Parameters: {_n_trainable:,}")



In [None]:
def train_one_fold(fold_idx, train_idx, val_idx, X, y, epochs=40):
    """Train a SmallResNet on one CV fold and return its best-validation weights.

    Args:
        fold_idx: Zero-based fold number (used for logging only).
        train_idx: Indices of the training samples within ``X`` / ``y``.
        val_idx: Indices of the validation samples within ``X`` / ``y``.
        X: Array of images, indexable by ``train_idx`` / ``val_idx``.
        y: Array of integer class labels aligned with ``X``.
        epochs: Number of training epochs; also the cosine schedule length.

    Returns:
        Tuple ``(model, best_acc)``: the model restored to the weights of the
        epoch with the highest validation accuracy, and that accuracy.
    """
    print(f"\n{'='*20} Fold {fold_idx+1} {'='*20}")

    # Prepare Data
    train_ds = QuickDrawDataset(X[train_idx], y[train_idx], transform=train_transforms)
    val_ds = QuickDrawDataset(X[val_idx], y[val_idx], transform=val_transforms)

    # Weighted sampler for imbalance: every sample is drawn with probability
    # proportional to 1 / (size of its class), so classes are seen about
    # equally often. Vectorised lookup replaces the per-sample Python loop.
    y_train = y[train_idx]
    count = np.bincount(y_train)
    weight_per_class = 1. / torch.tensor(count, dtype=torch.float)
    samples_weight = weight_per_class[torch.as_tensor(y_train, dtype=torch.long)]
    sampler = WeightedRandomSampler(samples_weight, len(samples_weight))

    train_loader = DataLoader(train_ds, batch_size=64, sampler=sampler, num_workers=0)
    val_loader = DataLoader(val_ds, batch_size=64, shuffle=False, num_workers=0)

    # Init Model
    model = SmallResNet(num_classes=5).to(DEVICE)

    # Optimizer & Scheduler (cosine decay spans exactly the training run)
    optimizer = optim.AdamW(model.parameters(), lr=1e-3, weight_decay=1e-4)
    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=epochs)
    criterion = nn.CrossEntropyLoss()

    best_acc = 0.0
    best_state = None

    for _ in range(epochs):
        model.train()
        for imgs, labels in train_loader:
            imgs = imgs.to(DEVICE)
            # CrossEntropyLoss expects int64 class indices.
            labels = labels.to(DEVICE, dtype=torch.long)

            optimizer.zero_grad()
            loss = criterion(model(imgs), labels)
            loss.backward()
            optimizer.step()

        # Validation
        model.eval()
        correct = 0
        total = 0
        with torch.no_grad():
            for imgs, labels in val_loader:
                imgs = imgs.to(DEVICE)
                labels = labels.to(DEVICE, dtype=torch.long)
                predicted = model(imgs).argmax(dim=1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

        acc = correct / total if total else 0.0
        scheduler.step()

        # Always snapshot the first epoch so there is something to restore even
        # if accuracy never rises above 0.
        if best_state is None or acc > best_acc:
            best_acc = acc
            # state_dict() returns references to the live tensors and
            # dict.copy() is shallow, so each tensor must be cloned; otherwise
            # later optimizer steps overwrite the "best" snapshot in place.
            best_state = {name: tensor.detach().clone()
                          for name, tensor in model.state_dict().items()}

    print(f"Fold {fold_idx+1} Best Val Acc: {best_acc:.4f}")

    # Restore best model (nothing to restore when epochs == 0)
    if best_state is not None:
        model.load_state_dict(best_state)
    return model, best_acc



In [None]:
def train_sklearn_fold(train_idx, val_idx, X_flat, y):
    """Fit an ExtraTrees classifier on one fold and score it on the held-out part.

    Args:
        train_idx: Indices of the training rows in ``X_flat`` / ``y``.
        val_idx: Indices of the validation rows in ``X_flat`` / ``y``.
        X_flat: 2-D feature matrix (one flattened image per row).
        y: Class labels aligned with ``X_flat``.

    Returns:
        Tuple ``(clf, acc)``: the fitted classifier and its validation accuracy.
    """
    # 'balanced' class weights compensate for class imbalance inside the trees.
    forest = ExtraTreesClassifier(
        n_estimators=500,
        class_weight='balanced',
        random_state=SEED,
        n_jobs=-1,
    )
    forest.fit(X_flat[train_idx], y[train_idx])

    acc = forest.score(X_flat[val_idx], y[val_idx])
    print(f"Sklearn ExtraTrees Val Acc: {acc:.4f}")
    return forest, acc



In [None]:
# Scikit-learn consumes one flat feature vector per image, scaled by 1/255.
X_flat = train_images.reshape(train_images.shape[0], -1).astype(np.float32) / 255.0

# 5-fold stratified cross-validation; both model families share the same splits.
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=SEED)

pytorch_models, sklearn_models, val_accuracies = [], [], []

fold_splits = skf.split(train_images, train_labels)
for fold_i, (train_idx, val_idx) in enumerate(fold_splits):

    # 1. CNN trained on the raw images
    pt_model, pt_acc = train_one_fold(fold_i, train_idx, val_idx, train_images, train_labels)
    pytorch_models.append(pt_model)

    # 2. ExtraTrees trained on the flattened pixels
    sk_model, sk_acc = train_sklearn_fold(train_idx, val_idx, X_flat, train_labels)
    sklearn_models.append(sk_model)

    # Mean of the two individual accuracies - a proxy, not the accuracy of the
    # combined ensemble prediction.
    val_accuracies.append((pt_acc + sk_acc) / 2)

print(f"\nAverage Estimated CV Score: {np.mean(val_accuracies):.4f}")



In [None]:
# Inference Logic
def predict_test(pt_models, sk_models, test_imgs, num_classes=5):
    """Predict test labels by averaging CNN and ExtraTrees class probabilities.

    Each CNN is evaluated on the original batch and on its horizontal flip
    (test-time augmentation) and the two softmax outputs are averaged. CNN and
    scikit-learn probabilities are each averaged over their fold models and
    then blended 50/50.

    Args:
        pt_models: Trained PyTorch models (one per fold), already on ``DEVICE``.
        sk_models: Fitted scikit-learn classifiers exposing ``predict_proba``
            and ``classes_``.
        test_imgs: Array of test images; reshaped to ``(len(test_imgs), -1)``
            and scaled by 1/255 for the scikit-learn models.
        num_classes: Number of target classes (width of the probability
            matrices).

    Returns:
        1-D array of predicted class indices, one per test image.
    """
    # Prepare PyTorch Test Loader
    test_ds = QuickDrawDataset(test_imgs, transform=val_transforms)
    test_loader = DataLoader(test_ds, batch_size=64, shuffle=False)

    # Prepare Sklearn Data
    n_samples = len(test_imgs)
    X_test_flat = test_imgs.reshape(n_samples, -1).astype(np.float32) / 255.0

    # Float64 accumulators for the per-family probability sums
    pt_probs = np.zeros((n_samples, num_classes))
    sk_probs = np.zeros((n_samples, num_classes))

    print("Predicting with PyTorch Models...")
    for model in pt_models:
        model.eval()
        fold_probs = []
        with torch.no_grad():
            for imgs in test_loader:
                imgs = imgs.to(DEVICE)

                # TTA: original + horizontal flip (dim 3 is width in N,C,H,W).
                # Exactly one forward pass per view; the earlier extra,
                # discarded forward pass has been removed.
                prob_original = F.softmax(model(imgs), dim=1)
                prob_flipped = F.softmax(model(torch.flip(imgs, [3])), dim=1)

                prob = (prob_original + prob_flipped) / 2
                fold_probs.append(prob.cpu().numpy())

        pt_probs += np.concatenate(fold_probs, axis=0)
    pt_probs /= len(pt_models)

    print("Predicting with Sklearn Models...")
    for clf in sk_models:
        # predict_proba columns follow clf.classes_; scatter them by label so
        # a model fitted without some class still lines up with the CNN output.
        class_columns = np.asarray(clf.classes_, dtype=int)
        sk_probs[:, class_columns] += clf.predict_proba(X_test_flat)
    sk_probs /= len(sk_models)

    # Weighted Ensemble (50/50)
    final_probs = 0.5 * pt_probs + 0.5 * sk_probs
    predictions = np.argmax(final_probs, axis=1)

    return predictions

# Run the ensemble on the test images, then eyeball the first few predictions
# and the predicted class histogram as a sanity check.
predictions = predict_test(
    pt_models=pytorch_models,
    sk_models=sklearn_models,
    test_imgs=test_images,
)
print(f"Predictions check: {predictions[:10]}")
print(f"Class Distribution: {np.bincount(predictions)}")



In [None]:
# Generate Submission
filename = 'submission_winning_solution.csv'

# One row per test image: a running id and the predicted class.
# NOTE(review): ids start at 0 here - confirm this matches the expected
# submission format.
submission_columns = {
    'Id': np.arange(len(predictions)),
    'Category': predictions,
}
submission_df = pd.DataFrame(submission_columns)

submission_df.to_csv(filename, index=False)
print(f"Saved submission to {filename}")

