# IFT3395 Competition 2 - Milestone 2 v3 (Fast Version)

## 目标: Validation Accuracy > 0.53, 训练时间 < 15分钟

## 优化策略
- **3折CV** (而非5折) - 减少40%训练时间
- **20 Epochs** - 足够收敛
- **更小的Sklearn模型** - 150棵树
- **简化CNN** - 更少的通道数

In [1]:
# ==================== Cell 1: Imports ====================
import pickle
import numpy as np
import pandas as pd
from pathlib import Path
import random
import os
import warnings
import time
from PIL import Image

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler
from torchvision import transforms

from sklearn.model_selection import StratifiedKFold
from sklearn.ensemble import ExtraTreesClassifier, RandomForestClassifier, GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

warnings.filterwarnings('ignore')

SEED = 42
def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.backends.cudnn.deterministic = True

seed_everything(SEED)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Device: {device}")
start_time = time.time()

Device: cpu


In [2]:
# ==================== Cell 2: Load Data ====================
DATA_DIR = Path('data')

with open(DATA_DIR / 'train_data.pkl', 'rb') as f:
    train_data = pickle.load(f)
with open(DATA_DIR / 'test_data.pkl', 'rb') as f:
    test_data = pickle.load(f)

X_train_raw = train_data['images']
y_train = train_data['labels'].flatten().astype(np.int64)
X_test_raw = test_data['images']

if X_train_raw.max() <= 1.0:
    X_train_raw = (X_train_raw * 255).astype(np.uint8)
    X_test_raw = (X_test_raw * 255).astype(np.uint8)
else:
    X_train_raw = X_train_raw.astype(np.uint8)
    X_test_raw = X_test_raw.astype(np.uint8)

print(f"Train: {X_train_raw.shape}, Test: {X_test_raw.shape}")
print(f"Classes: {np.bincount(y_train)}")

Train: (1080, 28, 28, 3), Test: (400, 28, 28, 3)
Classes: [486 128 206 194  66]


In [3]:
# ==================== Cell 3: Dataset & Model ====================
class SimpleDataset(Dataset):
    def __init__(self, images, labels=None, transform=None):
        self.images = images
        self.labels = labels
        self.transform = transform
        
    def __len__(self):
        return len(self.images)
    
    def __getitem__(self, idx):
        img = Image.fromarray(self.images[idx].astype(np.uint8))
        if self.transform:
            img = self.transform(img)
        if self.labels is not None:
            return img, torch.tensor(self.labels[idx], dtype=torch.long)
        return img

# Transforms
train_tf = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

val_tf = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

# Fast CNN - fewer channels
class FastCNN(nn.Module):
    def __init__(self, num_classes=5):
        super().__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 32, 3, padding=1), nn.BatchNorm2d(32), nn.ReLU(),
            nn.Conv2d(32, 32, 3, padding=1), nn.BatchNorm2d(32), nn.ReLU(),
            nn.MaxPool2d(2),
            
            nn.Conv2d(32, 64, 3, padding=1), nn.BatchNorm2d(64), nn.ReLU(),
            nn.Conv2d(64, 64, 3, padding=1), nn.BatchNorm2d(64), nn.ReLU(),
            nn.MaxPool2d(2),
            
            nn.Conv2d(64, 128, 3, padding=1), nn.BatchNorm2d(128), nn.ReLU(),
            nn.Conv2d(128, 128, 3, padding=1), nn.BatchNorm2d(128), nn.ReLU(),
            nn.AdaptiveAvgPool2d(1)
        )
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Dropout(0.3),
            nn.Linear(128, num_classes)
        )
        
    def forward(self, x):
        return self.classifier(self.features(x))

print("Model and dataset defined.")

Model and dataset defined.


In [4]:
# ==================== Cell 4: Helper Functions ====================
def get_sampler(labels):
    counts = np.bincount(labels)
    weights = 1.0 / np.maximum(counts, 1)
    sample_weights = weights[labels]
    return WeightedRandomSampler(torch.from_numpy(sample_weights).double(), len(sample_weights))

def train_epoch(model, loader, criterion, optimizer, device):
    model.train()
    correct, total = 0, 0
    for imgs, labels in loader:
        imgs, labels = imgs.to(device), labels.to(device)
        optimizer.zero_grad()
        out = model(imgs)
        loss = criterion(out, labels)
        loss.backward()
        optimizer.step()
        _, pred = out.max(1)
        total += labels.size(0)
        correct += pred.eq(labels).sum().item()
    return correct / total

@torch.no_grad()
def validate(model, loader, device):
    model.eval()
    correct, total = 0, 0
    probs = []
    for imgs, labels in loader:
        imgs, labels = imgs.to(device), labels.to(device)
        out = model(imgs)
        _, pred = out.max(1)
        total += labels.size(0)
        correct += pred.eq(labels).sum().item()
        probs.append(F.softmax(out, dim=1).cpu().numpy())
    return correct / total, np.concatenate(probs)

@torch.no_grad()
def predict(model, loader, device):
    model.eval()
    probs = []
    for imgs in loader:
        imgs = imgs.to(device)
        out = model(imgs)
        probs.append(F.softmax(out, dim=1).cpu().numpy())
    return np.concatenate(probs)

print("Helper functions defined.")

Helper functions defined.


In [5]:
# ==================== Cell 5: Fast Training ====================
N_FOLDS = 3  # 3折更快
EPOCHS = 20  # 20个epoch足够
BATCH_SIZE = 64  # 大batch更快

skf = StratifiedKFold(n_splits=N_FOLDS, shuffle=True, random_state=SEED)

# Preallocate
cnn_oof = np.zeros((len(y_train), 5))
cnn_test = np.zeros((len(X_test_raw), 5))
et_oof = np.zeros((len(y_train), 5))
et_test = np.zeros((len(X_test_raw), 5))
rf_oof = np.zeros((len(y_train), 5))
rf_test = np.zeros((len(X_test_raw), 5))

# Flatten for sklearn
X_flat = X_train_raw.reshape(len(X_train_raw), -1).astype(np.float32) / 255.0
X_test_flat = X_test_raw.reshape(len(X_test_raw), -1).astype(np.float32) / 255.0

print(f"Config: {N_FOLDS} folds, {EPOCHS} epochs, batch={BATCH_SIZE}")
print("="*50)

Config: 3 folds, 20 epochs, batch=64


In [6]:
# ==================== Cell 6: Main Training Loop ====================
for fold, (tr_idx, val_idx) in enumerate(skf.split(X_train_raw, y_train)):
    fold_start = time.time()
    print(f"\n--- Fold {fold+1}/{N_FOLDS} ---")
    
    X_tr, y_tr = X_train_raw[tr_idx], y_train[tr_idx]
    X_val, y_val = X_train_raw[val_idx], y_train[val_idx]
    
    # === CNN ===
    train_ds = SimpleDataset(X_tr, y_tr, train_tf)
    val_ds = SimpleDataset(X_val, y_val, val_tf)
    train_loader = DataLoader(train_ds, BATCH_SIZE, sampler=get_sampler(y_tr), num_workers=0)
    val_loader = DataLoader(val_ds, BATCH_SIZE, shuffle=False, num_workers=0)
    
    model = FastCNN(5).to(device)
    criterion = nn.CrossEntropyLoss(label_smoothing=0.1)
    optimizer = optim.AdamW(model.parameters(), lr=0.002, weight_decay=1e-4)
    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=EPOCHS)
    
    best_acc, best_weights = 0, None
    for ep in range(EPOCHS):
        train_epoch(model, train_loader, criterion, optimizer, device)
        val_acc, _ = validate(model, val_loader, device)
        scheduler.step()
        if val_acc > best_acc:
            best_acc = val_acc
            best_weights = {k: v.cpu().clone() for k, v in model.state_dict().items()}
    
    model.load_state_dict(best_weights)
    _, val_probs = validate(model, val_loader, device)
    cnn_oof[val_idx] = val_probs
    
    # Test prediction
    test_ds = SimpleDataset(X_test_raw, None, val_tf)
    test_loader = DataLoader(test_ds, BATCH_SIZE, shuffle=False, num_workers=0)
    cnn_test += predict(model, test_loader, device) / N_FOLDS
    print(f"  CNN: {best_acc:.4f}")
    
    # === ExtraTrees (快速版) ===
    et = ExtraTreesClassifier(n_estimators=150, max_depth=20, class_weight='balanced', 
                              random_state=SEED+fold, n_jobs=-1)
    et.fit(X_flat[tr_idx], y_train[tr_idx])
    et_acc = et.score(X_flat[val_idx], y_train[val_idx])
    et_oof[val_idx] = et.predict_proba(X_flat[val_idx])
    et_test += et.predict_proba(X_test_flat) / N_FOLDS
    print(f"  ET:  {et_acc:.4f}")
    
    # === RandomForest (快速版) ===
    rf = RandomForestClassifier(n_estimators=150, max_depth=20, class_weight='balanced',
                                random_state=SEED+fold, n_jobs=-1)
    rf.fit(X_flat[tr_idx], y_train[tr_idx])
    rf_acc = rf.score(X_flat[val_idx], y_train[val_idx])
    rf_oof[val_idx] = rf.predict_proba(X_flat[val_idx])
    rf_test += rf.predict_proba(X_test_flat) / N_FOLDS
    print(f"  RF:  {rf_acc:.4f}")
    
    print(f"  Fold time: {(time.time()-fold_start)/60:.1f} min")

print(f"\nTotal training time: {(time.time()-start_time)/60:.1f} min")


--- Fold 1/3 ---
  CNN: 0.4083
  ET:  0.4917
  RF:  0.4833
  Fold time: 0.5 min

--- Fold 2/3 ---
  CNN: 0.4861
  ET:  0.5000
  RF:  0.5389
  Fold time: 0.5 min

--- Fold 3/3 ---
  CNN: 0.4194
  ET:  0.4917
  RF:  0.4861
  Fold time: 0.5 min

Total training time: 1.5 min


In [7]:
# ==================== Cell 7: Ensemble ====================
cnn_acc = accuracy_score(y_train, np.argmax(cnn_oof, axis=1))
et_acc = accuracy_score(y_train, np.argmax(et_oof, axis=1))
rf_acc = accuracy_score(y_train, np.argmax(rf_oof, axis=1))

print("Individual Model CV Accuracy:")
print(f"  CNN: {cnn_acc:.4f}")
print(f"  ET:  {et_acc:.4f}")
print(f"  RF:  {rf_acc:.4f}")

# 快速权重搜索
best_acc, best_w = 0, None
for w1 in [0.3, 0.4, 0.5, 0.6]:
    for w2 in [0.2, 0.3, 0.4]:
        w3 = 1 - w1 - w2
        if w3 < 0: continue
        oof = w1 * cnn_oof + w2 * et_oof + w3 * rf_oof
        acc = accuracy_score(y_train, np.argmax(oof, axis=1))
        if acc > best_acc:
            best_acc, best_w = acc, (w1, w2, w3)

print(f"\nBest Ensemble: CNN={best_w[0]}, ET={best_w[1]}, RF={best_w[2]}")
print(f"Ensemble CV Accuracy: {best_acc:.4f}")

Individual Model CV Accuracy:
  CNN: 0.4380
  ET:  0.4944
  RF:  0.5028

Best Ensemble: CNN=0.5, ET=0.2, RF=0.3
Ensemble CV Accuracy: 0.4972


In [8]:
# ==================== Cell 8: Submission ====================
w1, w2, w3 = best_w
final_probs = w1 * cnn_test + w2 * et_test + w3 * rf_test
preds = np.argmax(final_probs, axis=1)

submission = pd.DataFrame({'ImageId': np.arange(len(preds)), 'Label': preds})
submission.to_csv('submission_milestone2_v3.csv', index=False)

print("Saved: submission_milestone2_v3.csv")
print(f"\nPrediction distribution:")
print(submission['Label'].value_counts().sort_index())
print(f"\nTotal time: {(time.time()-start_time)/60:.1f} min")

Saved: submission_milestone2_v3.csv

Prediction distribution:
Label
0    208
1     60
2     35
3     89
4      8
Name: count, dtype: int64

Total time: 1.5 min
