# IFT3395 Competition 2 - Milestone 2 v4

## 目标: Validation Accuracy > 0.53, 训练时间 < 15分钟

## 策略
- **ResBlock CNN**: 残差连接提高性能
- **4模型集成**: CNN + ET + RF + HistGradientBoosting
- **5折CV**: 更稳定的验证
- **多特征**: 像素 + 颜色统计

In [1]:
# Cell 1: Imports
import pickle
import numpy as np
import pandas as pd
from pathlib import Path
import random, os, warnings, time
from PIL import Image

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler
from torchvision import transforms

from sklearn.model_selection import StratifiedKFold
from sklearn.ensemble import ExtraTreesClassifier, RandomForestClassifier, HistGradientBoostingClassifier
from sklearn.metrics import accuracy_score

warnings.filterwarnings('ignore')

SEED = 42


def seed_all(seed):
    """Seed every random number generator used in this notebook.

    Seeds Python's `random`, NumPy's legacy global RNG and PyTorch. When CUDA
    is available, all CUDA devices are seeded and cuDNN is put into
    deterministic mode with auto-tuning disabled.

    Args:
        seed: integer seed shared by all generators.
    """
    random.seed(seed)
    # Only takes effect for Python processes started after this point; the
    # hash seed of the running interpreter is fixed at start-up.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        # Seed every visible GPU, not just the current one.
        torch.cuda.manual_seed_all(seed)
        torch.backends.cudnn.deterministic = True
        # Auto-tuning may pick different kernels from run to run, which
        # defeats the deterministic flag above.
        torch.backends.cudnn.benchmark = False


seed_all(SEED)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Device: {device}")
# Wall-clock reference used by later cells to report elapsed time.
t0 = time.time()

Device: cpu


In [2]:
# Cell 2: Load Data
DATA_DIR = Path('data')


def to_uint8_images(images, unit_range):
    """Convert an image array to uint8 pixel values in [0, 255].

    Args:
        images: array-like of pixel values.
        unit_range: if True the values are treated as lying in [0, 1] and
            are multiplied by 255 before conversion.

    Returns:
        np.ndarray of dtype uint8 with the same shape as `images`.
    """
    arr = np.asarray(images)
    if unit_range:
        arr = arr * 255.0
    if np.issubdtype(arr.dtype, np.floating):
        # astype(np.uint8) truncates toward zero, so a value such as
        # 4.9999995 (k/255*255 after float rounding) would become 4.
        arr = np.rint(arr)
    # Clip so out-of-range values saturate instead of wrapping around.
    return np.clip(arr, 0, 255).astype(np.uint8)


# NOTE(security): pickle.load can execute arbitrary code. Only load files
# that come from a trusted source.
with open(DATA_DIR / 'train_data.pkl', 'rb') as f:
    train_data = pickle.load(f)
with open(DATA_DIR / 'test_data.pkl', 'rb') as f:
    test_data = pickle.load(f)

X_train = train_data['images']
y_train = train_data['labels'].flatten().astype(np.int64)
X_test = test_data['images']

# The scale is decided once, from the training images, and applied to both
# splits so train and test are always converted the same way.
is_unit_range = X_train.max() <= 1.0
X_train = to_uint8_images(X_train, is_unit_range)
X_test = to_uint8_images(X_test, is_unit_range)

n_train, n_test = len(X_train), len(X_test)
assert n_train == len(y_train), "images and labels must have the same length"
print(f"Train: {X_train.shape}, Test: {X_test.shape}")
print(f"Classes: {np.bincount(y_train)}")

Train: (1080, 28, 28, 3), Test: (400, 28, 28, 3)
Classes: [486 128 206 194  66]


In [3]:
# Cell 3: Feature Engineering (No skimage needed)
def extract_features(images):
    """Build one flat feature vector per image.

    Each vector is the concatenation of:
      * every pixel value of the image, flattened and divided by 255;
      * mean, std, min, max, 25th and 75th percentile of each of the
        first three channels;
      * mean, std, min and max of a weighted grayscale combination of
        those three channels.

    Args:
        images: iterable of arrays indexed as [row, col, channel].

    Returns:
        np.ndarray of dtype float32 with one row per image.
    """
    def _channel_summary(channel):
        # Six descriptive statistics for a single colour channel.
        return [channel.mean(), channel.std(), channel.min(), channel.max(),
                np.percentile(channel, 25), np.percentile(channel, 75)]

    rows = []
    for image in images:
        pixels = image.flatten().astype(np.float32) / 255.0

        summary = []
        for idx in range(3):
            summary += _channel_summary(image[:, :, idx].astype(np.float32))

        # Weighted grayscale computed on the raw (unscaled) channel values.
        luma = 0.299*image[:, :, 0] + 0.587*image[:, :, 1] + 0.114*image[:, :, 2]
        summary += [luma.mean(), luma.std(), luma.min(), luma.max()]

        rows.append(np.concatenate([pixels, np.array(summary, dtype=np.float32)]))
    return np.array(rows, dtype=np.float32)

# Build the tabular feature matrices for both splits with the same extractor.
print("Extracting features...")
X_train_feat = extract_features(X_train)
X_test_feat = extract_features(X_test)
# Quick shape check: rows = training images, columns = features per image.
print(f"Feature shape: {X_train_feat.shape}")

Extracting features...
Feature shape: (1080, 2374)


In [4]:
# Cell 4: Dataset & CNN Model
class ImgDataset(Dataset):
    """Dataset wrapping an indexable collection of image arrays.

    Args:
        images: indexable collection of image arrays.
        labels: optional indexable collection of integer labels; when
            omitted, items are returned without a label.
        transform: optional callable applied to the PIL image.
    """

    def __init__(self, images, labels=None, transform=None):
        self.images, self.labels, self.transform = images, labels, transform

    def __len__(self):
        return len(self.images)

    def __getitem__(self, i):
        """Return `(image, label)` if labels were given, else just `image`."""
        # Transforms operate on PIL images, so convert the array first.
        sample = Image.fromarray(self.images[i].astype(np.uint8))
        if self.transform:
            sample = self.transform(sample)
        if self.labels is None:
            return sample
        return sample, torch.tensor(self.labels[i], dtype=torch.long)

# Per-channel normalisation constants shared by the training and validation
# pipelines. NOTE(review): these match the commonly used ImageNet statistics
# rather than values computed from this dataset - confirm that is intended.
_NORM_MEAN = [0.485, 0.456, 0.406]
_NORM_STD = [0.229, 0.224, 0.225]

# Training pipeline: random augmentation, then tensor conversion + normalisation.
train_tf = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(degrees=15),
    transforms.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1),
    transforms.ToTensor(),
    transforms.Normalize(_NORM_MEAN, _NORM_STD),
])

# Validation/test pipeline: no augmentation, same tensor conversion + normalisation.
val_tf = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(_NORM_MEAN, _NORM_STD),
])

class ResBlock(nn.Module):
    """Residual block: two 3x3 conv + batch-norm layers with a skip connection.

    The channel count and spatial size are preserved, so the input can be
    added directly to the output of the second batch-norm.

    Args:
        ch: number of input (and output) channels.
    """

    def __init__(self, ch):
        super().__init__()
        # Bias is disabled on the convolutions; each is followed by BatchNorm.
        self.conv1 = nn.Conv2d(ch, ch, kernel_size=3, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(ch)
        self.conv2 = nn.Conv2d(ch, ch, kernel_size=3, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(ch)

    def forward(self, x):
        hidden = F.relu(self.bn1(self.conv1(x)))
        residual = self.bn2(self.conv2(hidden))
        # Skip connection is added before the final activation.
        return F.relu(residual + x)

class CNN(nn.Module):
    """Small residual CNN classifier for 3-channel images.

    Three conv/batch-norm/ReLU stages (64, 128, 256 channels) with a ResBlock
    and 2x max-pooling after each of the first two, followed by global
    average pooling, dropout and a linear classification layer.

    Args:
        nc: number of output classes.
    """

    def __init__(self, nc=5):
        super().__init__()
        # Modules are created in the same order as they run, so the
        # Sequential indices (and therefore state_dict keys) are stable.
        stack = [
            # Stage 1
            nn.Conv2d(3, 64, kernel_size=3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(2),
            ResBlock(64),
            # Stage 2
            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(2),
            ResBlock(128),
            # Stage 3
            nn.Conv2d(128, 256, kernel_size=3, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            # Head: global average pool -> dropout -> class logits
            nn.AdaptiveAvgPool2d(1),
            nn.Flatten(),
            nn.Dropout(p=0.4),
            nn.Linear(256, nc),
        ]
        self.layers = nn.Sequential(*stack)

    def forward(self, x):
        """Return the raw class logits for a batch of images."""
        return self.layers(x)


print("Model defined.")

Model defined.


In [5]:
# Cell 5: Helpers
def get_sampler(y):
    """Create a sampler that draws each class with equal total probability.

    Every sample is weighted by the inverse frequency of its class, and
    `len(y)` samples are drawn per pass.

    Args:
        y: 1-D array of non-negative integer class labels.

    Returns:
        A `WeightedRandomSampler` over the indices of `y`.
    """
    class_counts = np.bincount(y)
    # max(count, 1) avoids dividing by zero for label values that never occur.
    inverse_freq = 1.0 / np.maximum(class_counts, 1)
    sample_weights = torch.from_numpy(inverse_freq[y]).double()
    return WeightedRandomSampler(sample_weights, len(y))

def train_ep(model, loader, crit, opt, dev):
    """Run one training pass over `loader`, updating `model` in place.

    Args:
        model: network to train; switched to training mode.
        loader: iterable yielding `(inputs, targets)` batches.
        crit: loss function called as `crit(outputs, targets)`.
        opt: optimizer holding the model's parameters.
        dev: device the batches are moved to before the forward pass.
    """
    model.train()
    for inputs, targets in loader:
        inputs = inputs.to(dev)
        targets = targets.to(dev)
        # Clear gradients left over from the previous batch.
        opt.zero_grad()
        logits = model(inputs)
        batch_loss = crit(logits, targets)
        batch_loss.backward()
        opt.step()

@torch.no_grad()
def val_ep(model, loader, dev):
    """Evaluate `model` on labelled batches.

    Args:
        model: network to evaluate; switched to eval mode.
        loader: iterable yielding `(inputs, targets)` batches.
        dev: device the batches are moved to before the forward pass.

    Returns:
        Tuple `(accuracy, probs)`: the fraction of correctly classified
        samples and a NumPy array of softmax probabilities, one row per
        sample in loader order.
    """
    model.eval()
    n_correct = 0
    n_seen = 0
    batch_probs = []
    for inputs, targets in loader:
        inputs = inputs.to(dev)
        targets = targets.to(dev)
        logits = model(inputs)
        n_correct += (logits.argmax(1) == targets).sum().item()
        n_seen += targets.size(0)
        batch_probs.append(F.softmax(logits, 1).cpu().numpy())
    accuracy = n_correct / n_seen
    return accuracy, np.concatenate(batch_probs)

@torch.no_grad()
def predict(model, loader, dev):
    """Return softmax probabilities for every sample in an unlabelled loader.

    Args:
        model: network to run; switched to eval mode.
        loader: iterable yielding input batches (no targets).
        dev: device the batches are moved to before the forward pass.

    Returns:
        NumPy array with one row of class probabilities per sample, in
        loader order.
    """
    model.eval()
    batch_probs = [
        F.softmax(model(batch.to(dev)), 1).cpu().numpy()
        for batch in loader
    ]
    return np.concatenate(batch_probs)


print("Helpers defined.")

Helpers defined.


In [6]:
# Cell 6: Training
N_FOLDS, EPOCHS, BS = 5, 30, 64
# Derive the number of classes from the labels instead of hard-coding it.
N_CLASSES = int(y_train.max()) + 1
skf = StratifiedKFold(N_FOLDS, shuffle=True, random_state=SEED)

# Storage: out-of-fold (OOF) probabilities for the training set and
# fold-averaged probabilities for the test set, one pair per model.
cnn_oof = np.zeros((n_train, N_CLASSES))
cnn_test = np.zeros((n_test, N_CLASSES))
et_oof = np.zeros((n_train, N_CLASSES))
et_test = np.zeros((n_test, N_CLASSES))
rf_oof = np.zeros((n_train, N_CLASSES))
rf_test = np.zeros((n_test, N_CLASSES))
hgb_oof = np.zeros((n_train, N_CLASSES))
hgb_test = np.zeros((n_test, N_CLASSES))

# The test loader does not depend on the fold, so it is built once.
test_ds = ImgDataset(X_test, None, val_tf)
test_ld = DataLoader(test_ds, BS, shuffle=False, num_workers=0)

# Feature-based models: (label, factory taking the per-fold seed, OOF buffer,
# test buffer). The buffers are the arrays above and are filled in place.
feature_models = [
    ('ET', lambda seed: ExtraTreesClassifier(
        200, max_depth=25, class_weight='balanced', random_state=seed, n_jobs=-1),
     et_oof, et_test),
    ('RF', lambda seed: RandomForestClassifier(
        200, max_depth=25, class_weight='balanced', random_state=seed, n_jobs=-1),
     rf_oof, rf_test),
    ('HGB', lambda seed: HistGradientBoostingClassifier(
        max_iter=100, max_depth=10, random_state=seed),
     hgb_oof, hgb_test),
]

print(f"Training: {N_FOLDS} folds, {EPOCHS} epochs")
print("="*50)

for fold, (tr_i, val_i) in enumerate(skf.split(X_train, y_train)):
    t1 = time.time()
    print(f"\nFold {fold+1}/{N_FOLDS}")

    # ---- CNN ----
    tr_ds = ImgDataset(X_train[tr_i], y_train[tr_i], train_tf)
    val_ds = ImgDataset(X_train[val_i], y_train[val_i], val_tf)
    # The weighted sampler rebalances classes; it replaces shuffling.
    tr_ld = DataLoader(tr_ds, BS, sampler=get_sampler(y_train[tr_i]), num_workers=0)
    val_ld = DataLoader(val_ds, BS, shuffle=False, num_workers=0)

    model = CNN(N_CLASSES).to(device)
    crit = nn.CrossEntropyLoss(label_smoothing=0.1)
    opt = optim.AdamW(model.parameters(), lr=0.001, weight_decay=1e-4)
    sch = optim.lr_scheduler.CosineAnnealingLR(opt, EPOCHS)

    # Start below any reachable accuracy so the first epoch always stores a
    # snapshot; otherwise a fold stuck at 0.0 would leave no weights to load.
    # NOTE(review): the epoch is selected on the same fold that produces the
    # OOF predictions, so the CNN CV accuracy is optimistically biased.
    best_acc, best_state = -1.0, None
    for ep in range(EPOCHS):
        train_ep(model, tr_ld, crit, opt, device)
        acc, _ = val_ep(model, val_ld, device)
        sch.step()
        if acc > best_acc:
            best_acc = acc
            # Copy to CPU so later epochs cannot modify the snapshot.
            best_state = {k: v.cpu().clone() for k, v in model.state_dict().items()}

    model.load_state_dict(best_state)
    _, probs = val_ep(model, val_ld, device)
    cnn_oof[val_i] = probs
    cnn_test += predict(model, test_ld, device) / N_FOLDS
    print(f"  CNN: {best_acc:.4f}")

    # ---- ET / RF / HistGradientBoosting on the engineered features ----
    for label, make_model, oof_buf, test_buf in feature_models:
        clf = make_model(SEED + fold)
        clf.fit(X_train_feat[tr_i], y_train[tr_i])
        oof_buf[val_i] = clf.predict_proba(X_train_feat[val_i])
        test_buf += clf.predict_proba(X_test_feat) / N_FOLDS
        # Left-align "ET:"/"RF:"/"HGB:" to four characters so scores line up.
        print(f"  {label + ':':<4} {clf.score(X_train_feat[val_i], y_train[val_i]):.4f}")

    print(f"  Time: {(time.time()-t1)/60:.1f}min")

print(f"\nTotal: {(time.time()-t0)/60:.1f}min")

Training: 5 folds, 30 epochs

Fold 1/5
  CNN: 0.5324
  ET:  0.5324
  RF:  0.4815
  HGB: 0.5231
  Time: 2.9min

Fold 2/5
  CNN: 0.4815
  ET:  0.5139
  RF:  0.5000
  HGB: 0.4815
  Time: 2.9min

Fold 3/5
  CNN: 0.5185
  ET:  0.5185
  RF:  0.5231
  HGB: 0.5231
  Time: 3.1min

Fold 4/5
  CNN: 0.5278
  ET:  0.4907
  RF:  0.5000
  HGB: 0.4954
  Time: 2.8min

Fold 5/5
  CNN: 0.4537
  ET:  0.4722
  RF:  0.5093
  HGB: 0.4491
  Time: 2.4min

Total: 14.3min


In [7]:
# Cell 7: Ensemble
print("Individual CV Accuracy:")
for label, oof_probs in (("CNN:", cnn_oof), ("ET:", et_oof),
                         ("RF:", rf_oof), ("HGB:", hgb_oof)):
    # Left-align the label to four characters so the scores line up.
    print(f"  {label:<4} {accuracy_score(y_train, oof_probs.argmax(1)):.4f}")

# Grid search over ensemble weights in steps of 0.1.
# The grid is enumerated in integer tenths. np.arange with a float step has
# an unreliable endpoint (np.arange(0.1, 0.4, 0.1) also yields 0.4), and
# 1 - w1 - w2 - w3 can come out slightly negative for combinations that sum
# to exactly 1, which silently dropped valid HGB=0 candidates.
TENTHS = 10
best_acc, best_w = -1.0, None
for t1 in range(2, 6):          # CNN: 0.2 .. 0.5
    for t2 in range(1, 4):      # ET:  0.1 .. 0.3
        for t3 in range(1, 4):  # RF:  0.1 .. 0.3
            t4 = TENTHS - t1 - t2 - t3  # HGB takes the remainder
            if t4 < 0:
                continue
            w1, w2, w3, w4 = t1 / TENTHS, t2 / TENTHS, t3 / TENTHS, t4 / TENTHS
            oof = w1*cnn_oof + w2*et_oof + w3*rf_oof + w4*hgb_oof
            acc = accuracy_score(y_train, oof.argmax(1))
            # Strict '>' keeps the first combination that reaches the best score.
            if acc > best_acc:
                best_acc, best_w = acc, (w1, w2, w3, w4)

print(f"\nBest: CNN={best_w[0]:.1f}, ET={best_w[1]:.1f}, RF={best_w[2]:.1f}, HGB={best_w[3]:.1f}")
print(f"Ensemble CV: {best_acc:.4f}")

Individual CV Accuracy:
  CNN: 0.5028
  ET:  0.5056
  RF:  0.5028
  HGB: 0.4944

Best: CNN=0.5, ET=0.3, RF=0.1, HGB=0.1
Ensemble CV: 0.5139


In [8]:
# Cell 8: Submission
# Blend the fold-averaged test probabilities with the weights chosen on the
# out-of-fold predictions, adding one model at a time.
w1, w2, w3, w4 = best_w
final = w1*cnn_test
final = final + w2*et_test
final = final + w3*rf_test
final = final + w4*hgb_test
preds = final.argmax(1)

# IMPORTANT: ID starts from 1!
ids = np.arange(1, n_test + 1)
submission = pd.DataFrame({'ID': ids, 'Label': preds})
submission.to_csv('submission_milestone2_v4.csv', index=False)

print("Saved: submission_milestone2_v4.csv")
# Predicted-label histogram as a sanity check on the class balance.
print(f"\nDistribution:\n{submission['Label'].value_counts().sort_index()}")
print(f"\nTotal time: {(time.time()-t0)/60:.1f}min")

Saved: submission_milestone2_v4.csv

Distribution:
Label
0    239
1     50
2     47
3     58
4      6
Name: count, dtype: int64

Total time: 14.3min
