# IFT3395 Competition 2 - Milestone 2: Advanced CNN Ensemble (PyTorch)

目标：使用PyTorch构建CNN模型，结合数据增强、交叉验证集成和TTA，超越leaderboard最高分数0.54。

## 核心策略
1. **CNN架构**：使用卷积神经网络处理图像数据（比MLP更适合）
2. **数据增强**：训练时随机翻转、旋转、颜色抖动等
3. **交叉验证集成**：5折CV训练多个模型并集成
4. **测试时增强（TTA）**：测试时使用多种增强模式提升预测稳定性
5. **类别权重**：处理类别不平衡问题
6. **早停机制**：防止过拟合


## 安装依赖库

首先需要安装必要的Python库。如果已经安装，可以跳过此步骤。


In [1]:
# 安装必要的依赖库
# 如果已经安装，可以注释掉或跳过此cell

import sys
import subprocess
import platform

def install_package(package):
    """安装Python包"""
    try:
        __import__(package)
        print(f"✓ {package} 已安装")
        return True
    except ImportError:
        print(f"正在安装 {package}...")
        try:
            subprocess.check_call([sys.executable, "-m", "pip", "install", package, "--quiet"])
            print(f"✓ {package} 安装完成")
            return True
        except Exception as e:
            print(f"✗ {package} 安装失败: {e}")
            return False

def check_pytorch():
    """检查PyTorch是否可用"""
    try:
        import torch
        # 尝试执行一个简单操作来验证PyTorch是否正常工作
        _ = torch.tensor([1.0])
        print(f"✓ PyTorch 已安装且可用 (版本: {torch.__version__})")
        return True
    except ImportError:
        print("PyTorch 未安装")
        return False
    except Exception as e:
        print(f"✗ PyTorch 导入失败: {type(e).__name__}: {e}")
        print("\n⚠️  PyTorch DLL加载错误解决方案：")
        print("1. 卸载并重新安装PyTorch:")
        print("   pip uninstall torch torchvision -y")
        print("   pip install torch torchvision --index-url https://download.pytorch.org/whl/cpu")
        print("\n2. 如果问题仍然存在，可能需要安装Visual C++ Redistributable:")
        print("   https://aka.ms/vs/17/release/vc_redist.x64.exe")
        print("\n3. 或者使用conda安装（如果可用）:")
        print("   conda install pytorch torchvision -c pytorch")
        return False

# 检查Python版本
python_version = sys.version_info
print(f"Python 版本: {python_version.major}.{python_version.minor}.{python_version.micro}")
print(f"系统: {platform.system()} {platform.release()}\n")

# 检查PyTorch
pytorch_ok = check_pytorch()

if not pytorch_ok:
    print("\n正在尝试重新安装PyTorch...")
    try:
        # 先卸载
        print("卸载旧版本...")
        subprocess.check_call([sys.executable, "-m", "pip", "uninstall", "torch", "torchvision", "-y"], 
                            stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
    except:
        pass
    
    # 安装CPU版本（使用官方索引）
    print("安装PyTorch CPU版本...")
    try:
        subprocess.check_call([
            sys.executable, "-m", "pip", "install", 
            "torch", "torchvision", 
            "--index-url", "https://download.pytorch.org/whl/cpu"
        ])
        print("✓ PyTorch 安装完成，请重新运行此cell验证")
    except Exception as e:
        print(f"✗ 安装失败: {e}")
        print("\n请手动在命令行运行以下命令:")
        print("pip uninstall torch torchvision -y")
        print("pip install torch torchvision --index-url https://download.pytorch.org/whl/cpu")

# 安装scikit-learn
install_package("scikit-learn")

# 安装Pillow（PIL）
install_package("Pillow")

# 安装numpy（通常已安装，但确保版本足够新）
try:
    import numpy as np
    print(f"✓ NumPy 已安装 (版本: {np.__version__})")
except ImportError:
    install_package("numpy")

print("\n" + "="*50)
if pytorch_ok or check_pytorch():
    print("✓ 所有依赖库检查完成！")
else:
    print("⚠️  PyTorch 可能仍有问题，请按照上述提示解决")
print("="*50)


Python 版本: 3.14.0
系统: Windows 11

✓ PyTorch 已安装且可用 (版本: 2.9.1+cpu)
正在安装 scikit-learn...
✓ scikit-learn 安装完成
正在安装 Pillow...
✓ Pillow 安装完成
✓ NumPy 已安装 (版本: 2.3.5)

✓ 所有依赖库检查完成！


In [2]:
import csv
import math
import pickle
from pathlib import Path
from typing import Dict, List, Optional, Tuple
import warnings
warnings.filterwarnings('ignore')

import numpy as np
from sklearn.model_selection import StratifiedKFold
from PIL import Image

# 尝试导入PyTorch
USE_PYTORCH = False
try:
    import torch
    import torch.nn as nn
    import torch.nn.functional as F
    import torch.optim as optim
    from torch.utils.data import Dataset, DataLoader
    from torchvision import transforms
    
    # 测试PyTorch是否真的可用
    _ = torch.tensor([1.0])
    USE_PYTORCH = True
    print("✓ PyTorch 可用，将使用CNN模型")
except Exception as e:
    print(f"⚠️  PyTorch 不可用 ({type(e).__name__})，将使用scikit-learn备用方案")
    print("   这可能是由于Python 3.14兼容性问题或DLL加载错误")
    print("   备用方案将使用RandomForest + ExtraTrees集成，性能仍然很好")
    USE_PYTORCH = False

# 设置随机种子
SEED = 42
np.random.seed(SEED)

if USE_PYTORCH:
    torch.manual_seed(SEED)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(SEED)
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(f'Using device: {device}')
else:
    # 导入scikit-learn备用方案所需的库
    from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier, VotingClassifier
    from sklearn.preprocessing import StandardScaler
    from sklearn.metrics import accuracy_score
    print('Using scikit-learn ensemble method')

DATA_DIR = Path('data')


✓ PyTorch 可用，将使用CNN模型
Using device: cpu


In [3]:
def load_split(split: str) -> Dict[str, np.ndarray]:
    path = DATA_DIR / f"{split}_data.pkl"
    with path.open('rb') as f:
        return pickle.load(f)

train_data = load_split('train')
test_data = load_split('test')

# 加载图像数据（原始数据可能是0-255范围的uint8或float32）
images_raw = train_data['images']
test_images_raw = test_data['images']

# 转换为float32并确保在0-255范围（用于后续转换为PIL）
if images_raw.dtype == np.uint8 or images_raw.max() > 1.0:
    images = images_raw.astype(np.float32)
    test_images = test_images_raw.astype(np.float32)
else:
    # 如果已经是0-1范围，转换为0-255范围
    images = (images_raw * 255.0).astype(np.float32)
    test_images = (test_images_raw * 255.0).astype(np.float32)

labels = train_data['labels'].reshape(-1).astype(int)
num_classes = len(np.unique(labels))
print(f"Train: {images.shape}, Test: {test_images.shape}, Classes: {num_classes}")
print(f"Image value range: [{images.min():.1f}, {images.max():.1f}]")

# 计算类别权重
unique, counts = np.unique(labels, return_counts=True)
class_weights = counts.sum() / (counts.astype(np.float32) + 1e-6)
class_weights = class_weights / class_weights.mean()
print('Class weights:', class_weights)

if USE_PYTORCH:
    class_weights_tensor = torch.FloatTensor(class_weights).to(device)


Train: (1080, 28, 28, 3), Test: (400, 28, 28, 3), Classes: 5
Image value range: [0.0, 255.0]
Class weights: [0.29368767 1.11509536 0.69287478 0.73573302 2.16260918]


## PyTorch CNN模型训练部分

**⚠️ 重要提示**: 
- 如果PyTorch可用，请继续执行下面的所有cell
- 如果PyTorch不可用（已在上方检测到），请**跳过**下面的PyTorch相关cell（Cell 6-17），直接跳到最后使用scikit-learn备用方案（Cell 19）


In [4]:
if not USE_PYTORCH:
    print("⚠️ PyTorch不可用，请跳过此部分，直接跳到最后使用scikit-learn备用方案")
    raise RuntimeError("PyTorch不可用，请跳到最后使用scikit-learn备用方案（Cell 19）")

class ImageDataset(Dataset):
    def __init__(self, images, labels=None, transform=None):
        self.images = images
        self.labels = labels
        self.transform = transform
    
    def __len__(self):
        return len(self.images)
    
    def __getitem__(self, idx):
        img = self.images[idx].copy()
        
        # 确保图像在0-255范围内（处理float32的0-1范围或uint8的0-255范围）
        if img.max() <= 1.0:
            img = (img * 255).astype(np.uint8)
        else:
            img = img.astype(np.uint8)
        
        # 确保图像是HWC格式
        if len(img.shape) == 3 and img.shape[2] == 3:
            pass  # 已经是HWC格式
        else:
            raise ValueError(f"Unexpected image shape: {img.shape}")
        
        if self.transform:
            img = self.transform(img)
        else:
            # 默认：转换为tensor并归一化
            img = torch.from_numpy(img).permute(2, 0, 1).float() / 255.0
        
        if self.labels is not None:
            return img, self.labels[idx]
        return img

# 训练时的数据增强
train_transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomVerticalFlip(p=0.5),
    transforms.RandomRotation(degrees=15),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# 验证/测试时的标准化
val_transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# TTA变换列表（使用固定的变换，不使用随机）
def fixed_rotation_10(img):
    """固定旋转10度"""
    return img.rotate(10, resample=Image.BILINEAR)

def fixed_rotation_minus_10(img):
    """固定旋转-10度"""
    return img.rotate(-10, resample=Image.BILINEAR)

tta_transforms = [
    transforms.Compose([transforms.ToPILImage(), transforms.ToTensor(), 
                       transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])]),  # 原始
    transforms.Compose([transforms.ToPILImage(), transforms.Lambda(lambda x: x.transpose(Image.FLIP_LEFT_RIGHT)), 
                       transforms.ToTensor(), transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])]),  # 水平翻转
    transforms.Compose([transforms.ToPILImage(), transforms.Lambda(lambda x: x.transpose(Image.FLIP_TOP_BOTTOM)), 
                       transforms.ToTensor(), transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])]),  # 垂直翻转
    transforms.Compose([transforms.ToPILImage(), transforms.Lambda(fixed_rotation_10), 
                       transforms.ToTensor(), transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])]),  # 旋转+10度
    transforms.Compose([transforms.ToPILImage(), transforms.Lambda(fixed_rotation_minus_10), 
                       transforms.ToTensor(), transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])]),  # 旋转-10度
]


In [5]:
class CNNClassifier(nn.Module):
    def __init__(self, num_classes=5, dropout=0.3):
        super(CNNClassifier, self).__init__()
        
        # 第一个卷积块
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm2d(32)
        self.conv2 = nn.Conv2d(32, 32, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm2d(32)
        self.pool1 = nn.MaxPool2d(2, 2)
        self.dropout1 = nn.Dropout2d(dropout)
        
        # 第二个卷积块
        self.conv3 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.bn3 = nn.BatchNorm2d(64)
        self.conv4 = nn.Conv2d(64, 64, kernel_size=3, padding=1)
        self.bn4 = nn.BatchNorm2d(64)
        self.pool2 = nn.MaxPool2d(2, 2)
        self.dropout2 = nn.Dropout2d(dropout)
        
        # 第三个卷积块
        self.conv5 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.bn5 = nn.BatchNorm2d(128)
        self.conv6 = nn.Conv2d(128, 128, kernel_size=3, padding=1)
        self.bn6 = nn.BatchNorm2d(128)
        self.pool3 = nn.AdaptiveAvgPool2d((1, 1))
        
        # 全连接层
        self.fc1 = nn.Linear(128, 256)
        self.bn_fc = nn.BatchNorm1d(256)
        self.dropout_fc = nn.Dropout(dropout)
        self.fc2 = nn.Linear(256, num_classes)
        
    def forward(self, x):
        # 第一个卷积块
        x = F.relu(self.bn1(self.conv1(x)))
        x = F.relu(self.bn2(self.conv2(x)))
        x = self.pool1(x)
        x = self.dropout1(x)
        
        # 第二个卷积块
        x = F.relu(self.bn3(self.conv3(x)))
        x = F.relu(self.bn4(self.conv4(x)))
        x = self.pool2(x)
        x = self.dropout2(x)
        
        # 第三个卷积块
        x = F.relu(self.bn5(self.conv5(x)))
        x = F.relu(self.bn6(self.conv6(x)))
        x = self.pool3(x)
        
        # 展平
        x = x.view(x.size(0), -1)
        
        # 全连接层
        x = F.relu(self.bn_fc(self.fc1(x)))
        x = self.dropout_fc(x)
        x = self.fc2(x)
        
        return x


In [6]:
def train_epoch(model, train_loader, criterion, optimizer, device):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0
    
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)
        
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        
        # 梯度裁剪
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
        
        optimizer.step()
        
        running_loss += loss.item()
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
    
    epoch_loss = running_loss / len(train_loader)
    epoch_acc = correct / total
    return epoch_loss, epoch_acc

def validate(model, val_loader, criterion, device):
    model.eval()
    running_loss = 0.0
    correct = 0
    total = 0
    
    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)
            
            running_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    
    epoch_loss = running_loss / len(val_loader)
    epoch_acc = correct / total
    return epoch_loss, epoch_acc


## 交叉验证训练


In [7]:
K = 5
skf = StratifiedKFold(n_splits=K, shuffle=True, random_state=SEED)

fold_models = []
fold_val_accs = []

# 超参数
params = {
    'lr': 0.001,
    'weight_decay': 1e-4,
    'dropout': 0.3,
    'batch_size': 32,
    'epochs': 100,
    'patience': 15
}

for fold, (train_idx, val_idx) in enumerate(skf.split(images, labels)):
    print(f"\n{'='*50}")
    print(f"Fold {fold + 1}/{K}")
    print(f"{'='*50}")
    
    # 准备数据
    train_images_fold = images[train_idx]
    train_labels_fold = labels[train_idx]
    val_images_fold = images[val_idx]
    val_labels_fold = labels[val_idx]
    
    train_dataset = ImageDataset(train_images_fold, train_labels_fold, transform=train_transform)
    val_dataset = ImageDataset(val_images_fold, val_labels_fold, transform=val_transform)
    
    train_loader = DataLoader(train_dataset, batch_size=params['batch_size'], shuffle=True, num_workers=0)
    val_loader = DataLoader(val_dataset, batch_size=params['batch_size'], shuffle=False, num_workers=0)
    
    # 创建模型
    model = CNNClassifier(num_classes=num_classes, dropout=params['dropout']).to(device)
    
    # 损失函数（带类别权重）
    criterion = nn.CrossEntropyLoss(weight=class_weights_tensor)
    
    # 优化器（使用余弦退火学习率调度）
    optimizer = optim.AdamW(model.parameters(), lr=params['lr'], weight_decay=params['weight_decay'])
    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=params['epochs'], eta_min=1e-6)
    
    # 训练
    best_val_acc = 0.0
    best_model_state = None
    patience_counter = 0
    
    for epoch in range(params['epochs']):
        train_loss, train_acc = train_epoch(model, train_loader, criterion, optimizer, device)
        val_loss, val_acc = validate(model, val_loader, criterion, device)
        scheduler.step()
        
        if (epoch + 1) % 10 == 0:
            print(f"Epoch {epoch+1}/{params['epochs']} - Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}, "
                  f"Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}")
        
        # 早停
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            best_model_state = model.state_dict().copy()
            patience_counter = 0
        else:
            patience_counter += 1
            if patience_counter >= params['patience']:
                print(f"Early stopping at epoch {epoch+1}")
                break
    
    # 加载最佳模型
    model.load_state_dict(best_model_state)
    fold_models.append(model)
    fold_val_accs.append(best_val_acc)
    print(f"Fold {fold + 1} Best Val Acc: {best_val_acc:.4f}")

print(f"\n{'='*50}")
print(f"Mean Val Acc: {np.mean(fold_val_accs):.4f} ± {np.std(fold_val_accs):.4f}")
print(f"{'='*50}")



Fold 1/5
Epoch 10/100 - Train Loss: 1.5923, Train Acc: 0.2951, Val Loss: 1.5361, Val Acc: 0.4769
Epoch 20/100 - Train Loss: 1.5429, Train Acc: 0.3229, Val Loss: 1.5248, Val Acc: 0.2593
Early stopping at epoch 25
Fold 1 Best Val Acc: 0.4769

Fold 2/5
Epoch 10/100 - Train Loss: 1.5779, Train Acc: 0.3171, Val Loss: 1.5475, Val Acc: 0.3704
Epoch 20/100 - Train Loss: 1.5406, Train Acc: 0.2998, Val Loss: 1.5639, Val Acc: 0.3241
Epoch 30/100 - Train Loss: 1.5573, Train Acc: 0.2674, Val Loss: 1.5587, Val Acc: 0.3796
Early stopping at epoch 36
Fold 2 Best Val Acc: 0.4213

Fold 3/5
Epoch 10/100 - Train Loss: 1.5651, Train Acc: 0.3542, Val Loss: 1.5022, Val Acc: 0.3333
Epoch 20/100 - Train Loss: 1.5366, Train Acc: 0.3368, Val Loss: 1.5288, Val Acc: 0.3333
Epoch 30/100 - Train Loss: 1.5374, Train Acc: 0.3380, Val Loss: 1.5065, Val Acc: 0.3472
Epoch 40/100 - Train Loss: 1.5271, Train Acc: 0.3958, Val Loss: 1.5088, Val Acc: 0.3796
Early stopping at epoch 46
Fold 3 Best Val Acc: 0.4954

Fold 4/5
Epo

## 全量数据训练额外模型

In [8]:
# 使用90%训练，10%验证进行全量训练
from sklearn.model_selection import train_test_split

train_images_full, val_images_full, train_labels_full, val_labels_full = train_test_split(
    images, labels, test_size=0.1, random_state=SEED, stratify=labels
)

full_train_dataset = ImageDataset(train_images_full, train_labels_full, transform=train_transform)
full_val_dataset = ImageDataset(val_images_full, val_labels_full, transform=val_transform)

full_train_loader = DataLoader(full_train_dataset, batch_size=params['batch_size'], shuffle=True, num_workers=0)
full_val_loader = DataLoader(full_val_dataset, batch_size=params['batch_size'], shuffle=False, num_workers=0)

# 创建全量模型
full_model = CNNClassifier(num_classes=num_classes, dropout=params['dropout']).to(device)
criterion = nn.CrossEntropyLoss(weight=class_weights_tensor)
optimizer = optim.AdamW(full_model.parameters(), lr=params['lr'], weight_decay=params['weight_decay'])
scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=params['epochs'], eta_min=1e-6)

best_val_acc = 0.0
best_model_state = None
patience_counter = 0

print("Training full model...")
for epoch in range(params['epochs']):
    train_loss, train_acc = train_epoch(full_model, full_train_loader, criterion, optimizer, device)
    val_loss, val_acc = validate(full_model, full_val_loader, criterion, device)
    scheduler.step()
    
    if (epoch + 1) % 10 == 0:
        print(f"Epoch {epoch+1}/{params['epochs']} - Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}, "
              f"Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}")
    
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        best_model_state = full_model.state_dict().copy()
        patience_counter = 0
    else:
        patience_counter += 1
        if patience_counter >= params['patience']:
            print(f"Early stopping at epoch {epoch+1}")
            break

full_model.load_state_dict(best_model_state)
print(f"Full model Best Val Acc: {best_val_acc:.4f}")

# 将所有模型加入列表
all_models = fold_models + [full_model]
print(f"Total models for ensemble: {len(all_models)}")


Training full model...
Epoch 10/100 - Train Loss: 1.5561, Train Acc: 0.3261, Val Loss: 1.5457, Val Acc: 0.3426
Early stopping at epoch 19
Full model Best Val Acc: 0.3611
Total models for ensemble: 6


## 测试时增强（TTA）预测


In [9]:
def predict_with_tta(models, test_images, tta_transforms, device, batch_size=32):
    """使用TTA进行预测"""
    all_probs = []
    
    for model in models:
        model.eval()
        model_probs = []
        
        # 对每种TTA变换进行预测
        for tta_transform in tta_transforms:
            test_dataset = ImageDataset(test_images, transform=tta_transform)
            test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=0)
            
            probs_list = []
            with torch.no_grad():
                for images in test_loader:
                    images = images.to(device)
                    outputs = model(images)
                    probs = F.softmax(outputs, dim=1)
                    probs_list.append(probs.cpu().numpy())
            
            model_probs.append(np.concatenate(probs_list, axis=0))
        
        # 对每种TTA变换的预测求平均
        model_avg_probs = np.mean(model_probs, axis=0)
        all_probs.append(model_avg_probs)
    
    # 对所有模型的预测求平均
    ensemble_probs = np.mean(all_probs, axis=0)
    predictions = np.argmax(ensemble_probs, axis=1)
    
    return predictions, ensemble_probs

print("Generating predictions with TTA...")
test_predictions, test_probs = predict_with_tta(all_models, test_images, tta_transforms, device)
print(f"Predictions shape: {test_predictions.shape}")
print(f"Prediction distribution: {np.bincount(test_predictions)}")


Generating predictions with TTA...
Predictions shape: (400,)
Prediction distribution: [164  20  64   1 151]


## 生成提交文件


---

## 备用方案：使用scikit-learn（当PyTorch不可用时）

如果PyTorch无法使用，以下代码将自动执行，使用scikit-learn的集成方法。


In [10]:
# ============================================================================
# scikit-learn备用方案
# ============================================================================

if not USE_PYTORCH:
    print("="*60)
    print("使用scikit-learn备用方案")
    print("="*60)
    
    # 数据预处理：展平图像
    def flatten_images(imgs):
        return imgs.reshape(len(imgs), -1).astype(np.float32) / 255.0
    
    X_train = flatten_images(images)
    X_test = flatten_images(test_images)
    
    # 标准化
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)
    
    print(f"训练数据形状: {X_train_scaled.shape}")
    print(f"测试数据形状: {X_test_scaled.shape}")
    
    # 5折交叉验证训练多个模型
    skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=SEED)
    fold_models = []
    fold_scores = []
    
    print("\n开始交叉验证训练...")
    for fold, (train_idx, val_idx) in enumerate(skf.split(X_train_scaled, labels)):
        print(f"\nFold {fold + 1}/5")
        
        X_train_fold = X_train_scaled[train_idx]
        y_train_fold = labels[train_idx]
        X_val_fold = X_train_scaled[val_idx]
        y_val_fold = labels[val_idx]
        
        # 创建多个不同的模型
        models = [
            RandomForestClassifier(
                n_estimators=300,
                max_depth=20,
                min_samples_split=5,
                min_samples_leaf=2,
                class_weight='balanced',
                random_state=SEED + fold,
                n_jobs=-1
            ),
            ExtraTreesClassifier(
                n_estimators=300,
                max_depth=20,
                min_samples_split=5,
                min_samples_leaf=2,
                class_weight='balanced',
                random_state=SEED + fold + 10,
                n_jobs=-1
            ),
            RandomForestClassifier(
                n_estimators=200,
                max_depth=25,
                min_samples_split=3,
                min_samples_leaf=1,
                class_weight='balanced',
                random_state=SEED + fold + 20,
                n_jobs=-1
            )
        ]
        
        # 训练每个模型
        fold_predictions = []
        for i, model in enumerate(models):
            model.fit(X_train_fold, y_train_fold)
            val_pred = model.predict(X_val_fold)
            val_acc = accuracy_score(y_val_fold, val_pred)
            fold_predictions.append(model.predict_proba(X_val_fold))
            print(f"  模型 {i+1} 验证准确率: {val_acc:.4f}")
        
        # 集成预测（平均概率）
        ensemble_probs = np.mean(fold_predictions, axis=0)
        ensemble_pred = np.argmax(ensemble_probs, axis=1)
        ensemble_acc = accuracy_score(y_val_fold, ensemble_pred)
        print(f"  集成验证准确率: {ensemble_acc:.4f}")
        
        fold_models.append(models)
        fold_scores.append(ensemble_acc)
    
    print(f"\n平均验证准确率: {np.mean(fold_scores):.4f} ± {np.std(fold_scores):.4f}")
    
    # 在全量数据上训练最终模型
    print("\n训练全量模型...")
    final_models = [
        RandomForestClassifier(
            n_estimators=300, max_depth=20, min_samples_split=5, min_samples_leaf=2,
            class_weight='balanced', random_state=SEED + 100, n_jobs=-1
        ),
        ExtraTreesClassifier(
            n_estimators=300, max_depth=20, min_samples_split=5, min_samples_leaf=2,
            class_weight='balanced', random_state=SEED + 110, n_jobs=-1
        ),
        RandomForestClassifier(
            n_estimators=200, max_depth=25, min_samples_split=3, min_samples_leaf=1,
            class_weight='balanced', random_state=SEED + 120, n_jobs=-1
        )
    ]
    
    for model in final_models:
        model.fit(X_train_scaled, labels)
    
    all_models_sklearn = []
    for fold_models_list in fold_models:
        all_models_sklearn.extend(fold_models_list)
    all_models_sklearn.extend(final_models)
    
    print(f"总模型数: {len(all_models_sklearn)}")
    
    # 对测试集进行预测
    print("\n生成测试集预测...")
    test_probs_list = []
    for model in all_models_sklearn:
        test_probs_list.append(model.predict_proba(X_test_scaled))
    
    # 集成所有模型的预测
    test_ensemble_probs = np.mean(test_probs_list, axis=0)
    test_predictions = np.argmax(test_ensemble_probs, axis=1)
    
    print(f"预测分布: {np.bincount(test_predictions)}")
    
    # 生成提交文件
    submission_path = Path('submission_milestone2.csv')
    with submission_path.open('w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(['ID', 'Label'])
        for idx, label in enumerate(test_predictions, start=1):
            writer.writerow([str(idx), int(label)])
    
    print(f'\n提交文件已保存: {submission_path.resolve()}')
    print(f'总预测数: {len(test_predictions)}')
    
else:
    print("PyTorch可用，将使用CNN模型（见上方代码）")


PyTorch可用，将使用CNN模型（见上方代码）


In [11]:
submission_path = Path('submission_milestone2.csv')
with submission_path.open('w', newline='') as f:
    writer = csv.writer(f)
    writer.writerow(['ID', 'Label'])
    for idx, label in enumerate(test_predictions, start=1):
        writer.writerow([str(idx), int(label)])

print(f'Submission saved to {submission_path.resolve()}')
print(f'Total predictions: {len(test_predictions)}')


Submission saved to C:\Users\yudim\Downloads\IFT3395_Competition2\submission_milestone2.csv
Total predictions: 400
