使用 cuda

In [None]:
import os

# Project-local folder used as torch's cache root (TORCH_HOME). The variable is
# exported ahead of the `import torch` below so it is already in place by then.
MODELS_DIR = '../models'
os.environ['TORCH_HOME'] = MODELS_DIR
os.makedirs(name=MODELS_DIR, exist_ok=True)

import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader, random_split
import torchvision
from torchvision import transforms, models
import pandas as pd
import numpy as np
from PIL import Image
from tqdm import tqdm
import matplotlib.pyplot as plt

# Run on the GPU when torch reports CUDA as available, otherwise on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Report the chosen device and the resolved (absolute) cache location.
print(f"Using device: {device}")
print(f"Model cache directory: {os.path.abspath(MODELS_DIR)}")


定义超参数

In [None]:
# Configuration: every value a reader may want to tune lives in this cell.
DATA_DIR = '../data/ClassifyLeaves'  # holds train.csv / test.csv; image paths in the CSVs are joined onto it
BATCH_SIZE = 32  # Reduced for ResNet50 (larger model)
NUM_EPOCHS = 30
LEARNING_RATE = 1e-4
VAL_RATIO = 0.2  # 20% for validation
NUM_WORKERS = 0  # Set to 0 for Windows compatibility, can increase on Linux
MODEL_SAVE_PATH = os.path.join(MODELS_DIR, 'kcf-resnet50.pth')

# Image size for ResNet
IMG_SIZE = 224

# Seed the random number generators up front so that shuffling, augmentation
# and the initialization of the new classification layer start from the same
# state on every Restart & Run All.
# NOTE(review): seeding alone does not promise bit-identical results on every
# GPU/backend combination - see the PyTorch reproducibility notes.
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)


读取数据

In [None]:
# Read the two CSV files that sit directly under DATA_DIR (train first, then test).
train_df, test_df = (
    pd.read_csv(os.path.join(DATA_DIR, csv_name))
    for csv_name in ('train.csv', 'test.csv')
)

# Sorting the distinct label values makes the label <-> index mapping
# deterministic for a given train.csv.
labels = sorted(train_df['label'].unique())
idx2label = dict(enumerate(labels))
label2idx = {label: idx for idx, label in idx2label.items()}
num_classes = len(labels)

# Quick sanity report on what was loaded.
print(f"Number of classes: {num_classes}")
print(f"Training samples: {len(train_df)}")
print(f"Test samples: {len(test_df)}")

定义数据集

In [None]:
# Define Dataset class
class LeavesDataset(Dataset):
    """Dataset that serves the images listed in a DataFrame.

    Each row's ``image`` value is joined onto ``data_dir`` to locate the file.
    Unless ``is_test`` is set, the row's ``label`` value is translated to a
    class index through ``label2idx`` and returned next to the image.

    Args:
        df: DataFrame with an ``image`` column and, unless ``is_test`` is
            True, a ``label`` column. Its index is reset so that positions
            0..len-1 can be used for lookup.
        data_dir: Directory the ``image`` paths are relative to.
        label2idx: Mapping from label value to class index. Required unless
            ``is_test`` is True.
        transform: Optional callable applied to the RGB PIL image.
        is_test: When True, ``__getitem__`` returns only the image.

    Raises:
        ValueError: If ``is_test`` is False and ``label2idx`` is None.
    """

    def __init__(self, df, data_dir, label2idx=None, transform=None, is_test=False):
        # Fail at construction time instead of deep inside a DataLoader batch.
        if not is_test and label2idx is None:
            raise ValueError("label2idx is required when is_test is False")

        self.df = df.reset_index(drop=True)
        self.data_dir = data_dir
        self.label2idx = label2idx
        self.transform = transform
        self.is_test = is_test

    def __len__(self):
        """Return the number of rows (samples) in the DataFrame."""
        return len(self.df)

    def __getitem__(self, idx):
        """Load sample ``idx``.

        Returns:
            The (optionally transformed) image when ``is_test`` is True,
            otherwise a ``(image, label_idx)`` pair.
        """
        img_path = os.path.join(self.data_dir, self.df.loc[idx, 'image'])

        # The context manager releases the underlying file as soon as the
        # pixel data has been converted; convert() returns a separate image.
        with Image.open(img_path) as img:
            image = img.convert('RGB')

        if self.transform:
            image = self.transform(image)

        if self.is_test:
            return image

        label = self.df.loc[idx, 'label']
        label_idx = self.label2idx[label]
        return image, label_idx


数据增广

In [None]:
# Define data augmentation and transforms

# Per-channel normalization statistics. The training and validation pipelines
# must normalize identically, so the values are written down exactly once.
# NOTE(review): these look like the usual ImageNet statistics that go with the
# pretrained torchvision weights - confirm against the weights' documentation.
NORM_MEAN = [0.485, 0.456, 0.406]
NORM_STD = [0.229, 0.224, 0.225]

# Training pipeline: resize to a fixed square, apply random flips, rotation,
# color jitter and a small translate/scale, then convert and normalize.
train_transform = transforms.Compose([
    transforms.Resize((IMG_SIZE, IMG_SIZE)),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomVerticalFlip(p=0.5),
    transforms.RandomRotation(degrees=30),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
    transforms.RandomAffine(degrees=0, translate=(0.1, 0.1), scale=(0.9, 1.1)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Evaluation pipeline: no random augmentation, only the deterministic steps
# (fixed-size resize, tensor conversion, per-channel normalization).
val_transform = transforms.Compose(
    [
        transforms.Resize(size=(IMG_SIZE, IMG_SIZE)),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)


划分训练集与验证集

In [None]:
# Split train data into train and validation sets
from sklearn.model_selection import train_test_split

# The split is stratified on the label column and uses a fixed random_state,
# so the same rows end up in each subset on every run.
train_data, val_data = train_test_split(
    train_df,
    test_size=VAL_RATIO,
    random_state=42,
    stratify=train_df['label']
)

print(f"Training set size: {len(train_data)}")
print(f"Validation set size: {len(val_data)}")

# Create datasets: augmentation is applied to the training subset only.
train_dataset = LeavesDataset(train_data, DATA_DIR, label2idx, transform=train_transform)
val_dataset = LeavesDataset(val_data, DATA_DIR, label2idx, transform=val_transform)

# Settings shared by both loaders. Pinned host memory is only requested when
# the batches will actually be copied to a CUDA device; asking for it on a
# CPU-only machine just produces a DataLoader warning.
loader_kwargs = dict(
    batch_size=BATCH_SIZE,
    num_workers=NUM_WORKERS,
    pin_memory=(device.type == 'cuda'),
)

# Create dataloaders: only the training data is shuffled.
train_loader = DataLoader(train_dataset, shuffle=True, **loader_kwargs)
val_loader = DataLoader(val_dataset, shuffle=False, **loader_kwargs)

print(f"Number of training batches: {len(train_loader)}")
print(f"Number of validation batches: {len(val_loader)}")


定义 ResNet50

In [None]:
# Start from torchvision's ResNet50 initialized with the IMAGENET1K_V1 weights.
model = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V1)

# Swap the final fully connected layer for one that outputs num_classes
# scores, keeping the input width of the layer it replaces.
num_features = model.fc.in_features
model.fc = nn.Linear(in_features=num_features, out_features=num_classes)

# The model is moved to the target device before the optimizer is built
# from its parameters.
model = model.to(device)

# Cross-entropy loss; Adam updates every parameter of the network
# (nothing is frozen here).
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=LEARNING_RATE)

# Cosine annealing schedule whose period (T_max) is the number of epochs;
# it is advanced once per epoch by the training loop.
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer=optimizer, T_max=NUM_EPOCHS)

print(f"Model: ResNet50")
print(f"Number of parameters: {sum(p.numel() for p in model.parameters()):,}")


定义训练与验证操作

In [None]:
# Shared epoch loop used by both the training and the validation function
def _run_epoch(model, loader, criterion, device, optimizer, training, desc):
    """Run one pass over ``loader`` and return ``(mean_loss, accuracy_percent)``.

    When ``training`` is True the model is put in train mode and every batch
    is followed by a backward pass and an optimizer step; otherwise the model
    is put in eval mode and gradients are disabled.
    """
    model.train(training)
    running_loss = 0.0
    correct = 0
    total = 0

    pbar = tqdm(loader, desc=desc, leave=False)
    with torch.set_grad_enabled(training):
        for images, labels in pbar:
            # non_blocking lets the copy overlap with compute when the loader
            # hands out pinned memory; it is a plain copy otherwise.
            images = images.to(device, non_blocking=True)
            labels = labels.to(device, non_blocking=True)

            if training:
                optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            if training:
                loss.backward()
                optimizer.step()

            # Read the scalar once per batch and reuse it below.
            batch_loss = loss.item()
            batch_size = labels.size(0)

            # Weight by batch size so a smaller last batch does not skew the
            # epoch mean (assumes criterion returns the per-batch mean).
            running_loss += batch_loss * batch_size
            _, predicted = outputs.max(1)
            total += batch_size
            correct += predicted.eq(labels).sum().item()

            pbar.set_postfix({'loss': f'{batch_loss:.4f}', 'acc': f'{100.*correct/total:.2f}%'})

    if total == 0:
        # Previously surfaced as a bare ZeroDivisionError.
        raise ValueError(f"{desc}: the data loader produced no samples")

    epoch_loss = running_loss / total
    epoch_acc = 100. * correct / total
    return epoch_loss, epoch_acc


# Training function
def train_one_epoch(model, loader, criterion, optimizer, device):
    """Train ``model`` for one pass over ``loader``.

    Args:
        model: Network to optimize; switched to train mode.
        loader: Iterable yielding ``(images, labels)`` batches.
        criterion: Loss called as ``criterion(outputs, labels)``.
        optimizer: Optimizer stepped once per batch.
        device: Device the batches are moved to.

    Returns:
        ``(epoch_loss, epoch_acc)``: sample-weighted mean loss and accuracy
        in percent over the whole pass.

    Raises:
        ValueError: If ``loader`` yields no samples.
    """
    return _run_epoch(model, loader, criterion, device,
                      optimizer=optimizer, training=True, desc='Training')


# Validation function
def validate(model, loader, criterion, device):
    """Evaluate ``model`` on ``loader`` without updating any parameters.

    Args:
        model: Network to evaluate; switched to eval mode.
        loader: Iterable yielding ``(images, labels)`` batches.
        criterion: Loss called as ``criterion(outputs, labels)``.
        device: Device the batches are moved to.

    Returns:
        ``(epoch_loss, epoch_acc)``: sample-weighted mean loss and accuracy
        in percent over the whole pass.

    Raises:
        ValueError: If ``loader`` yields no samples.
    """
    return _run_epoch(model, loader, criterion, device,
                      optimizer=None, training=False, desc='Validation')


进行训练

In [None]:
# Training loop
# Per-epoch metrics; every list grows by one entry per completed epoch.
history = {
    'train_loss': [],
    'train_acc': [],
    'val_loss': [],
    'val_acc': []
}

best_val_acc = 0.0
best_epoch = 0  # 1-based; 0 means no checkpoint has been written yet

print("Starting training...")
print("=" * 60)

for epoch in range(NUM_EPOCHS):
    # Learning rate in effect for this epoch. It must be read before
    # scheduler.step(), which moves the scheduler on to the next epoch's
    # value; reading it afterwards would log every epoch with the wrong LR.
    epoch_lr = scheduler.get_last_lr()[0]

    # Train
    train_loss, train_acc = train_one_epoch(model, train_loader, criterion, optimizer, device)

    # Validate
    val_loss, val_acc = validate(model, val_loader, criterion, device)

    # Update learning rate
    scheduler.step()

    # Save history (train accuracy is recorded too instead of being discarded)
    history['train_loss'].append(train_loss)
    history['train_acc'].append(train_acc)
    history['val_loss'].append(val_loss)
    history['val_acc'].append(val_acc)

    # Print epoch results
    print(f"Epoch [{epoch+1}/{NUM_EPOCHS}] "
          f"Train Loss: {train_loss:.4f} | "
          f"Val Loss: {val_loss:.4f} | "
          f"Val Acc: {val_acc:.2f}% | "
          f"LR: {epoch_lr:.6f}")

    # Save best model. The first epoch is always saved so that a checkpoint
    # from this run exists even if validation accuracy never rises above 0.
    if best_epoch == 0 or val_acc > best_val_acc:
        best_val_acc = val_acc
        best_epoch = epoch + 1
        torch.save({
            'epoch': epoch,  # 0-based loop index (best_epoch is 1-based)
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'val_acc': val_acc,
            'val_loss': val_loss,
            'label2idx': label2idx,
            'idx2label': idx2label
        }, MODEL_SAVE_PATH)
        print(f"  -> Best model saved! (Val Acc: {val_acc:.2f}%)")

print("=" * 60)
print(f"Training completed!")
print(f"Best validation accuracy: {best_val_acc:.2f}% at epoch {best_epoch}")


可视化训练过程

In [None]:
# Visualize training history
fig, axes = plt.subplots(1, 3, figsize=(15, 4))

# One x value per *recorded* epoch. Deriving the range from the history
# rather than from NUM_EPOCHS keeps x and y the same length when training
# was interrupted or NUM_EPOCHS was edited after the run.
epochs_range = range(1, len(history['train_loss']) + 1)

# Plot training loss
axes[0].plot(epochs_range, history['train_loss'], 'b-', linewidth=2, label='Train Loss')

# Plot validation loss
axes[1].plot(epochs_range, history['val_loss'], 'r-', linewidth=2, label='Val Loss')

# Plot validation accuracy, with the best value marked by a dashed line and a dot
axes[2].plot(epochs_range, history['val_acc'], 'g-', linewidth=2, label='Val Accuracy')
axes[2].axhline(y=best_val_acc, color='r', linestyle='--', alpha=0.7, label=f'Best: {best_val_acc:.2f}%')
axes[2].scatter([best_epoch], [best_val_acc], color='r', s=100, zorder=5)

# Labels, legend and grid are identical in structure for all three panels.
panel_text = (
    ('Loss', 'Training Loss'),
    ('Loss', 'Validation Loss'),
    ('Accuracy (%)', 'Validation Accuracy'),
)
for ax, (ylabel, title) in zip(axes, panel_text):
    ax.set_xlabel('Epoch')
    ax.set_ylabel(ylabel)
    ax.set_title(title)
    ax.legend()
    ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.savefig('training_history.png', dpi=150, bbox_inches='tight')
plt.show()

print(f"\nTraining history saved to 'training_history.png'")


可视化训练结果

In [None]:
# Two-panel summary: both loss curves on shared axes (left) and validation
# accuracy with its best value highlighted (right).
fig, axes = plt.subplots(1, 2, figsize=(12, 4))
loss_ax, acc_ax = axes

# Left panel: train and validation loss drawn on the same axes.
for history_key, line_fmt, line_label in (
    ('train_loss', 'b-', 'Train Loss'),
    ('val_loss', 'r-', 'Val Loss'),
):
    loss_ax.plot(epochs_range, history[history_key], line_fmt, linewidth=2, label=line_label)
loss_ax.set_xlabel('Epoch')
loss_ax.set_ylabel('Loss')
loss_ax.set_title('Train vs Validation Loss')

# Right panel: accuracy curve, a dashed line at the best value and a dot on
# the epoch where it was reached.
acc_ax.plot(epochs_range, history['val_acc'], 'g-', linewidth=2, label='Val Accuracy')
acc_ax.axhline(y=best_val_acc, color='r', linestyle='--', alpha=0.7, label=f'Best: {best_val_acc:.2f}%')
acc_ax.scatter([best_epoch], [best_val_acc], color='r', s=100, zorder=5)
acc_ax.set_xlabel('Epoch')
acc_ax.set_ylabel('Accuracy (%)')
acc_ax.set_title('Validation Accuracy')

# Legend and grid are the same for both panels.
for panel in (loss_ax, acc_ax):
    panel.legend()
    panel.grid(True, alpha=0.3)

plt.tight_layout()
plt.savefig('training_summary.png', dpi=150, bbox_inches='tight')
plt.show()

# Text recap of the run: best epoch/accuracy, the last recorded losses and
# where the checkpoint was written.
print("\n" + "=" * 60)
print("Training Summary")
print("=" * 60)
print(f"Best Epoch: {best_epoch}")
print(f"Best Validation Accuracy: {best_val_acc:.2f}%")
print(f"Final Train Loss: {history['train_loss'][-1]:.4f}")
print(f"Final Val Loss: {history['val_loss'][-1]:.4f}")
print(f"Model saved to: {MODEL_SAVE_PATH}")


进行预测

In [None]:
# Load best model for prediction
# The checkpoint is the dict written by the training loop; tensors are mapped
# onto the current device while loading.
checkpoint = torch.load(MODEL_SAVE_PATH, map_location=device)
model.load_state_dict(checkpoint['model_state_dict'])
# Use the index -> label mapping stored with the weights so that predictions
# are decoded with the mapping the checkpoint was trained with.
idx2label = checkpoint['idx2label']
print(f"Loaded best model from: {MODEL_SAVE_PATH}")
print(f"Best validation accuracy: {checkpoint['val_acc']:.2f}%")

# Create test dataset and dataloader (no labels, deterministic transform)
test_dataset = LeavesDataset(test_df, DATA_DIR, transform=val_transform, is_test=True)
test_loader = DataLoader(
    test_dataset,
    batch_size=BATCH_SIZE,
    # Keep the row order of test_df: the submission cell pairs predictions
    # with test_df['image'] by position.
    shuffle=False,
    num_workers=NUM_WORKERS,
    # Pinned memory only helps host-to-GPU copies; requesting it on a
    # CPU-only machine just triggers a DataLoader warning.
    pin_memory=(device.type == 'cuda')
)

print(f"Test samples: {len(test_dataset)}")
print(f"Test batches: {len(test_loader)}")


In [None]:
# Run the model over the test loader in eval mode and collect the index of
# the highest-scoring class for every image, in loader order.
model.eval()
predictions = []

with torch.no_grad():
    pbar = tqdm(test_loader, desc='Predicting')
    for images in pbar:
        images = images.to(device)
        outputs = model(images)
        # max over dim 1 returns (values, indices); only the indices are needed
        predicted = outputs.max(1)[1]
        predictions.extend(predicted.cpu().numpy())

# Translate each predicted class index back to its label
predicted_labels = list(map(idx2label.__getitem__, predictions))

print(f"Total predictions: {len(predicted_labels)}")


将预测结果写入 sample_submission.csv

In [None]:
# Build the submission table: the image column of test_df plus one predicted
# label per row, paired by position.
submission_df = test_df[['image']].assign(label=predicted_labels)

# Write it next to the input CSVs, without the index column.
# Note: this writes to the file named sample_submission.csv inside DATA_DIR.
submission_path = os.path.join(DATA_DIR, 'sample_submission.csv')
submission_df.to_csv(submission_path, index=False)

# Show where the file went, its first rows and the most frequent predictions.
print(f"Submission saved to: {submission_path}")
print(f"\nSubmission preview:")
print(submission_df.head(10))
print(f"\nLabel distribution (top 10):")
print(submission_df['label'].value_counts().head(10))
