In [3]:
import pandas as pd
import torch
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms
from torchvision.models import resnet50
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.model_selection import train_test_split, KFold
from sklearn.metrics import accuracy_score
from PIL import Image
import csv

In [4]:
# Step 1: Load label_to_idx and validate
# Each non-empty line of label_to_idx.txt is parsed as "<label> <index>",
# with the index being the last whitespace-separated token on the line.
label_to_idx = {}
with open("label_to_idx.txt", "r") as f:
    for line in f:
        line = line.strip()
        if not line:
            # Tolerate blank lines (for example a trailing empty line at end of file).
            continue
        # Split on the LAST run of whitespace so that a label which itself
        # contains spaces stays intact instead of breaking the two-value unpacking.
        label, idx = line.rsplit(None, 1)
        label_to_idx[label] = int(idx)

# Ensure there are exactly 79 labels and that their indices are exactly 0..78.
# Checking only the count would let duplicated or shifted indices slip through.
assert len(label_to_idx) == 79, "The number of classes in label_to_idx is not 79."
assert sorted(label_to_idx.values()) == list(range(79)), "label_to_idx indices must be exactly 0..78."
print("label_to_idx loaded successfully.")

label_to_idx loaded successfully.


In [5]:
# Step 2: Load and process train.csv
train_df = pd.read_csv("train.csv")

# Split the comma-separated label string into a list of names. Whitespace around
# each name is trimmed so that "a, b" yields ["a", "b"]; without the trim the
# second name would never match label_to_idx and would be silently dropped.
# A missing cell (NaN after the split) is treated as "no labels".
split_labels = train_df['labels'].str.split(',')
train_df['labels'] = split_labels.apply(
    lambda names: [name.strip() for name in names] if isinstance(names, list) else []
)

# Report label names that are absent from label_to_idx instead of dropping them
# without a trace; they are still ignored when building the targets below.
unknown_labels = sorted({
    label
    for labels in train_df['labels']
    for label in labels
    if label not in label_to_idx
})
if unknown_labels:
    print(f"Warning: {len(unknown_labels)} label name(s) not found in label_to_idx were ignored "
          f"(first few: {unknown_labels[:10]})")

# Map labels to indices
train_df['labels_idx'] = train_df['labels'].apply(
    lambda labels: [label_to_idx[label] for label in labels if label in label_to_idx]
)

# Encode labels into a multi-label binary format: one row per sample, one column per class
mlb = MultiLabelBinarizer(classes=list(range(79)))  # Ensure 79 classes
y_train = mlb.fit_transform(train_df['labels_idx'])
print("Train labels processed successfully. Shape of y_train:", y_train.shape)

Train labels processed successfully. Shape of y_train: (37866, 79)


In [6]:
# Step 3: Define CustomDataset
class CustomDataset(Dataset):
    """Image dataset that pairs each dataframe row with a label vector.

    Rows and labels are matched by position: sample ``i`` uses
    ``dataframe.iloc[i]`` and ``labels[i]``.

    Args:
        dataframe: Table with a ``'filename'`` column.
        image_folder: Directory containing the image files.
        transform: Callable applied to the RGB PIL image; its result is
            returned as the sample's image.
        labels: Sequence with one label vector per dataframe row.

    Raises:
        ValueError: If ``dataframe`` and ``labels`` differ in length.
    """

    def __init__(self, dataframe, image_folder, transform, labels):
        # A length mismatch would otherwise surface only as a late IndexError
        # or, worse, as silently misaligned image/label pairs.
        if len(dataframe) != len(labels):
            raise ValueError(
                f"dataframe has {len(dataframe)} rows but labels has {len(labels)} entries"
            )
        self.dataframe = dataframe
        self.image_folder = image_folder
        self.transform = transform
        self.labels = labels

    def __len__(self):
        """Return the number of samples (rows in the dataframe)."""
        return len(self.dataframe)

    def __getitem__(self, idx):
        """Load the image for row ``idx`` and return ``(image, label)``.

        The label is returned as a float32 tensor.
        """
        row = self.dataframe.iloc[idx]
        img_path = f"{self.image_folder}/{row['filename']}"
        # The context manager closes the underlying file as soon as the RGB
        # copy has been created, rather than leaving that to garbage collection.
        with Image.open(img_path) as img:
            image = img.convert("RGB")
        image = self.transform(image)
        label = torch.tensor(self.labels[idx], dtype=torch.float32)
        return image, label

# Image pipeline: random augmentation first, then fixed-size preprocessing.
_augmentation_steps = [
    transforms.RandomRotation(degrees=15),  # rotate by a random angle within +/-15 degrees
    transforms.ColorJitter(
        brightness=0.2,
        contrast=0.2,
        saturation=0.2,
        hue=0.1,
    ),  # random colour perturbation
    transforms.RandomHorizontalFlip(p=0.5),  # mirror half of the images
]
_preprocessing_steps = [
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    # Per-channel mean/std; these values match the commonly used ImageNet statistics.
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
]
transform = transforms.Compose(_augmentation_steps + _preprocessing_steps)

In [None]:
from sklearn.metrics import f1_score, hamming_loss

kf = KFold(n_splits=5, shuffle=True, random_state=42)  # reduced to 5 folds
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Deterministic preprocessing for validation: the same resize / tensor / normalise
# steps as the training `transform`, but without the random augmentation, so the
# validation loss that drives early stopping is comparable from epoch to epoch.
val_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

for fold, (train_idx, val_idx) in enumerate(kf.split(train_df)):
    print(f"Fold {fold+1}")

    # Positional split of both the dataframe rows and the matching label matrix rows
    train_data, val_data = train_df.iloc[train_idx], train_df.iloc[val_idx]
    y_train_data, y_val_data = y_train[train_idx], y_train[val_idx]

    # Prepare datasets and dataloaders (augmentation for training only)
    train_dataset = CustomDataset(train_data, "train_data", transform, y_train_data)
    val_dataset = CustomDataset(val_data, "train_data", val_transform, y_val_data)
    train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)  # larger batch size
    val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False)

    # Step 5: Define the model (a fresh network for every fold)
    # NOTE(review): newer torchvision releases deprecate `pretrained=` in favour of
    # `weights=`; confirm the installed version before switching.
    model = resnet50(pretrained=True)
    model.fc = torch.nn.Sequential(
        torch.nn.Linear(model.fc.in_features, 512),
        torch.nn.ReLU(),
        torch.nn.Dropout(0.5),
        torch.nn.Linear(512, 79),  # 79 classes
        torch.nn.Sigmoid()  # outputs per-class probabilities, as BCELoss expects
    )
    model = model.to(device)

    # Step 6: Train the model with early stopping
    criterion = torch.nn.BCELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

    num_epochs = 20  # reduced to 20 epochs
    patience = 3  # Number of epochs to wait for improvement
    best_loss = float('inf')  # best mean validation loss seen so far in this fold
    best_state = None  # CPU copy of the weights that produced best_loss
    patience_counter = 0

    for epoch in range(num_epochs):
        # Training phase
        model.train()
        running_loss = 0.0
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

        # Validation phase
        model.eval()
        val_loss = 0.0
        all_preds, all_labels = [], []
        with torch.no_grad():
            for images, labels in val_loader:
                images, labels = images.to(device), labels.to(device)
                outputs = model(images)
                val_loss += criterion(outputs, labels).item()

                # Threshold the probabilities at 0.5 to get hard multi-label predictions
                preds = (outputs > 0.5).cpu().numpy()
                all_preds.extend(preds)
                all_labels.extend(labels.cpu().numpy())

        # Per-batch means; the same mean validation loss is logged and used for early stopping
        avg_train_loss = running_loss / len(train_loader)
        avg_val_loss = val_loss / len(val_loader)

        f1 = f1_score(all_labels, all_preds, average="samples")
        hamming = hamming_loss(all_labels, all_preds)
        print(f"Epoch {epoch+1}, Train Loss: {avg_train_loss:.4f}, Val Loss: {avg_val_loss:.4f}, F1 Score: {f1:.4f}, Hamming Loss: {hamming:.4f}")

        # Early stopping logic
        if avg_val_loss < best_loss:
            best_loss = avg_val_loss
            # Snapshot the weights on the CPU so the best epoch can be restored
            # after training has moved on to later (worse) epochs.
            best_state = {name: tensor.detach().cpu().clone()
                          for name, tensor in model.state_dict().items()}
            patience_counter = 0
        else:
            patience_counter += 1
            if patience_counter >= patience:
                print("Early stopping triggered. Best Val Loss:", best_loss)
                break

    # Leave `model` holding the best-validation weights of this fold rather than
    # the weights of whichever epoch happened to run last.
    if best_state is not None:
        model.load_state_dict(best_state)


Fold 1




In [1]:
# Step 7: Generate submission.csv
# NOTE: this cell uses `model` and `device` created by the training cell. Run that
# cell in the same kernel session first; on a fresh kernel this cell fails with
# NameError because `model` does not exist yet.
model.eval()
test_df = pd.read_csv("test_data_public.csv")

# Deterministic preprocessing for inference: resize / tensor / normalise only.
# Running test images through the random training augmentation would make the
# submitted scores differ from run to run.
test_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

with open("submission.csv", "w", newline="") as f:
    writer = csv.writer(f)
    # Header: the filename followed by one score column per class
    writer.writerow(["filename"] + [f"class_{i}" for i in range(79)])

    # No gradients are needed for inference; skipping them saves memory and time.
    with torch.no_grad():
        for filename in test_df['filename']:
            img_path = f"test_data/{filename}"
            # Close the image file as soon as the RGB copy has been transformed
            with Image.open(img_path) as img:
                image = test_transform(img.convert("RGB")).unsqueeze(0).to(device)

            # Batch of one image -> take the single row of per-class scores
            preds = model(image).cpu().numpy()[0]
            preds_rounded = [f"{p:.4f}" for p in preds]
            writer.writerow([filename] + preds_rounded)

print("Submission file generated: submission.csv")

NameError: name 'model' is not defined