In [1]:
!nvidia-smi

Wed Dec  3 05:50:14 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 570.172.08             Driver Version: 570.172.08     CUDA Version: 12.8     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  Tesla T4                       Off |   00000000:00:04.0 Off |                    0 |
| N/A   37C    P8              9W /   70W |       0MiB /  15360MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
|   1  Tesla T4                       Off |   00

In [2]:
import numpy as np
import pandas as pd
import torch
import os
import random
import torch.nn as nn
import torchvision.transforms as transforms
from PIL import Image
from torch.utils.data import ConcatDataset, DataLoader, Subset, Dataset
from torchvision.datasets import DatasetFolder, VisionDataset
from torchvision import models
from tqdm.auto import tqdm
import random


In [3]:
myseed = 6666
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
np.random.seed(myseed)
torch.manual_seed(myseed)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(myseed)

_exp_name = "sample"


In [4]:
# However, it is also possible to use augmentation in the testing phase.
# You may use train_tfm to produce a variety of images and then test using ensemble methods
train_tfm = transforms.Compose([
    # Resize the image into a fixed shape (height = width = 128)
    # transforms.Resize((128, 128)),
    # You may add some transforms here.
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
 
    # ToTensor() should be the last one of the transforms.
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.55807906, 0.45261728, 0.34557677], std=[0.23075283, 0.24137004, 0.24039967])
])
 
# Normally, We don't need augmentations in testing and validation.
# All we need here is to resize the PIL image and transform it into Tensor.
test_tfm = transforms.Compose([
    # transforms.Resize((128, 128)),
    transforms.Resize(256),  # 256
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.55807906, 0.45261728, 0.34557677], std=[0.23075283, 0.24137004, 0.24039967])
])

In [5]:
class FoodDataset(Dataset):
    def __init__(self, path, tfm=test_tfm, files=None):
        super().__init__()
        self.path = path
        # 读取目录下所有 jpg 文件
        self.files = sorted([os.path.join(path, x) for x in os.listdir(path) if x.endswith(".jpg")])
        if files != None:
            self.files = files
        self.transform = tfm

    def __len__(self):
        return len(self.files)

    def __getitem__(self, idx):
        fname = self.files[idx]
        im = Image.open(fname)
        im = self.transform(im)
        
        try:
            # 解析文件名获取标签
            label = int(fname.split("/")[-1].split("_")[0])
        except:
            label = -1 # 测试集没有标签
            
        return im, label

In [6]:
class Classifier(nn.Module):
    def __init__(self):
        super(Classifier, self).__init__()
        # 使用 torchvision 的 resnet18
        self.backbone = models.resnet18(pretrained=True)
        # 替换最后一层，全连接输出 11 类
        in_features = self.backbone.fc.in_features
        self.backbone.fc = nn.Linear(in_features, 11)

    def forward(self, x):
        return self.backbone(x)

In [7]:
device = "cuda" if torch.cuda.is_available() else "cpu"

# 初始化模型
model = Classifier().to(device)

# --- 优化部分：双卡并行 ---
if torch.cuda.device_count() > 1:
    print(f"Let's use {torch.cuda.device_count()} GPUs!")
    model = nn.DataParallel(model)

# 超参数设置
# 优化：增大 batch_size 以利用双卡显存 (64 -> 128)
batch_size = 128 

# 优化：增加训练轮数 (8 -> 60)
n_epochs = 20 

# 优化：增加早停耐心值 (5 -> 15)
patience = 5 

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.0003, weight_decay=1e-5)

train_path = '/kaggle/input/ml2023spring-hw3/train'
valid_path = '/kaggle/input/ml2023spring-hw3/valid'
test_path = '/kaggle/input/ml2023spring-hw3/test'


Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 173MB/s]


Let's use 2 GPUs!


In [8]:
train_set = FoodDataset(train_path, tfm=train_tfm)
train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True, num_workers=4, pin_memory=True)

valid_set = FoodDataset(valid_path, tfm=test_tfm)
valid_loader = DataLoader(valid_set, batch_size=batch_size, shuffle=True, num_workers=4, pin_memory=True)

print('data ok')

data ok


In [9]:
stale = 0
best_acc = 0

for epoch in range(n_epochs):
    # ---------- Training ----------
    model.train()
    train_loss = []
    train_accs = []

    for batch in tqdm(train_loader):
        imgs, labels = batch
        logits = model(imgs.to(device))
        loss = criterion(logits, labels.to(device))
        optimizer.zero_grad()
        loss.backward()
        grad_norm = nn.utils.clip_grad_norm_(model.parameters(), max_norm=10)
        optimizer.step()

        acc = (logits.argmax(dim=-1) == labels.to(device)).float().mean()
        train_loss.append(loss.item())
        train_accs.append(acc)

    train_loss = sum(train_loss) / len(train_loss)
    train_acc = sum(train_accs) / len(train_accs)
    print(f"[ Train | {epoch + 1:03d}/{n_epochs:03d} ] loss = {train_loss:.5f}, acc = {train_acc:.5f}")

    # ---------- Validation ----------
    model.eval()
    valid_loss = []
    valid_accs = []

    for batch in tqdm(valid_loader):
        imgs, labels = batch
        with torch.no_grad():
            logits = model(imgs.to(device))
        loss = criterion(logits, labels.to(device))
        acc = (logits.argmax(dim=-1) == labels.to(device)).float().mean()
        valid_loss.append(loss.item())
        valid_accs.append(acc)

    valid_loss = sum(valid_loss) / len(valid_loss)
    valid_acc = sum(valid_accs) / len(valid_accs)
    print(f"[ Valid | {epoch + 1:03d}/{n_epochs:03d} ] loss = {valid_loss:.5f}, acc = {valid_acc:.5f}")

    # 更新日志
    if valid_acc > best_acc:
        with open(f"./{_exp_name}_log.txt","a") as f:
            f.write(f"[ Valid | {epoch + 1:03d}/{n_epochs:03d} ] loss = {valid_loss:.5f}, acc = {valid_acc:.5f} -> best\n")
    else:
        with open(f"./{_exp_name}_log.txt","a") as f:
            f.write(f"[ Valid | {epoch + 1:03d}/{n_epochs:03d} ] loss = {valid_loss:.5f}, acc = {valid_acc:.5f}\n")

    # 保存模型 - 优化：处理 DataParallel 的保存
    if valid_acc > best_acc:
        print(f"Best model found at epoch {epoch + 1}, saving model")
        # 如果使用了 DataParallel，需要保存 model.module 的参数
        if isinstance(model, nn.DataParallel):
            torch.save(model.module.state_dict(), f"{_exp_name}_best.ckpt")
        else:
            torch.save(model.state_dict(), f"{_exp_name}_best.ckpt")
        best_acc = valid_acc
        stale = 0
    else:
        stale += 1
        if stale > patience:
            print(f"No improvement {patience} consecutive epochs, early stopping")
            break

  0%|          | 0/79 [00:00<?, ?it/s]

[ Train | 001/020 ] loss = 0.83918, acc = 0.72844


  0%|          | 0/29 [00:00<?, ?it/s]

[ Valid | 001/020 ] loss = 0.55892, acc = 0.80872
Best model found at epoch 1, saving model


  0%|          | 0/79 [00:00<?, ?it/s]

[ Train | 002/020 ] loss = 0.54986, acc = 0.82189


  0%|          | 0/29 [00:00<?, ?it/s]

[ Valid | 002/020 ] loss = 0.51082, acc = 0.83436
Best model found at epoch 2, saving model


  0%|          | 0/79 [00:00<?, ?it/s]

[ Train | 003/020 ] loss = 0.46915, acc = 0.85057


  0%|          | 0/29 [00:00<?, ?it/s]

[ Valid | 003/020 ] loss = 0.47206, acc = 0.85066
Best model found at epoch 3, saving model


  0%|          | 0/79 [00:00<?, ?it/s]

[ Train | 004/020 ] loss = 0.43078, acc = 0.86027


  0%|          | 0/29 [00:00<?, ?it/s]

[ Valid | 004/020 ] loss = 0.48695, acc = 0.84635


  0%|          | 0/79 [00:00<?, ?it/s]

[ Train | 005/020 ] loss = 0.40633, acc = 0.87085


  0%|          | 0/29 [00:00<?, ?it/s]

[ Valid | 005/020 ] loss = 0.39079, acc = 0.88178
Best model found at epoch 5, saving model


  0%|          | 0/79 [00:00<?, ?it/s]

[ Train | 006/020 ] loss = 0.36105, acc = 0.88172


  0%|          | 0/29 [00:00<?, ?it/s]

[ Valid | 006/020 ] loss = 0.42547, acc = 0.86489


  0%|          | 0/79 [00:00<?, ?it/s]

[ Train | 007/020 ] loss = 0.34538, acc = 0.88489


  0%|          | 0/29 [00:00<?, ?it/s]

[ Valid | 007/020 ] loss = 0.43959, acc = 0.86485


  0%|          | 0/79 [00:00<?, ?it/s]

[ Train | 008/020 ] loss = 0.31013, acc = 0.89765


  0%|          | 0/29 [00:00<?, ?it/s]

[ Valid | 008/020 ] loss = 0.43062, acc = 0.86606


  0%|          | 0/79 [00:00<?, ?it/s]

[ Train | 009/020 ] loss = 0.32234, acc = 0.89587


  0%|          | 0/29 [00:00<?, ?it/s]

[ Valid | 009/020 ] loss = 0.40381, acc = 0.87634


  0%|          | 0/79 [00:00<?, ?it/s]

[ Train | 010/020 ] loss = 0.29269, acc = 0.90457


  0%|          | 0/29 [00:00<?, ?it/s]

[ Valid | 010/020 ] loss = 0.41331, acc = 0.86893


  0%|          | 0/79 [00:00<?, ?it/s]

[ Train | 011/020 ] loss = 0.29676, acc = 0.90398


  0%|          | 0/29 [00:00<?, ?it/s]

[ Valid | 011/020 ] loss = 0.41382, acc = 0.87742
No improvement 5 consecutive epochs, early stopping


In [10]:
test_set = FoodDataset(test_path, tfm=test_tfm)
test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=False, num_workers=4, pin_memory=True)

# 加载最佳模型
model_best = Classifier().to(device)
model_best.load_state_dict(torch.load(f"{_exp_name}_best.ckpt"))
model_best.eval()

prediction = []
with torch.no_grad():
    for data, _ in tqdm(test_loader):
        test_pred = model_best(data.to(device))
        test_label = np.argmax(test_pred.cpu().data.numpy(), axis=1)
        prediction += test_label.squeeze().tolist()

# 创建提交文件
def pad4(i):
    return "0"*(4-len(str(i)))+str(i)
    
df = pd.DataFrame()
df["Id"] = [pad4(i) for i in range(len(test_set))]
df["Category"] = prediction
df.to_csv("submission.csv", index=False)
print("Submission file saved as submission.csv")

  0%|          | 0/24 [00:00<?, ?it/s]

Submission file saved as submission.csv


In [None]:
import torch
import numpy as np
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt
from tqdm import tqdm
import matplotlib.cm as cm

device = 'cuda' if torch.cuda.is_available() else 'cpu'

# 加载训练好的模型
model = Classifier().to(device)
state_dict = torch.load(f"{_exp_name}_best.ckpt", map_location=device)
model.load_state_dict(state_dict)
model.eval()

# 定义一个函数：获取 resnet 的 fc 之前的特征
def extract_features_backbone(x):
    # 参考 torchvision.models.resnet18 的 forward 源码
    x = model.backbone.conv1(x)
    x = model.backbone.bn1(x)
    x = model.backbone.relu(x)
    x = model.backbone.maxpool(x)

    x = model.backbone.layer1(x)
    x = model.backbone.layer2(x)
    x = model.backbone.layer3(x)
    x = model.backbone.layer4(x)

    x = model.backbone.avgpool(x)          # (B, 512, 1, 1)
    x = torch.flatten(x, 1)                # (B, 512)
    return x

valid_set = FoodDataset(valid_path, tfm=test_tfm)
valid_loader = DataLoader(valid_set, batch_size=64, shuffle=False, num_workers=4, pin_memory=True)

features = []
labels = []

for batch in tqdm(valid_loader):
    imgs, lbls = batch
    with torch.no_grad():
        feats = extract_features_backbone(imgs.to(device))
    labels.extend(lbls.cpu().numpy())
    feats = feats.cpu().numpy()
    features.extend(feats)

features = np.array(features)
labels = np.array(labels)

# t-SNE 可视化
features_tsne = TSNE(n_components=2, init='pca', random_state=42).fit_transform(features)
plt.figure(figsize=(10, 8))
for label in np.unique(labels):
    plt.scatter(features_tsne[labels == label, 0], features_tsne[labels == label, 1], label=label, s=5)
plt.legend()
plt.title("t-SNE of ResNet18 Features")
plt.show()

100%|██████████| 57/57 [00:10<00:00,  5.49it/s]
