In [185]:
_exp_name = 'sample'

In [186]:
import random
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import os
import torchvision.transforms as transforms
from PIL import Image

from torch.utils.data import ConcatDataset, DataLoader, Subset, Dataset
from torchvision.datasets import DatasetFolder, VisionDataset

from tqdm.auto import tqdm

In [187]:
def same_seed(myseed):
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    np.random.seed(myseed)  
    torch.manual_seed(myseed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(myseed)

myseed = 6666
same_seed(myseed)

In [198]:
test_tfm = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

train_tfm = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

In [189]:
class FoodDataset(Dataset):
    
    def __init__(self, path, tfm = test_tfm, files = None):
        super(FoodDataset).__init__()
        self.path = path
        self.files = [os.path.join(self.path, x) for x in os.listdir(self.path) if x.endswith('.jpg')]
        if files != None:
            self.files = files
        self.transform = tfm

    def __getitem__(self, idx):
        fname = self.files[idx]
        im = Image.open(fname)
        im = self.transform(im)

        try:
            label = int(fname.split('\\')[-1].split('_')[0])
        except:
            label = -1
        return im, label

    def __len__(self):
        return len(self.files)


In [None]:
# class Classifier(nn.Module):
#     def __init__(self):
#         super(Classifier, self).__init__()
#         # input_dim: (3, 128, 128)
        
#         self.cnn = nn.Sequential(
#             nn.Conv2d(3, 64, 3, 1, 1), # output_dim: (64, 128, 128)
#             nn.BatchNorm2d(64),
#             nn.ReLU(),
#             nn.MaxPool2d(2, 2, 0), # output_dim: (64, 64, 64)

#             nn.Conv2d(64, 128, 3, 1, 1), # output_dim: (128, 64, 64)
#             nn.BatchNorm2d(128),
#             nn.ReLU(),
#             nn.MaxPool2d(2, 2, 1), #output_dim: (128, 32, 32)

#             nn.Conv2d(128, 256, 3, 1, 1), # output_dim: (256, 64, 64)
#             nn.BatchNorm2d(256),
#             nn.ReLU(),
#             nn.MaxPool2d(2, 2, 1), #output_dim: (256, 16, 16)

#             nn.Conv2d(256, 512, 3, 1, 1), # output_dim: (512, 16, 16)
#             nn.BatchNorm2d(512),
#             nn.ReLU(),
#             nn.MaxPool2d(2, 2, 0), # output_dim: (512, 8, 8)

#             nn.Conv2d(512, 512, 3, 1, 1), # output_dim: (512, 8, 8)
#             nn.BatchNorm2d(512),
#             nn.ReLU(),
#             nn.MaxPool2d(2, 2, 0) #output_dim: (512, 4, 4)
#         )
        
#         self.fc = nn.Sequential(
#             nn.Linear(512 * 4 * 4, 1024),
#             nn.ReLU(),
#             nn.Linear(1024, 512),
#             nn.ReLU(),
#             nn.Linear(512, 11)
#         )

#     def forward(self, x):
#         out = self.cnn(x)
#         out = out.view(out.size()[0], -1)
#         out = self.fc(out)
#         return out


In [203]:
import torchvision.models as models

model = models.resnet50(pretrained = True)
in_features = model.fc.in_features
model.fc = nn.Linear(in_features, 11)



Input shape: torch.Size([233, 3, 128, 128])

### 逐层输出 shape:
- After layer 1 (Conv2d): torch.Size([233, 64, 128, 128])
- After layer 2 (BatchNorm2d): torch.Size([233, 64, 128, 128])
- After layer 3 (ReLU): torch.Size([233, 64, 128, 128])
- After layer 4 (MaxPool2d): torch.Size([233, 64, 64, 64])
- After layer 5 (Conv2d): torch.Size([233, 128, 64, 64])
- After layer 6 (BatchNorm2d): torch.Size([233, 128, 64, 64])
- After layer 7 (ReLU): torch.Size([233, 128, 64, 64])
- After layer 8 (MaxPool2d): torch.Size([233, 128, 33, 33])
- After layer 9 (Conv2d): torch.Size([233, 256, 33, 33])
- After layer 10 (BatchNorm2d): torch.Size([233, 256, 33, 33])
- After layer 11 (ReLU): torch.Size([233, 256, 33, 33])
- After layer 12 (MaxPool2d): torch.Size([233, 256, 17, 17])
- After layer 13 (Conv2d): torch.Size([233, 512, 17, 17])
- After layer 14 (BatchNorm2d): torch.Size([233, 512, 17, 17])
- After layer 15 (ReLU): torch.Size([233, 512, 17, 17])
- After layer 16 (MaxPool2d): torch.Size([233, 512, 8, 8])
- After layer 17 (Conv2d): torch.Size([233, 512, 8, 8])
- After layer 18 (BatchNorm2d): torch.Size([233, 512, 8, 8])
- After layer 19 (ReLU): torch.Size([233, 512, 8, 8])
- After layer 20 (MaxPool2d): torch.Size([233, 512, 4, 4])
- Before fc (after flattening): torch.Size([233, 512, 4, 4])
- After flattening: torch.Size([233, 8192])
- After layer 21 (Linear): torch.Size([233, 1024])
- After layer 22 (ReLU): torch.Size([233, 1024])
- After layer 23 (Linear): torch.Size([233, 512])
- After layer 24 (ReLU): torch.Size([233, 512])
- After layer 25 (Linear): torch.Size([233, 11])


In [204]:
# from torchviz import make_dot 

# if __name__ == '__main__':
#     # 构造一个 shape 为 [233, 3, 128, 128] 的输入 tensor
#     x = torch.randn(233, 3, 128, 128)
#     print("Input shape:", x.shape)  # 打印输入 shape

#     # 创建模型实例
#     model = Classifier()

#     # 逐层打印模型输出的 shape
#     print("\n逐层输出 shape:")
#     out = x
#     # 对于 self.cnn 中的每个层依次处理并打印输出 shape
#     layer_num = 1
#     for layer in model.cnn:
#         out = layer(out)
#         print(f"After layer {layer_num} ({layer.__class__.__name__}): {out.shape}")
#         layer_num += 1

#     # 展平前的输出 shape
#     print(f"Before fc (after flattening): {out.shape}")
#     out = out.view(out.size()[0], -1)
#     print(f"After flattening: {out.shape}")

#     # 逐层通过全连接层
#     for layer in model.fc:
#         out = layer(out)
#         print(f"After layer {layer_num} ({layer.__class__.__name__}): {out.shape}")
#         layer_num += 1

#     # 使用 torchviz 可视化整个模型的计算图
#     # 注意：可视化时需要传入模型的输出及模型参数
#     out_graph = model(x)
#     dot = make_dot(out_graph, params=dict(model.named_parameters()))
#     # 生成 PNG 文件并自动打开（若系统配置支持）
#     dot.format = 'png'
#     dot.render("model_visualization", view=True)

#     print("\n模型计算图已保存为 'model_visualization.png'")

In [205]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
# model = Classifier().to(device)
model = model.to(device)
batch_size = 64
n_epochs = 20
patience = 5
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr = 0.0003, weight_decay = 1e-5)

In [206]:
train_set = FoodDataset('./train', tfm = train_tfm)
valid_set = FoodDataset('./valid', tfm = test_tfm)

train_loader = DataLoader(train_set, batch_size = batch_size, shuffle = True, num_workers = 0, pin_memory = True)
valid_loader = DataLoader(valid_set, batch_size = batch_size, shuffle = True, num_workers = 0, pin_memory = True)

In [207]:
stale = 0
best_acc = 0

for epoch in range(n_epochs):
    model.train()

    train_loss = []
    train_accs = []

    for batch in tqdm(train_loader):
        imgs, labels = batch
        imgs = imgs.to(device)
        labels = labels.to(device)

        preds = model(imgs)
        loss = criterion(preds, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        acc = (preds.argmax(dim = -1) == labels).float().mean()

        train_loss.append(loss.item())
        train_accs.append(acc.item())
        # train_loss.append()
    train_loss = sum(train_loss) / len(train_loss)
    train_acc = sum(train_accs) / len(train_accs)
    print(f'[Train | {epoch + 1} / {n_epochs}] loss = {train_loss:.5f}, acc = {train_acc:.5f}') 

    model.eval()
    valid_loss = []
    valid_accs = []

    for batch in tqdm(valid_loader):
        with torch.no_grad():
            imgs, labels = batch
            imgs = imgs.to(device)
            labels = labels.to(device)

            preds = model(imgs)
            loss = criterion(preds, labels)
            acc = (preds.argmax(dim = -1) == labels).float().mean()

            valid_loss.append(loss.item())
            valid_accs.append(acc.item())

    valid_loss = sum(valid_loss) / len(valid_loss)
    valid_acc = sum(valid_accs) / len(valid_accs)

    print(f'[Valid | {epoch + 1} / {n_epochs}] loss = {valid_loss:.5f}, acc = {valid_acc:.5f}') 

    # update logs
    if valid_acc > best_acc:
        with open(f"./{_exp_name}_log.txt","a"):
            print(f"[ Valid | {epoch + 1:03d}/{n_epochs:03d} ] loss = {valid_loss:.5f}, acc = {valid_acc:.5f} -> best")
    else:
        with open(f"./{_exp_name}_log.txt","a"):
            print(f"[ Valid | {epoch + 1:03d}/{n_epochs:03d} ] loss = {valid_loss:.5f}, acc = {valid_acc:.5f}")


    # save models
    if valid_acc > best_acc:
        print(f"Best model found at epoch {epoch}, saving model")
        torch.save(model.state_dict(), f"{_exp_name}_best.ckpt") # only save best to prevent output memory exceed error
        best_acc = valid_acc
        stale = 0
    else:
        stale += 1
        if stale > patience:
            print(f"No improvment {patience} consecutive epochs, early stopping")
            break


  0%|          | 0/157 [00:00<?, ?it/s]

[Train | 1 / 20] loss = 0.66632, acc = 0.78384


  0%|          | 0/57 [00:00<?, ?it/s]

[Valid | 1 / 20] loss = 0.52518, acc = 0.83392
[ Valid | 001/020 ] loss = 0.52518, acc = 0.83392 -> best
Best model found at epoch 0, saving model


  0%|          | 0/157 [00:00<?, ?it/s]

[Train | 2 / 20] loss = 0.32465, acc = 0.89510


  0%|          | 0/57 [00:00<?, ?it/s]

[Valid | 2 / 20] loss = 0.55086, acc = 0.83836
[ Valid | 002/020 ] loss = 0.55086, acc = 0.83836 -> best
Best model found at epoch 1, saving model


  0%|          | 0/157 [00:00<?, ?it/s]

[Train | 3 / 20] loss = 0.23646, acc = 0.92436


  0%|          | 0/57 [00:00<?, ?it/s]

[Valid | 3 / 20] loss = 0.54437, acc = 0.83392
[ Valid | 003/020 ] loss = 0.54437, acc = 0.83392


  0%|          | 0/157 [00:00<?, ?it/s]

[Train | 4 / 20] loss = 0.15227, acc = 0.95004


  0%|          | 0/57 [00:00<?, ?it/s]

[Valid | 4 / 20] loss = 0.51084, acc = 0.85834
[ Valid | 004/020 ] loss = 0.51084, acc = 0.85834 -> best
Best model found at epoch 3, saving model


  0%|          | 0/157 [00:00<?, ?it/s]

[Train | 5 / 20] loss = 0.09631, acc = 0.96845


  0%|          | 0/57 [00:00<?, ?it/s]

[Valid | 5 / 20] loss = 0.52940, acc = 0.84859
[ Valid | 005/020 ] loss = 0.52940, acc = 0.84859


  0%|          | 0/157 [00:00<?, ?it/s]

[Train | 6 / 20] loss = 0.12655, acc = 0.95880


  0%|          | 0/57 [00:00<?, ?it/s]

[Valid | 6 / 20] loss = 0.62499, acc = 0.83253
[ Valid | 006/020 ] loss = 0.62499, acc = 0.83253


  0%|          | 0/157 [00:00<?, ?it/s]

[Train | 7 / 20] loss = 0.09558, acc = 0.96795


  0%|          | 0/57 [00:00<?, ?it/s]

[Valid | 7 / 20] loss = 0.57696, acc = 0.85218
[ Valid | 007/020 ] loss = 0.57696, acc = 0.85218


  0%|          | 0/157 [00:00<?, ?it/s]

[Train | 8 / 20] loss = 0.07224, acc = 0.97582


  0%|          | 0/57 [00:00<?, ?it/s]

[Valid | 8 / 20] loss = 0.70329, acc = 0.82079
[ Valid | 008/020 ] loss = 0.70329, acc = 0.82079


  0%|          | 0/157 [00:00<?, ?it/s]

[Train | 9 / 20] loss = 0.09663, acc = 0.96935


  0%|          | 0/57 [00:00<?, ?it/s]

[Valid | 9 / 20] loss = 0.84865, acc = 0.79500
[ Valid | 009/020 ] loss = 0.84865, acc = 0.79500


  0%|          | 0/157 [00:00<?, ?it/s]

[Train | 10 / 20] loss = 0.10359, acc = 0.96676


  0%|          | 0/57 [00:00<?, ?it/s]

[Valid | 10 / 20] loss = 0.62979, acc = 0.84414
[ Valid | 010/020 ] loss = 0.62979, acc = 0.84414
No improvment 5 consecutive epochs, early stopping


In [195]:
test_set = FoodDataset('./test', test_tfm)
test_loader = DataLoader(test_set, batch_size = batch_size, shuffle = False, num_workers = 0, pin_memory = True)

In [208]:
model_best = models.resnet50(pretrained = False)
in_features = model_best.fc.in_features
model_best.fc = nn.Linear(in_features, 11)
model_best = model_best.to(device)
model_best.load_state_dict(torch.load(f'{_exp_name}_best.ckpt'))
# model_best.load_state_dict(torch.load(f'{_exp_name}_best.ckpt'))
model_best.eval()
prediction = []
with torch.no_grad():
    for data, _ in tqdm(test_loader):
        data = data.to(device)
        preds = model_best(data)
        test_label = np.argmax(preds.cpu().data.numpy(), axis = 1)
        prediction += test_label.squeeze().tolist()

  model_best.load_state_dict(torch.load(f'{_exp_name}_best.ckpt'))


  0%|          | 0/47 [00:00<?, ?it/s]

In [209]:
# create test csv
def pad4(i):
    return "0"*(4-len(str(i)))+str(i)
df = pd.DataFrame()
df["Id"] = [pad4(i) for i in range(len(test_set))]
df["Category"] = prediction
df.to_csv("submission.csv",index = False)