In [2]:
!bypy list
!bypy downfile food-11.zip

/apps/bypy ($t $f $s $m $d):
F food-11.zip 1163028761 2021-01-22, 20:26:42 7d9bbaedcqce1982041e54fba1f8c89f

In [8]:
import zipfile
import os
### 解压原始数据集，将src_path路径下的zip包解压至data/dataset目录下
src_path="/home/mist/food-11.zip"
target_path="/home/mist/dataset"
def unzip_data(src_path,target_path):
    if(not os.path.isdir(target_path)):    
        z = zipfile.ZipFile(src_path, 'r')    # 只读方式打开压缩文件
        z.extractall(path=target_path)        # 提取其中内容
        z.close()
    else:
        print("文件已解压")
unzip_data(src_path,target_path)

文件已解压


In [9]:
# import必要的第三方库
import os
import numpy as np
import cv2
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import pandas as pd
from torch.utils.data import DataLoader, Dataset
import time

In [10]:
def readfile(path, label):
    # label 是一个 boolean variable，代表需不需要回传 y 值
    image_dir = sorted(os.listdir(path))
    x = np.zeros((len(image_dir), 128, 128, 3), dtype=np.uint8)
    y = np.zeros((len(image_dir)), dtype=np.uint8)
    for i, file in enumerate(image_dir):
        img = cv2.imread(os.path.join(path, file))
        x[i, :, :] = cv2.resize(img,(128, 128))
        if label:
          y[i] = int(file.split("_")[0])
    if label:
      return x, y
    else:
      return x

In [12]:
#分別计算training set、validation set、testing set 的大小
workspace_dir = './dataset/food-11'
print("Reading data")
train_x, train_y = readfile(os.path.join(workspace_dir, "training"), True)
print("Size of training data = {}".format(len(train_x)))
val_x, val_y = readfile(os.path.join(workspace_dir, "validation"), True)
print("Size of validation data = {}".format(len(val_x)))
test_x = readfile(os.path.join(workspace_dir, "testing"), False)
print("Size of Testing data = {}".format(len(test_x)))

Reading data
Size of training data = 9866
Size of validation data = 3430
Size of Testing data = 3347


# Dataset

### 在 Pytorch 中，我们可以利用 torch.utils.data 的 Dataset 及 DataLoader 来"包装" data，使后续的 training 及 testing 更为方便。

### Dataset 需要 overload 两个函数：__len__ 及 __getitem__

### __len__ 必须要回传 dataset 的大小，而 __getitem__ 则定义了当程式利用 取值时，dataset 应该要怎么回传资料。

### 实际上我们并不会直接使用到这两个函数，但是使用 DataLoader 在 enumerate Dataset 时会使用到，没有实做的话会在程式运行阶段出现 error。

In [13]:
train_transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.RandomHorizontalFlip(), #随机将图片水平翻转
    transforms.RandomRotation(15), #随机旋转图片
    transforms.ToTensor(), #将图片转成 Tensor
])
#testing 时不需做 data augmentation
test_transform = transforms.Compose([
    transforms.ToPILImage(),                                    
    transforms.ToTensor(),
])
class ImgDataset(Dataset):
    def __init__(self, x, y=None, transform=None):
        self.x = x
        # label is required to be a LongTensor
        self.y = y
        if y is not None:
            self.y = torch.LongTensor(y)
        self.transform = transform
    def __len__(self):
        return len(self.x)
    def __getitem__(self, index):
        X = self.x[index]
        if self.transform is not None:
            X = self.transform(X)
        if self.y is not None:
            Y = self.y[index]
            return X, Y
        else:
            return X

In [14]:
batch_size = 128
train_set = ImgDataset(train_x, train_y, train_transform)
val_set = ImgDataset(val_x, val_y, test_transform)
train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_set, batch_size=batch_size, shuffle=False)

In [15]:
#Model
class Classifier(nn.Module):
    def __init__(self):
        super(Classifier, self).__init__()
        #torch.nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding)
        #torch.nn.MaxPool2d(kernel_size, stride, padding)
        #input 維度 [3, 128, 128]
        self.cnn = nn.Sequential(
            nn.Conv2d(3, 64, 3, 1, 1),  # [64, 128, 128]
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),      # [64, 64, 64]

            nn.Conv2d(64, 128, 3, 1, 1), # [128, 64, 64]
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),      # [128, 32, 32]

            nn.Conv2d(128, 256, 3, 1, 1), # [256, 32, 32]
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),      # [256, 16, 16]

            nn.Conv2d(256, 512, 3, 1, 1), # [512, 16, 16]
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),       # [512, 8, 8]
            
            nn.Conv2d(512, 512, 3, 1, 1), # [512, 8, 8]
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),       # [512, 4, 4]
        )
        self.fc = nn.Sequential(
            nn.Linear(512*4*4, 1024),
            nn.ReLU(),
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.Linear(512, 11)
        )

    def forward(self, x):
        out = self.cnn(x)
        out = out.view(out.size()[0], -1)
        return self.fc(out)

In [16]:
#Training
model = Classifier().cuda()
loss = nn.CrossEntropyLoss() # 因为是 classification task，所以 loss 使用 CrossEntropyLoss
optimizer = torch.optim.Adam(model.parameters(), lr=0.001) # optimizer 使用 Adam
num_epoch = 30 # 训练轮数

for epoch in range(num_epoch):
    epoch_start_time = time.time()
    train_acc = 0.0
    train_loss = 0.0
    val_acc = 0.0
    val_loss = 0.0

    model.train() # 确保 model 是在 train model (开启 Dropout 等...)
    for i, data in enumerate(train_loader):
        optimizer.zero_grad() # 用 optimizer 将 model 参数的 gradient 归零
        train_pred = model(data[0].cuda()) # 利用 model 得到预测的机率分布 这边实际上就是去呼叫 model 的 forward 函数
        batch_loss = loss(train_pred, data[1].cuda()) # 计算 loss （注意 prediction 跟 label 必须同时在 CPU 或是 GPU 上）
        batch_loss.backward() # 利用 back propagation 算出每个参数的 gradient
        optimizer.step() # 以 optimizer 用 gradient 更新参数值

        train_acc += np.sum(np.argmax(train_pred.cpu().data.numpy(), axis=1) == data[1].numpy())
        train_loss += batch_loss.item()
    
    model.eval()
    with torch.no_grad():
        for i, data in enumerate(val_loader):
            val_pred = model(data[0].cuda())
            batch_loss = loss(val_pred, data[1].cuda())

            val_acc += np.sum(np.argmax(val_pred.cpu().data.numpy(), axis=1) == data[1].numpy())
            val_loss += batch_loss.item()

        #將結果 print 出來
        print('[%03d/%03d] %2.2f sec(s) Train Acc: %3.6f Loss: %3.6f | Val Acc: %3.6f loss: %3.6f' % \
            (epoch + 1, num_epoch, time.time()-epoch_start_time, \
             train_acc/train_set.__len__(), train_loss/train_set.__len__(), val_acc/val_set.__len__(), val_loss/val_set.__len__()))

[001/030] 28.74 sec(s) Train Acc: 0.248125 Loss: 0.017763 | Val Acc: 0.260641 loss: 0.017595
[002/030] 28.40 sec(s) Train Acc: 0.345834 Loss: 0.014737 | Val Acc: 0.287755 loss: 0.015745
[003/030] 28.36 sec(s) Train Acc: 0.398236 Loss: 0.013635 | Val Acc: 0.422449 loss: 0.013225
[004/030] 29.00 sec(s) Train Acc: 0.446888 Loss: 0.012517 | Val Acc: 0.388047 loss: 0.014008
[005/030] 28.60 sec(s) Train Acc: 0.472025 Loss: 0.011908 | Val Acc: 0.439942 loss: 0.013375
[006/030] 28.33 sec(s) Train Acc: 0.513785 Loss: 0.011115 | Val Acc: 0.472303 loss: 0.012677
[007/030] 28.16 sec(s) Train Acc: 0.533651 Loss: 0.010510 | Val Acc: 0.160350 loss: 0.032364
[008/030] 28.18 sec(s) Train Acc: 0.564667 Loss: 0.009979 | Val Acc: 0.501458 loss: 0.011824
[009/030] 28.35 sec(s) Train Acc: 0.581593 Loss: 0.009466 | Val Acc: 0.529155 loss: 0.011155
[010/030] 28.30 sec(s) Train Acc: 0.609872 Loss: 0.008984 | Val Acc: 0.560641 loss: 0.010725
[011/030] 28.20 sec(s) Train Acc: 0.622441 Loss: 0.008601 | Val Acc: 0

# 得到好的参数后，我们使用training set和validation set共同训练（资料量变多，模型效果较好）

In [17]:
train_val_x = np.concatenate((train_x, val_x), axis=0)
train_val_y = np.concatenate((train_y, val_y), axis=0)
train_val_set = ImgDataset(train_val_x, train_val_y, train_transform)
train_val_loader = DataLoader(train_val_set, batch_size=batch_size, shuffle=True)

In [18]:
model_best = Classifier().cuda()
loss = nn.CrossEntropyLoss() # 因为是 classification task，所以 loss 使用 CrossEntropyLoss
optimizer = torch.optim.Adam(model_best.parameters(), lr=0.001) # optimizer 使用 Adam
num_epoch = 30

for epoch in range(num_epoch):
    epoch_start_time = time.time()
    train_acc = 0.0
    train_loss = 0.0

    model_best.train()
    for i, data in enumerate(train_val_loader):
        optimizer.zero_grad()
        train_pred = model_best(data[0].cuda())
        batch_loss = loss(train_pred, data[1].cuda())
        batch_loss.backward()
        optimizer.step()

        train_acc += np.sum(np.argmax(train_pred.cpu().data.numpy(), axis=1) == data[1].numpy())
        train_loss += batch_loss.item()

        #将結果 print 出來
    print('[%03d/%03d] %2.2f sec(s) Train Acc: %3.6f Loss: %3.6f' % \
      (epoch + 1, num_epoch, time.time()-epoch_start_time, \
      train_acc/train_val_set.__len__(), train_loss/train_val_set.__len__()))

[001/030] 34.48 sec(s) Train Acc: 0.256919 Loss: 0.017188
[002/030] 34.64 sec(s) Train Acc: 0.369434 Loss: 0.014016
[003/030] 34.26 sec(s) Train Acc: 0.450963 Loss: 0.012416
[004/030] 34.08 sec(s) Train Acc: 0.499097 Loss: 0.011280
[005/030] 34.32 sec(s) Train Acc: 0.543998 Loss: 0.010251
[006/030] 34.49 sec(s) Train Acc: 0.576038 Loss: 0.009489
[007/030] 34.32 sec(s) Train Acc: 0.609431 Loss: 0.008827
[008/030] 34.19 sec(s) Train Acc: 0.644780 Loss: 0.008078
[009/030] 34.52 sec(s) Train Acc: 0.667494 Loss: 0.007496
[010/030] 34.31 sec(s) Train Acc: 0.682085 Loss: 0.007200
[011/030] 34.34 sec(s) Train Acc: 0.700361 Loss: 0.006714
[012/030] 34.11 sec(s) Train Acc: 0.726008 Loss: 0.006303
[013/030] 34.35 sec(s) Train Acc: 0.742103 Loss: 0.005884
[014/030] 34.40 sec(s) Train Acc: 0.748797 Loss: 0.005606
[015/030] 34.76 sec(s) Train Acc: 0.773691 Loss: 0.005144
[016/030] 34.28 sec(s) Train Acc: 0.781965 Loss: 0.004921
[017/030] 34.06 sec(s) Train Acc: 0.796255 Loss: 0.004533
[018/030] 34.1

# Testing
### 利用刚刚 train 好的 model 进行 prediction

In [19]:
test_set = ImgDataset(test_x, transform=test_transform)
test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=False)

In [20]:
model_best.eval()
prediction = []
with torch.no_grad():
    for i, data in enumerate(test_loader):
        test_pred = model_best(data.cuda())
        test_label = np.argmax(test_pred.cpu().data.numpy(), axis=1)
        for y in test_label:
            prediction.append(y)

In [21]:
#将结果写入 csv 档
with open("predict.csv", 'w') as f:
    f.write('Id,Category\n')
    for i, y in  enumerate(prediction):
        f.write('{},{}\n'.format(i, y))

In [None]:
def save_checkpoint(state,filename="checkpoint.pth"):
    print("Saving checkpoint")
    torch.save(state,filename)
checkpoint={'state_dict':model_best.state_dict(),'optimizer':optimizer.state_dict()}
save_checkpoint(checkpoint)