In [None]:
# Install torchsummary, which a later cell imports to print a per-layer model summary.
!pip install torchsummary



In [None]:
!gdown --id '10SYYurCRr4N-Lh_QW-YLifVRFQUZyluj' --output food-11.zip # download the dataset archive
!unzip food-11.zip # extract the archive


利用 OpenCV (cv2) 讀入照片並存放在 numpy array 中



In [None]:
# Import需要的套件
import os
import numpy as np
import cv2
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import pandas as pd
from torch.utils.data import DataLoader, Dataset
import time
from torchsummary import summary

In [None]:
# Mount Google Drive at /content/drive (Colab-only).
# NOTE(review): none of the cells visible here read from or write to
# /content/drive -- confirm whether this mount is still needed.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
def readfile(path, label):
    """Load every image in a directory into a uint8 array of 128x128 images.

    Files are processed in sorted filename order. Each image is read with
    ``cv2.imread`` and resized to 128x128. Channels are stored exactly as
    OpenCV returns them (no colour-space conversion is done here).

    Args:
        path: Directory containing the image files.
        label: Boolean; when true, the class id is parsed from the part of
            each filename before the first underscore (e.g. ``"3_12.jpg"``
            gives 3) and returned alongside the images.

    Returns:
        ``x`` of shape ``(N, 128, 128, 3)`` and dtype uint8 when ``label`` is
        false, otherwise the tuple ``(x, y)`` where ``y`` has shape ``(N,)``
        and dtype uint8.

    Raises:
        ValueError: If a file in ``path`` cannot be decoded as an image.
    """
    image_dir = sorted(os.listdir(path))
    x = np.zeros((len(image_dir), 128, 128, 3), dtype=np.uint8)
    y = np.zeros((len(image_dir)), dtype=np.uint8)
    for i, file in enumerate(image_dir):
        file_path = os.path.join(path, file)
        img = cv2.imread(file_path)
        if img is None:
            # cv2.imread signals failure by returning None instead of raising;
            # fail here with the offending path rather than inside cv2.resize.
            raise ValueError("Could not read image file: {}".format(file_path))
        x[i] = cv2.resize(img, (128, 128))
        if label:
            y[i] = int(file.split("_")[0])
    if label:
        return x, y
    return x

In [None]:
# Load the training and validation splits with readfile; both splits carry labels.
workspace_dir = './food-11'
print("Reading data")
train_x, train_y = readfile(path=os.path.join(workspace_dir, "training"), label=True)
print("Size of training data = {}".format(train_x.shape[0]))
val_x, val_y = readfile(path=os.path.join(workspace_dir, "validation"), label=True)
print("Size of validation data = {}".format(val_x.shape[0]))
# The unlabeled testing split is currently not loaded:
#test_x = readfile(os.path.join(workspace_dir, "testing"), False)
#print("Size of Testing data = {}".format(len(test_x)))

Reading data
Size of training data = 9866
Size of validation data = 3430


In [None]:
# Data augmentation applied to training samples only.
train_transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.RandomVerticalFlip(p=0.5),  # randomly flip the image vertically
    transforms.RandomHorizontalFlip(p=0.5), # randomly flip the image horizontally
    transforms.RandomRotation(20), # randomly rotate the image
    # NOTE(review): ColorJitter is called with no arguments; per the torchvision
    # docs all jitter ranges default to 0, so this looks like a no-op -- confirm
    # and pass explicit ranges if colour jitter is actually intended.
    transforms.ColorJitter(), # random colour jitter
    transforms.RandomGrayscale(), # randomly convert to grayscale
    transforms.ToTensor(),# convert the image to a Tensor and scale values to [0,1] (data normalization)
    #transforms.Normalize((0.5,0.5,0.5), (0.5,0.5,0.5)),# TODO: parameters still need to be checked
    #transforms.RandomPerspective(distortion_scale=0.3, p=0.5),
    #transforms.RandomAffine(10),
])
# No augmentation at evaluation time: only convert to a Tensor.
test_transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.ToTensor(),
])
class ImgDataset(Dataset):
    """Dataset over an indexable collection of samples with optional labels.

    Args:
        x: Indexable collection of samples; its length is the dataset length.
        y: Optional labels. When given they are converted to a LongTensor.
        transform: Optional callable applied to each sample when it is fetched.
    """

    def __init__(self, x, y=None, transform=None):
        self.x = x
        # Labels are stored as a LongTensor; None marks an unlabeled dataset.
        self.y = None if y is None else torch.LongTensor(y)
        self.transform = transform

    def __len__(self):
        return len(self.x)

    def __getitem__(self, index):
        """Return the (transformed) sample, paired with its label if labels exist."""
        sample = self.x[index]
        if self.transform is not None:
            sample = self.transform(sample)
        if self.y is None:
            return sample
        return sample, self.y[index]

In [None]:
# Wrap the arrays in datasets: augmentation for training, plain conversion for validation.
batch_size = 64
train_set = ImgDataset(x=train_x, y=train_y, transform=train_transform)
val_set = ImgDataset(x=val_x, y=val_y, transform=test_transform)
# Only the training loader shuffles.
train_loader = DataLoader(dataset=train_set, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(dataset=val_set, batch_size=batch_size, shuffle=False)

In [None]:
class Classifier(nn.Module):
    """CNN classifier mapping a [3, 128, 128] image to 11 class logits.

    Five Conv-BatchNorm-ReLU-MaxPool stages (with two Dropout2d layers in
    between) reduce the input to [512, 4, 4]; a fully connected head then
    produces 11 raw logits, which is what ``nn.CrossEntropyLoss`` expects.

    The positions of all parameterised layers inside ``cnn`` and ``fc`` are
    kept fixed so that ``state_dict`` keys (``cnn.0``, ..., ``cnn.19``,
    ``fc.0``, ``fc.2``, ``fc.4``, ``fc.6``, ``fc.7``) do not change.
    """

    def __init__(self):
        super(Classifier, self).__init__()
        # torch.nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding)
        # torch.nn.MaxPool2d(kernel_size, stride, padding)
        # input shape [3, 128, 128]
        self.cnn = nn.Sequential(
            nn.Conv2d(3, 64, 3, 1, 1),  # [64, 128, 128]
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),      # [64, 64, 64]

            nn.Conv2d(64, 128, 3, 1, 1), # [128, 64, 64]
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),      # [128, 32, 32]

            nn.Dropout2d(0.5),

            nn.Conv2d(128, 256, 3, 1, 1), # [256, 32, 32]
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),      # [256, 16, 16]

            nn.Conv2d(256, 512, 3, 1, 1), # [512, 16, 16]
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),       # [512, 8, 8]

            nn.Dropout2d(0.3),

            nn.Conv2d(512, 512, 3, 1, 1), # [512, 8, 8]
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),       # [512, 4, 4]
        )
        self.fc = nn.Sequential(
            nn.Linear(512*4*4, 1024),
            nn.ReLU(),

            nn.Linear(1024, 512),
            nn.ReLU(),

            nn.Linear(512, 256),
            nn.ReLU(),

            # These two Linear layers are deliberately left adjacent: inserting
            # an activation between them would shift the index of the last
            # layer and rename its state_dict keys.
            nn.Linear(256, 128),
            nn.Linear(128, 11),
            # No activation after the last layer: the model outputs raw logits.
            # A trailing ReLU would clamp every logit to >= 0 before the
            # softmax inside CrossEntropyLoss.
        )

    def forward(self, x):
        """Return logits of shape [batch, 11] for a batch of [3, 128, 128] images."""
        out = self.cnn(x)
        out = out.view(out.size()[0], -1)  # flatten to [batch, 512*4*4]
        return self.fc(out)

        
# Build the network on the GPU and print a per-layer summary for a [3, 128, 128] input.
model = Classifier().cuda()
summary(model, (3, 128, 128))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 128, 128]           1,792
       BatchNorm2d-2         [-1, 64, 128, 128]             128
              ReLU-3         [-1, 64, 128, 128]               0
         MaxPool2d-4           [-1, 64, 64, 64]               0
            Conv2d-5          [-1, 128, 64, 64]          73,856
       BatchNorm2d-6          [-1, 128, 64, 64]             256
              ReLU-7          [-1, 128, 64, 64]               0
         MaxPool2d-8          [-1, 128, 32, 32]               0
         Dropout2d-9          [-1, 128, 32, 32]               0
           Conv2d-10          [-1, 256, 32, 32]         295,168
      BatchNorm2d-11          [-1, 256, 32, 32]             512
             ReLU-12          [-1, 256, 32, 32]               0
        MaxPool2d-13          [-1, 256, 16, 16]               0
           Conv2d-14          [-1, 512,

使用 training set 訓練，並使用 validation set 尋找好的參數

In [None]:
# Train on the training set and evaluate on the validation set after every epoch.
model = Classifier().cuda()
loss = nn.CrossEntropyLoss() # classification task, so use cross-entropy
optimizer = torch.optim.Adam(model.parameters(), lr=0.001) # Adam optimizer
num_epoch = 250

for epoch in range(num_epoch):
    epoch_start_time = time.time()
    train_acc = 0.0
    train_loss = 0.0
    val_acc = 0.0
    val_loss = 0.0

    model.train() # training mode (enables Dropout, BatchNorm batch statistics)
    for inputs, labels in train_loader:
        # Predictions and labels must live on the same device.
        inputs, labels = inputs.cuda(), labels.cuda()

        optimizer.zero_grad() # reset the gradients of the model parameters
        train_pred = model(inputs) # forward pass
        batch_loss = loss(train_pred, labels)
        batch_loss.backward() # back-propagate to get the gradients
        optimizer.step() # update the parameters

        train_acc += (train_pred.argmax(dim=1) == labels).sum().item()
        # CrossEntropyLoss returns the mean over the batch; weight it by the
        # batch size so that dividing by the dataset size below gives the
        # true per-sample mean loss.
        train_loss += batch_loss.item() * labels.size(0)

    model.eval()
    with torch.no_grad():
        for inputs, labels in val_loader:
            inputs, labels = inputs.cuda(), labels.cuda()
            val_pred = model(inputs)
            batch_loss = loss(val_pred, labels)

            val_acc += (val_pred.argmax(dim=1) == labels).sum().item()
            val_loss += batch_loss.item() * labels.size(0)

    # Report accuracy and mean per-sample loss for this epoch.
    print('[%03d/%03d] %2.2f sec(s) Train Acc: %3.6f Loss: %3.6f | Val Acc: %3.6f loss: %3.6f' % \
        (epoch + 1, num_epoch, time.time()-epoch_start_time, \
         train_acc/len(train_set), train_loss/len(train_set), val_acc/len(val_set), val_loss/len(val_set)))

[001/250] 25.05 sec(s) Train Acc: 0.186398 Loss: 0.036563 | Val Acc: 0.207580 loss: 0.035562
[002/250] 25.27 sec(s) Train Acc: 0.258058 Loss: 0.034431 | Val Acc: 0.294752 loss: 0.033422
[003/250] 25.00 sec(s) Train Acc: 0.294648 Loss: 0.033201 | Val Acc: 0.279883 loss: 0.032839
[004/250] 24.93 sec(s) Train Acc: 0.321305 Loss: 0.032018 | Val Acc: 0.354810 loss: 0.030731
[005/250] 25.09 sec(s) Train Acc: 0.342489 Loss: 0.031192 | Val Acc: 0.354519 loss: 0.030463
[006/250] 25.02 sec(s) Train Acc: 0.354551 Loss: 0.030660 | Val Acc: 0.328863 loss: 0.031314
[007/250] 25.02 sec(s) Train Acc: 0.369045 Loss: 0.029780 | Val Acc: 0.304956 loss: 0.032146
[008/250] 25.04 sec(s) Train Acc: 0.378269 Loss: 0.029405 | Val Acc: 0.395044 loss: 0.028666
[009/250] 25.00 sec(s) Train Acc: 0.396716 Loss: 0.028614 | Val Acc: 0.391837 loss: 0.028803
[010/250] 25.02 sec(s) Train Acc: 0.396108 Loss: 0.028256 | Val Acc: 0.418659 loss: 0.027549
[011/250] 25.06 sec(s) Train Acc: 0.408474 Loss: 0.027857 | Val Acc: 0

In [None]:
# Stack the training and validation splits into one dataset, using the training augmentation.
train_val_x = np.concatenate([train_x, val_x])
train_val_y = np.concatenate([train_y, val_y])
train_val_set = ImgDataset(x=train_val_x, y=train_val_y, transform=train_transform)
train_val_loader = DataLoader(dataset=train_val_set, batch_size=batch_size, shuffle=True)

得到好的參數後，我們使用 training set 和 validation set 共同訓練（資料量變多，模型效果較好）

In [None]:
# Train a fresh model on the combined training + validation data.
model_best = Classifier().cuda()
loss = nn.CrossEntropyLoss() # classification task, so use cross-entropy
optimizer = torch.optim.Adam(model_best.parameters(), lr=0.001) # Adam optimizer
num_epoch = 250

for epoch in range(num_epoch):
    epoch_start_time = time.time()
    train_acc = 0.0
    train_loss = 0.0

    model_best.train()
    for inputs, labels in train_val_loader:
        # Predictions and labels must live on the same device.
        inputs, labels = inputs.cuda(), labels.cuda()

        optimizer.zero_grad()
        train_pred = model_best(inputs)
        batch_loss = loss(train_pred, labels)
        batch_loss.backward()
        optimizer.step()

        train_acc += (train_pred.argmax(dim=1) == labels).sum().item()
        # CrossEntropyLoss returns the mean over the batch; weight it by the
        # batch size so that dividing by the dataset size below gives the
        # true per-sample mean loss.
        train_loss += batch_loss.item() * labels.size(0)

    # Report accuracy and mean per-sample loss for this epoch.
    print('[%03d/%03d] %2.2f sec(s) Train Acc: %3.6f Loss: %3.6f' % \
      (epoch + 1, num_epoch, time.time()-epoch_start_time, \
      train_acc/len(train_val_set), train_loss/len(train_val_set)))

[001/250] 30.41 sec(s) Train Acc: 0.174413 Loss: 0.036677
[002/250] 30.43 sec(s) Train Acc: 0.243156 Loss: 0.035034
[003/250] 30.33 sec(s) Train Acc: 0.263538 Loss: 0.033710
[004/250] 30.26 sec(s) Train Acc: 0.302422 Loss: 0.031588
[005/250] 30.24 sec(s) Train Acc: 0.318366 Loss: 0.030930
[006/250] 30.27 sec(s) Train Acc: 0.344465 Loss: 0.030215
[007/250] 30.27 sec(s) Train Acc: 0.366952 Loss: 0.029285
[008/250] 30.27 sec(s) Train Acc: 0.388914 Loss: 0.028162
[009/250] 30.34 sec(s) Train Acc: 0.410048 Loss: 0.027250
[010/250] 30.72 sec(s) Train Acc: 0.423436 Loss: 0.026222
[011/250] 30.75 sec(s) Train Acc: 0.439606 Loss: 0.025336
[012/250] 30.85 sec(s) Train Acc: 0.453069 Loss: 0.024851
[013/250] 30.53 sec(s) Train Acc: 0.466005 Loss: 0.024317
[014/250] 30.46 sec(s) Train Acc: 0.465253 Loss: 0.023916
[015/250] 30.37 sec(s) Train Acc: 0.479317 Loss: 0.023367
[016/250] 30.35 sec(s) Train Acc: 0.492253 Loss: 0.022897
[017/250] 30.40 sec(s) Train Acc: 0.500376 Loss: 0.022575
[018/250] 30.3

In [None]:
# Save only the parameters/buffers (state_dict) of model_best to model_best5.pth.
torch.save(model_best.state_dict(), 'model_best5.pth')

In [None]:
# Reload the saved checkpoint and print its entry names as a sanity check.
state_dict = torch.load('model_best5.pth')
print(state_dict.keys())

odict_keys(['cnn.0.weight', 'cnn.0.bias', 'cnn.1.weight', 'cnn.1.bias', 'cnn.1.running_mean', 'cnn.1.running_var', 'cnn.1.num_batches_tracked', 'cnn.4.weight', 'cnn.4.bias', 'cnn.5.weight', 'cnn.5.bias', 'cnn.5.running_mean', 'cnn.5.running_var', 'cnn.5.num_batches_tracked', 'cnn.9.weight', 'cnn.9.bias', 'cnn.10.weight', 'cnn.10.bias', 'cnn.10.running_mean', 'cnn.10.running_var', 'cnn.10.num_batches_tracked', 'cnn.13.weight', 'cnn.13.bias', 'cnn.14.weight', 'cnn.14.bias', 'cnn.14.running_mean', 'cnn.14.running_var', 'cnn.14.num_batches_tracked', 'cnn.18.weight', 'cnn.18.bias', 'cnn.19.weight', 'cnn.19.bias', 'cnn.19.running_mean', 'cnn.19.running_var', 'cnn.19.num_batches_tracked', 'fc.0.weight', 'fc.0.bias', 'fc.2.weight', 'fc.2.bias', 'fc.4.weight', 'fc.4.bias', 'fc.6.weight', 'fc.6.bias', 'fc.7.weight', 'fc.7.bias'])


In [None]:
# Ask PyTorch to release cached GPU memory that is no longer in use.
torch.cuda.empty_cache()