<a href="https://colab.research.google.com/github/brandon0824/HungyiML/blob/master/hw3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from google.colab import drive
drive.mount('/content/drive/')

In [None]:
!nvidia-smi

In [None]:
!gdown --id '19CzXudqN58R3D-1G8KeFWk8UDQwlb8is' --output food-11.zip # 下載資料集
!unzip food-11.zip # 解壓縮

In [13]:
# Import需要的套件
import os
import numpy as np
import cv2
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import pandas as pd
from torch.utils.data import DataLoader, Dataset
import time

In [14]:
def readfile(path, label):
    # label 是一個 boolean variable，代表需不需要回傳 y 值
    image_dir = sorted(os.listdir(path))
    x = np.zeros((len(image_dir), 128, 128, 3), dtype=np.uint8)
    # y存储标签，每个y大小为1
    y = np.zeros((len(image_dir)), dtype=np.uint8)
    for i, file in enumerate(image_dir):
        img = cv2.imread(os.path.join(path, file))
        # 利用cv2.resize()函数将不同大小的图片统一为128(高)*128(宽)
        x[i, :, :] = cv2.resize(img,(128, 128))
        if label:
          y[i] = int(file.split("_")[0]) # y即图片标签 即所属类别(1-11)
    if label:
      return x, y
    else:
      return x



# 分別將 training set、validation set、testing set 用 readfile 函式讀進來
workspace_dir = './food-11'
print("Reading data")
# os.path.join 目录和文件名合成一个路径
train_x, train_y = readfile(os.path.join(workspace_dir, "training"), True)
print("Size of training data = {}".format(len(train_x)))
val_x, val_y = readfile(os.path.join(workspace_dir, "validation"), True)
print("Size of validation data = {}".format(len(val_x)))
test_x = readfile(os.path.join(workspace_dir, "testing"), False)
print("Size of Testing data = {}".format(len(test_x)))
print(train_y)
print(val_y)

Reading data
Size of training data = 9866
Size of validation data = 3430
Size of Testing data = 3347
[0 0 0 ... 9 9 9]
[0 0 0 ... 9 9 9]


In [17]:
# training 時做 data augmentation(数据增强)
# 数据增强主要用来防止过拟合，用于dataset较小的时候。
# 参考：https://blog.csdn.net/lanmengyiyu/article/details/79658545
train_transform = transforms.Compose([
    transforms.ToPILImage(), # 将tensor转化为PILImage
    transforms.RandomHorizontalFlip(), # 隨機將圖片水平翻轉
    transforms.RandomRotation(15), # 隨機旋轉圖片
    transforms.ToTensor(), # 將圖片轉成 Tensor，並把數值 normalize 到 [0,1] (data normalization)
])
# testing 時不需做 data augmentation
test_transform = transforms.Compose([
    transforms.ToPILImage(),                                    
    transforms.ToTensor(),
])

# 在 PyTorch 中，我們可以利用 torch.utils.data 的 Dataset 及 DataLoader 來"包裝" data，使後續的 training 及 testing 更為方便。

# Dataset 需要 overload 兩個函數：__len__ 及 __getitem__

# __len__ 必須要回傳 dataset 的大小，而 __getitem__ 則定義了當程式利用 [ ] 取值時，dataset 應該要怎麼回傳資料。

# 實際上我們並不會直接使用到這兩個函數，但是使用 DataLoader 在 enumerate Dataset 時會使用到，沒有實做的話會在程式運行階段出現 error。
class ImgDataset(Dataset):
    def __init__(self, x, y=None, transform=None):
        self.x = x
        # label is required to be a LongTensor
        self.y = y
        if y is not None:
            self.y = torch.LongTensor(y)
        self.transform = transform
    def __len__(self):
        return len(self.x)
    def __getitem__(self, index):
        X = self.x[index]
        if self.transform is not None:
            X = self.transform(X)
        if self.y is not None:
            Y = self.y[index]
            return X, Y
        else:
            return X


batch_size = 128
train_set = ImgDataset(train_x, train_y, train_transform)
val_set = ImgDataset(val_x, val_y, test_transform)
# shuffle （bool, optional） - 每一个epoch进行的时候是否要进行随机打乱（默认为False）
train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_set, batch_size=batch_size, shuffle=False)

In [18]:
class Classifier(nn.Module):
    def __init__(self):
        super(Classifier, self).__init__()
        # torch.nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding)
        # torch.nn.MaxPool2d(kernel_size, stride, padding)
        # input 維度 [3, 128, 128]
        self.cnn = nn.Sequential(
            # 第一个参数代表输入数据的通道数，例RGB图片通道数为3；
            # 第二个参数代表输出数据的通道数，这个根据模型调整；
            # 第三个参数是卷积核大小
            # 第四个参数是stride，步长
            # 第五个参数是padding，补1  输入的每一条边补充0的层数
            nn.Conv2d(3, 64, 3, 1, 1),  # [64, 128, 128]
            nn.BatchNorm2d(64),
            nn.ReLU(),

            # 第一个参数是kernel_size，max pooling的窗口大小，
            # 第二个参数是stride，max pooling的窗口移动的步长。默认值是kernel_size
            # 第三个参数输入的每一条边补充0的层数，默认是0
            nn.MaxPool2d(2, 2, 0),      # [64, 64, 64]

            nn.Conv2d(64, 128, 3, 1, 1), # [128, 64, 64]
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),      # [128, 32, 32]

            nn.Conv2d(128, 256, 3, 1, 1), # [256, 32, 32]
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),      # [256, 16, 16]

            nn.Conv2d(256, 512, 3, 1, 1), # [512, 16, 16]
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),       # [512, 8, 8]
            
            nn.Conv2d(512, 512, 3, 1, 1), # [512, 8, 8]
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),       # [512, 4, 4]
        )
        # 最后输出到全连接层
        self.fc = nn.Sequential(
            nn.Linear(512*4*4, 1024),
            nn.ReLU(),
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.Linear(512, 11)
        )
    def forward(self, x):
        out = self.cnn(x)
        # out.view类似于numpy中reshape的作用 原张量拉直成一维的结构
        out = out.view(out.size()[0], -1)
        return self.fc(out)

In [26]:
model = Classifier().cuda()
loss = nn.CrossEntropyLoss() # 因為是 classification task，所以 loss 使用 CrossEntropyLoss
optimizer = torch.optim.Adam(model.parameters(), lr=0.001) # optimizer 使用 Adam learning rate=0.01
num_epoch = 30

for epoch in range(num_epoch):
    epoch_start_time = time.time()
    train_acc = 0.0
    train_loss = 0.0
    val_acc = 0.0
    val_loss = 0.0

    model.train() # 確保 model 是在 train model (開啟 Dropout 等...)
    for i, data in enumerate(train_loader):
        optimizer.zero_grad() # 用 optimizer 將 model 參數的 gradient 歸零
        train_pred = model(data[0].cuda()) # 利用 model 得到預測的機率分佈 這邊實際上就是去呼叫 model 的 forward 函數
        # data[0]预测结果 data[1]真实值
        batch_loss = loss(train_pred, data[1].cuda()) # 計算 loss （注意 prediction 跟 label 必須同時在 CPU 或是 GPU 上）
        batch_loss.backward() # 利用 back propagation 算出每個參數的 gradient
        optimizer.step() # 以 optimizer 用 gradient 更新參數值

        # np.argmax找到最大数的索引 axis=1求这一行的最大值的索引
        train_acc += np.sum(np.argmax(train_pred.cpu().data.numpy(), axis=1) == data[1].cpu().numpy())
        train_loss += batch_loss.item()
    
    # pytorch会自动把Batch Normalization和DropOut固定住，不会取平均，而是用训练好的值。
    # 不然的话，一旦test的batch_size过小，很容易就会被BN层导致生成图片颜色失真极大
    # 在模型测试阶段使用
    model.eval()
    # torch.no_grad 达到 暂时 不追踪网络参数中的导数的目的
    with torch.no_grad():
        for i, data in enumerate(val_loader):
            val_pred = model(data[0].cuda())
            batch_loss = loss(val_pred, data[1].cuda())

            val_acc += np.sum(np.argmax(val_pred.cpu().data.numpy(), axis=1) == data[1].cpu().numpy())
            val_loss += batch_loss.item()

        #將結果 print 出來
        print('[%03d/%03d] %2.2f sec(s) Train Acc: %3.6f Loss: %3.6f | Val Acc: %3.6f loss: %3.6f' % \
            (epoch + 1, num_epoch, time.time()-epoch_start_time, \
             train_acc/train_set.__len__(), train_loss/train_set.__len__(), val_acc/val_set.__len__(), val_loss/val_set.__len__()))

[001/030] 24.57 sec(s) Train Acc: 0.247821 Loss: 0.017860 | Val Acc: 0.281050 loss: 0.016117
[002/030] 24.77 sec(s) Train Acc: 0.337421 Loss: 0.014813 | Val Acc: 0.317784 loss: 0.015932
[003/030] 24.36 sec(s) Train Acc: 0.389621 Loss: 0.013763 | Val Acc: 0.327988 loss: 0.015547
[004/030] 24.29 sec(s) Train Acc: 0.429151 Loss: 0.012876 | Val Acc: 0.394461 loss: 0.013544
[005/030] 24.54 sec(s) Train Acc: 0.470099 Loss: 0.012068 | Val Acc: 0.437901 loss: 0.012570
[006/030] 24.55 sec(s) Train Acc: 0.492398 Loss: 0.011405 | Val Acc: 0.395044 loss: 0.015670
[007/030] 24.38 sec(s) Train Acc: 0.528583 Loss: 0.010681 | Val Acc: 0.410787 loss: 0.014325
[008/030] 24.45 sec(s) Train Acc: 0.544294 Loss: 0.010394 | Val Acc: 0.492420 loss: 0.011966
[009/030] 24.56 sec(s) Train Acc: 0.560004 Loss: 0.009986 | Val Acc: 0.487464 loss: 0.012690
[010/030] 24.46 sec(s) Train Acc: 0.592337 Loss: 0.009297 | Val Acc: 0.489504 loss: 0.012079
[011/030] 24.47 sec(s) Train Acc: 0.595175 Loss: 0.009114 | Val Acc: 0

In [27]:
# numpy.concatenate完成数组拼接
train_val_x = np.concatenate((train_x, val_x), axis=0)
train_val_y = np.concatenate((train_y, val_y), axis=0)
train_val_set = ImgDataset(train_val_x, train_val_y, train_transform)
train_val_loader = DataLoader(train_val_set, batch_size=batch_size, shuffle=True)

In [29]:
model_best = Classifier().cuda()
loss = nn.CrossEntropyLoss() # 因為是 classification task，所以 loss 使用 CrossEntropyLoss
optimizer = torch.optim.Adam(model_best.parameters(), lr=0.001) # optimizer 使用 Adam
num_epoch = 30

for epoch in range(num_epoch):
    epoch_start_time = time.time()
    train_acc = 0.0
    train_loss = 0.0

    model_best.train()
    for i, data in enumerate(train_val_loader):
        optimizer.zero_grad()
        train_pred = model_best(data[0].cuda())
        batch_loss = loss(train_pred, data[1].cuda())
        batch_loss.backward()
        optimizer.step()

        train_acc += np.sum(np.argmax(train_pred.cpu().data.numpy(), axis=1) == data[1].cpu().numpy())
        train_loss += batch_loss.item()

        #將結果 print 出來
    print('[%03d/%03d] %2.2f sec(s) Train Acc: %3.6f Loss: %3.6f' % \
      (epoch + 1, num_epoch, time.time()-epoch_start_time, \
      train_acc/train_val_set.__len__(), train_loss/train_val_set.__len__()))

[001/030] 29.55 sec(s) Train Acc: 0.239847 Loss: 0.017460
[002/030] 29.16 sec(s) Train Acc: 0.361236 Loss: 0.014116
[003/030] 28.94 sec(s) Train Acc: 0.432160 Loss: 0.012749
[004/030] 29.35 sec(s) Train Acc: 0.478415 Loss: 0.011710
[005/030] 29.22 sec(s) Train Acc: 0.524669 Loss: 0.010695
[006/030] 29.11 sec(s) Train Acc: 0.560770 Loss: 0.009905
[007/030] 29.21 sec(s) Train Acc: 0.597774 Loss: 0.009102
[008/030] 29.25 sec(s) Train Acc: 0.630039 Loss: 0.008383
[009/030] 29.20 sec(s) Train Acc: 0.654558 Loss: 0.007792
[010/030] 29.24 sec(s) Train Acc: 0.677948 Loss: 0.007343
[011/030] 29.18 sec(s) Train Acc: 0.693742 Loss: 0.006927
[012/030] 29.15 sec(s) Train Acc: 0.706528 Loss: 0.006584
[013/030] 29.21 sec(s) Train Acc: 0.720217 Loss: 0.006296
[014/030] 29.15 sec(s) Train Acc: 0.751955 Loss: 0.005658
[015/030] 29.16 sec(s) Train Acc: 0.758424 Loss: 0.005423
[016/030] 29.19 sec(s) Train Acc: 0.774669 Loss: 0.005116
[017/030] 29.15 sec(s) Train Acc: 0.784146 Loss: 0.004865
[018/030] 29.1

In [30]:
test_set = ImgDataset(test_x, transform=test_transform)
test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=False)

In [31]:
model_best.eval()
prediction = []
with torch.no_grad():
    for i, data in enumerate(test_loader):
        test_pred = model_best(data.cuda())
        test_label = np.argmax(test_pred.cpu().data.numpy(), axis=1)
        for y in test_label:
            prediction.append(y)

In [32]:
#將結果寫入 csv 檔
with open("./drive/My Drive/Hung-yi Lee DLML HW/hw3/predict.csv", 'w') as f:
    f.write('Id,Category\n')
    for i, y in  enumerate(prediction):
        f.write('{},{}\n'.format(i, y))

In [None]:
import csv
csvFile = open("./drive/My Drive/Hung-yi Lee DLML HW/hw3/predict.csv", "r")
csv_reader = csv.reader(csvFile)
data= []
for line in csv_reader:
    data.append([x for x in line[:len(line)]])     #[x for x in line[:len(line)]]：形成一个行列表，对每行逐个字符串，进行追加；   data.append（）：对添加一个行列表
    print(data)