# Object_Detection_YOLOv5 (Training)
1. 需要先有前處理的CSV。
1. 需要先有前處理的圖檔資料集。
1. 需要先有前處理的標籤資料集。

<a class="anchor" id="0"></a>
# Table of Contents

1. [套件安裝與載入](#1)
1. [環境檢測與設定](#2)
1. [開發參數設定](#3)
1. [資料處理](#4)
    -  [載入CSV檔](#4.1)
    -  [檢查CSV檔缺失值](#4.2)
1. [定義模型方法](#5)
1. [定義回調函數方法](#6)
1. [製作資料集＆資料擴增&回調函數&訓練模型](#7)
1. [混淆矩陣 & Quadratic Weighted Kappa](#8)
1. [待辦事項](#9)

# 1. 套件安裝與載入<a class="anchor" id="1"></a>
[Back to Table of Contents](#0)

In [None]:
# # YOLOv5
# !git clone https://github.com/ultralytics/yolov5  # clone repo
# %cd yolov5
# %pip install -qr requirements.txt  # install dependencies

In [None]:
# 資料處理套件
import os
import gc
import time
import shutil
import datetime
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

from tqdm import tqdm
from sklearn.model_selection import train_test_split, GroupKFold

import warnings
warnings.filterwarnings("ignore")

In [None]:
# Configure matplotlib fonts so that Chinese text is displayed
from matplotlib.font_manager import FontProperties
plt.rcParams.update({
    'font.sans-serif': ['Microsoft JhengHei'],  # so Chinese labels display properly
    'font.family': 'AR PL UMing CN',
    'axes.unicode_minus': False,  # so the minus sign displays properly
})

In [None]:
# pytorch深度學習模組套件
import torch

# 2. 環境檢測與設定<a class="anchor" id="2"></a>
[Back to Table of Contents](#0)

In [None]:
# Use the CUDA device when PyTorch reports one is available; otherwise use the CPU
DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
DEVICE

In [None]:
# Show the installed PyTorch version
print(torch.__version__)

In [None]:
'''執行環境參數設定'''
# Runtime-environment settings. When both flags are False the Kaggle
# paths in the final branch below are used.

# (Boolean) Running on a local machine
LOCAL = False

# (Boolean) Running on Colab
COLAB = False


'''檔案路徑參數設定'''
# File-path settings.

# (String) Input root path and output root path for the selected environment.
# OUTPUT_PATH is assigned in every branch so it is always defined right here.
if LOCAL:
    PATH = r'../'
    OUTPUT_PATH = PATH
elif COLAB:
    PATH = r'/content/drive/My Drive/Colab Notebooks/'
    OUTPUT_PATH = PATH
else:
    PATH = r'../input/'
    OUTPUT_PATH = r'/kaggle/working/'

# (String) Data root path
DATA_ROOT_PATH = PATH+r'vinbigdata-256-image-dataset/'

# (String) CSV root path
CSV_ROOT_PATH = PATH+r'trainyolo/'

# (String) Training data path
TRAIN_DATA_PATH = DATA_ROOT_PATH+r'vinbigdata/train/'

# (String) Training CSV path
TRAIN_CSV_PATH = CSV_ROOT_PATH+r'train_yolo.csv'

# (String) Project name
PROJECT_NAME = 'vinbigdata-chest-xray-abnormalities-detection'

# (Boolean) Whether to import the custom library
IMPORT_PYTORCH_LIBRARY = False

# (String) Path of the custom library
PYTORCH_LIBRARY_PATH = PATH + "PyTorch_Library/"

# (String) Where this run's project files are stored: one folder per run,
# suffixed with the start time. The time uses '-' instead of ':' because a
# colon is not a valid character in Windows directory names.
PROJECT_PATH = OUTPUT_PATH+PROJECT_NAME+'/'+PROJECT_NAME+' '+datetime.datetime.now().strftime("%Y-%m-%d %H-%M")

# (String) Weights name (which weights to use)
WEIGHTS_NAME = 'efficientnet_b0'

# (String) Model name (which model to use)
MODEL_NAME = 'efficientnet_b0'

# (String) Path the pretrained weights are loaded from
LOAD_WEIGHTS_PATH = PROJECT_PATH+r'/models/pretrain_weights/'+WEIGHTS_NAME+'.pth'

# (String) Path the pretrained model is loaded from
LOAD_MODEL_PATH = PROJECT_PATH+r'/models/pretrain_models/'+MODEL_NAME+'.pth'

# (String) Path the trained model is saved to
TRAIN_MODEL_PATH = PROJECT_PATH+r'/models/'+MODEL_NAME+'.pth'

In [None]:
# When the selected device is not the CPU, run the nvidia-smi shell command
# (IPython "!" syntax, so this cell only works inside a notebook)
if DEVICE != torch.device("cpu"):
    !nvidia-smi

In [None]:
import sys  # needed for sys.path below; the import cells above do not import it

# Mount Google Drive when running on Colab and not flagged as local
if not LOCAL and COLAB:
    from google.colab import drive
    drive.mount('/content/drive')

# Create the models directory; exist_ok makes re-running the cell safe
os.makedirs(PROJECT_PATH+r'/models/', exist_ok=True)

if IMPORT_PYTORCH_LIBRARY:
    # sys.path entries have to be directories (or zip archives), not .py files,
    # so register the library folder itself; Custom_Loss.py and Custom_Model.py
    # in that folder can then be imported by module name.
    if PYTORCH_LIBRARY_PATH not in sys.path:
        sys.path.append(PYTORCH_LIBRARY_PATH)

# 3. 開發參數設定<a class="anchor" id="3"></a>
[Back to Table of Contents](#0)

In [None]:
'''客製參數設定'''
# Custom settings (none defined in this cell).


'''資料參數設定'''
# Data settings.

# (Int) Number of cross-validation folds; only meaningful when a CSV is used.
# 1 means no cross-validation.
FOLD = 5

# (String) Image file extension
IMAGE_NAME_EXTENSION = '.png'

# (String) CSV column holding the image file name
IMAGE_NAME = 'image_id'

# (String) CSV column holding the label name
LABEL_NAME = 'class_name'

# (String) CSV column holding the label id
LABEL_ID = 'class_id'

# (Boolean) Whether the CSV contains rows with an empty bounding box
EMPTY_BOUNDING_BOX = True

if EMPTY_BOUNDING_BOX:
    # (Int) Label id used in the CSV for an empty bounding box
    EMPTY_BOUNDING_BOX_LABEL_ID = 14

# (Int) Random seed; 42 is the default fixed seed.
# NOTE: the GroupKFold splitter created below is not given this seed.
SEED = 42

if FOLD == 1:
    # (Float) Fraction of the training set held out for validation;
    # only defined when cross-validation is disabled (FOLD == 1)
    DATA_SPLIT = 0.2
else:
    # Splitter used to divide the data into training and validation sets (GroupKFold)
    KF = GroupKFold(n_splits = FOLD)


'''圖表參數設定'''
# Chart settings.

# (Float) Font scale applied to all seaborn charts
ALL_SNS_FONT_SCALE = 1.0

# (Int) Width of the CSV count-plot figure
CSV_COUNTPLOT_FIGSIZE_W = 10

# (Int) Height of the CSV count-plot figure
CSV_COUNTPLOT_FIGSIZE_H = 10

# (Int) Title font size of the CSV count plot
CSV_COUNTPLOT_TITLE_FONTSIZE = 20

# (Int) X-axis label font size of the CSV count plot
CSV_COUNTPLOT_XLABEL_FONTSIZE = 15

# (Int) Y-axis label font size of the CSV count plot
CSV_COUNTPLOT_YLABEL_FONTSIZE = 15

In [None]:
# Set the font scale factor for seaborn charts
sns.set(font_scale = ALL_SNS_FONT_SCALE)

# 4. 資料處理<a class="anchor" id="4"></a>
[Back to Table of Contents](#0)

## 4.1 載入CSV檔 <a class="anchor" id="4.1"></a>
[Back to Table of Contents](#0)

In [None]:
print('Reading data...')

# Load the training-set CSV
train_csv = pd.read_csv(TRAIN_CSV_PATH, encoding="utf8")
if EMPTY_BOUNDING_BOX:
    # Drop the rows labelled with the empty-bounding-box id and renumber the
    # index from 0. The column is looked up through LABEL_ID so the filter
    # follows the configured column name instead of a hard-coded attribute.
    train_csv = train_csv[train_csv[LABEL_ID] != EMPTY_BOUNDING_BOX_LABEL_ID].reset_index(drop=True)

print('Reading data completed')

In [None]:
# Show the first rows of the training-set CSV
train_csv.head()

In [None]:
# Print the (rows, columns) shape of the training-set CSV
print("Shape of train_data :", train_csv.shape)

## 4.2 檢查CSV檔缺失值 <a class="anchor" id="4.2"></a>
[Back to Table of Contents](#0)

In [None]:
# Missing-value summary: per-column count and percentage of null cells
_null_flags = train_csv.isnull()
total = _null_flags.sum().sort_values(ascending=False)
percent = (_null_flags.sum() / _null_flags.count() * 100).sort_values(ascending=False)
missing_train_csv = pd.concat([total, percent], axis=1, keys=['Total', 'Percent'])
# Display every row of the summary table
missing_train_csv.head(len(missing_train_csv))

In [None]:
# Count how many rows carry each label
train_csv[LABEL_NAME].value_counts()

In [None]:
# Bar chart of the number of rows per label
f, ax = plt.subplots(figsize=(CSV_COUNTPLOT_FIGSIZE_W, CSV_COUNTPLOT_FIGSIZE_H))
# Pass the frame and column names by keyword: current seaborn releases accept
# only `data` positionally, so a positional Series is no longer read as x.
sns.countplot(data=train_csv, x=LABEL_NAME, hue=LABEL_NAME, ax=ax)
plt.title("LABEL COUNT", fontsize=CSV_COUNTPLOT_TITLE_FONTSIZE)
plt.xlabel(LABEL_NAME.upper(), fontsize=CSV_COUNTPLOT_XLABEL_FONTSIZE)
plt.ylabel("COUNT", fontsize=CSV_COUNTPLOT_YLABEL_FONTSIZE)
plt.legend()
plt.show()

In [None]:
def main():
    """Assign a cross-validation fold to each row of ``train_csv`` and time the run.

    When ``FOLD > 1`` a ``Folds`` column is added to the global ``train_csv``
    (initialised to -1), and every row receives the number of the ``KF``
    split in which it belongs to the validation part. Rows are grouped by the
    values of the first CSV column. The actual training call is still
    disabled, so this function currently only prepares the fold column and
    prints the elapsed time.

    Raises:
        Exception: any error is printed and then re-raised unchanged.
    """
    try:
        print('Training start')
        since = time.time()
        if FOLD > 1:
            train_csv['Folds'] = -1
            # NOTE(review): groups come from the first CSV column, presumably
            # the image id (IMAGE_NAME) — confirm against the CSV layout.
            groups = train_csv[train_csv.columns[0]].values
            folds_column = train_csv.columns.get_loc('Folds')
            for fold, (_, valid_index) in enumerate(KF.split(train_csv, groups=groups)):
                # split() yields positional row indices, so write through iloc;
                # this stays correct even if the index is not 0..n-1.
                train_csv.iloc[valid_index, folds_column] = fold
                # TODO: train_process(fold = fold, kf = True)
        # TODO: else: train_process(fold = 0, kf = False)
        time_elapsed = time.time() - since
        print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    except Exception as exception:
        print(exception)
        raise

In [None]:
# Run main() when this code is executed as the top-level script
if __name__ == '__main__':
    main()

In [None]:
# train_files = []
# val_files = []
# val_files += list(train_csv[train_csv.Folds == 0].image_path.unique())
# train_files += list(train_csv[train_csv.Folds != 0].image_path.unique())
# print(len(train_files), len(val_files))

In [None]:
# os.makedirs('/kaggle/working/vinbigdata/labels/train', exist_ok = True)
# os.makedirs('/kaggle/working/vinbigdata/labels/val', exist_ok = True)
# os.makedirs('/kaggle/working/vinbigdata/images/train', exist_ok = True)
# os.makedirs('/kaggle/working/vinbigdata/images/val', exist_ok = True)
# label_dir = '/kaggle/input/vinbigdata-yolo-labels-dataset/labels'
# for file in tqdm(train_files):
#     shutil.copy(file, '/kaggle/working/vinbigdata/images/train')
#     filename = file.split('/')[-1].split('.')[0]
#     shutil.copy(os.path.join(label_dir, filename+'.txt'), '/kaggle/working/vinbigdata/labels/train')
    
# for file in tqdm(val_files):
#     shutil.copy(file, '/kaggle/working/vinbigdata/images/val')
#     filename = file.split('/')[-1].split('.')[0]
#     shutil.copy(os.path.join(label_dir, filename+'.txt'), '/kaggle/working/vinbigdata/labels/val')

In [None]:
# features = ['x_min', 'y_min', 'x_max', 'y_max', 'x_mid', 'y_mid', 'w', 'h', 'area']
# X = train_df[features]
# y = train_df['class_id']
# X.shape, y.shape

In [None]:
# class_ids, class_names = list(zip(*set(zip(train_df.class_id, train_df.class_name))))
# classes = list(np.array(class_names)[np.argsort(class_ids)])
# classes = list(map(lambda x: str(x), classes))
# classes

# Copying Files

In [None]:
# os.makedirs('/kaggle/working/vinbigdata/labels/train', exist_ok = True)
# os.makedirs('/kaggle/working/vinbigdata/labels/val', exist_ok = True)
# os.makedirs('/kaggle/working/vinbigdata/images/train', exist_ok = True)
# os.makedirs('/kaggle/working/vinbigdata/images/val', exist_ok = True)
# label_dir = '/kaggle/input/vinbigdata-yolo-labels-dataset/labels'
# for file in tqdm(train_files):
#     shutil.copy(file, '/kaggle/working/vinbigdata/images/train')
#     filename = file.split('/')[-1].split('.')[0]
#     shutil.copy(os.path.join(label_dir, filename+'.txt'), '/kaggle/working/vinbigdata/labels/train')
    
# for file in tqdm(val_files):
#     shutil.copy(file, '/kaggle/working/vinbigdata/images/val')
#     filename = file.split('/')[-1].split('.')[0]
#     shutil.copy(os.path.join(label_dir, filename+'.txt'), '/kaggle/working/vinbigdata/labels/val')

# Get Class Name

In [None]:
# class_ids, class_names = list(zip(*set(zip(train_df.class_id, train_df.class_name))))
# classes = list(np.array(class_names)[np.argsort(class_ids)])
# classes = list(map(lambda x: str(x), classes))
# classes

# [YOLOv5](https://github.com/ultralytics/yolov5)
![](https://user-images.githubusercontent.com/26833433/98699617-a1595a00-2377-11eb-8145-fc674eb9b1a7.jpg)
![](https://user-images.githubusercontent.com/26833433/90187293-6773ba00-dd6e-11ea-8f90-cd94afc0427f.png)

# YOLOv5 Stuff

In [None]:
# from os import listdir
# from os.path import isfile, join
# import yaml

# cwd = '/kaggle/working/'

# with open(join( cwd , 'train.txt'), 'w') as f:
#     for path in glob('/kaggle/working/vinbigdata/images/train/*'):
#         f.write(path+'\n')
            
# with open(join( cwd , 'val.txt'), 'w') as f:
#     for path in glob('/kaggle/working/vinbigdata/images/val/*'):
#         f.write(path+'\n')

# data = dict(
#     train =  join( cwd , 'train.txt') ,
#     val   =  join( cwd , 'val.txt' ),
#     nc    = 14,
#     names = classes
#     )

# with open(join( cwd , 'vinbigdata.yaml'), 'w') as outfile:
#     yaml.dump(data, outfile, default_flow_style=False)

# f = open(join( cwd , 'vinbigdata.yaml'), 'r')
# print('\nyaml:')
# print(f.read())

In [None]:
# # https://www.kaggle.com/ultralytics/yolov5
# # !git clone https://github.com/ultralytics/yolov5  # clone repo
# # %cd yolov5
# shutil.copytree('/kaggle/input/yolov5-official-v31-dataset/yolov5', '/kaggle/working/yolov5')
# os.chdir('/kaggle/working/yolov5')
# # %pip install -qr requirements.txt # install dependencies

# import torch
# from IPython.display import Image, clear_output  # to display images

# clear_output()
# print('Setup complete. Using torch %s %s' % (torch.__version__, torch.cuda.get_device_properties(0) if torch.cuda.is_available() else 'CPU'))

In [None]:
# !python detect.py --weights yolov5s.pt --img 640 --conf 0.25 --source data/images/
# Image(filename='runs/detect/exp/zidane.jpg', width=600)

## Pretrained Checkpoints:

| Model | AP<sup>val</sup> | AP<sup>test</sup> | AP<sub>50</sub> | Speed<sub>GPU</sub> | FPS<sub>GPU</sub> || params | FLOPS |
|---------- |------ |------ |------ | -------- | ------| ------ |------  |  :------: |
| [YOLOv5s](https://github.com/ultralytics/yolov5/releases/tag/v3.0)    | 37.0     | 37.0     | 56.2     | **2.4ms** | **416** || 7.5M   | 13.2B
| [YOLOv5m](https://github.com/ultralytics/yolov5/releases/tag/v3.0)    | 44.3     | 44.3     | 63.2     | 3.4ms     | 294     || 21.8M  | 39.4B
| [YOLOv5l](https://github.com/ultralytics/yolov5/releases/tag/v3.0)    | 47.7     | 47.7     | 66.5     | 4.4ms     | 227     || 47.8M  | 88.1B
| [YOLOv5x](https://github.com/ultralytics/yolov5/releases/tag/v3.0)    | **49.2** | **49.2** | **67.7** | 6.9ms     | 145     || 89.0M  | 166.4B
| | | | | | || |
| [YOLOv5x](https://github.com/ultralytics/yolov5/releases/tag/v3.0) + TTA|**50.8**| **50.8** | **68.9** | 25.5ms    | 39      || 89.0M  | 354.3B
| | | | | | || |
| [YOLOv3-SPP](https://github.com/ultralytics/yolov5/releases/tag/v3.0) | 45.6     | 45.5     | 65.2     | 4.5ms     | 222     || 63.0M  | 118.0B

# Selecting Models
In this notebook I'm using `v5s`. To select your preferred model, just replace `--cfg models/yolov5s.yaml --weights yolov5s.pt` with one of the following options:
* `v5s` : `--cfg models/yolov5s.yaml --weights yolov5s.pt`
* `v5m` : `--cfg models/yolov5m.yaml --weights yolov5m.pt`
* `v5l` : `--cfg models/yolov5l.yaml --weights yolov5l.pt`
* `v5x` : `--cfg models/yolov5x.yaml --weights yolov5x.pt`

# Train

In [None]:
# # !WANDB_MODE="dryrun" python train.py --img 640 --batch 16 --epochs 3 --data coco128.yaml --weights yolov5s.pt --nosave --cache 
# !WANDB_MODE="dryrun" python train.py --img 640 --batch 16 --epochs 30 --data /kaggle/working/vinbigdata.yaml --weights yolov5x.pt --cache

# Class Distribution

In [None]:
# plt.figure(figsize = (20,20))
# plt.axis('off')
# plt.imshow(plt.imread('runs/train/exp/labels_correlogram.jpg'));

In [None]:
# plt.figure(figsize = (20,20))
# plt.axis('off')
# plt.imshow(plt.imread('runs/train/exp/labels.jpg'));

# Batch Image

In [None]:
# import matplotlib.pyplot as plt
# plt.figure(figsize = (15, 15))
# plt.imshow(plt.imread('runs/train/exp/train_batch0.jpg'))

# plt.figure(figsize = (15, 15))
# plt.imshow(plt.imread('runs/train/exp/train_batch1.jpg'))

# plt.figure(figsize = (15, 15))
# plt.imshow(plt.imread('runs/train/exp/train_batch2.jpg'))

# GT Vs Pred

In [None]:
# fig, ax = plt.subplots(3, 2, figsize = (2*5,3*5), constrained_layout = True)
# for row in range(3):
#     ax[row][0].imshow(plt.imread(f'runs/train/exp/test_batch{row}_labels.jpg'))
#     ax[row][0].set_xticks([])
#     ax[row][0].set_yticks([])
#     ax[row][0].set_title(f'runs/train/exp/test_batch{row}_labels.jpg', fontsize = 12)
    
#     ax[row][1].imshow(plt.imread(f'runs/train/exp/test_batch{row}_pred.jpg'))
#     ax[row][1].set_xticks([])
#     ax[row][1].set_yticks([])
#     ax[row][1].set_title(f'runs/train/exp/test_batch{row}_pred.jpg', fontsize = 12)

# (Loss, mAP) vs. Epoch

In [None]:
# plt.figure(figsize=(30,15))
# plt.axis('off')
# plt.imshow(plt.imread('runs/train/exp/results.png'));

# Confusion Matrix

In [None]:
# plt.figure(figsize=(30,15))
# plt.axis('off')
# plt.imshow(plt.imread('runs/train/exp/confusion_matrix.png'));

# Inference

# Inference Plot