In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# Custom data training

## <span style='background-color:#fff5b1'>학습데이터 디렉토리 구조

- train/validation/test 데이터셋 디렉토리를 각각 생성한다.
- <span style='background-color:#fff5b1'>**images**</span> 폴더에 <span style='background-color:#fff5b1'>**이미지**</span> 파일을, <span style='background-color:#fff5b1'>**labels**</span> 폴더에 <span style='background-color:#fff5b1'>**annotation**</span> 파일들을 저장한다.
    - <span style='background-color:#fff5b1'>**annotation 파일**</span>은 대상 <span style='background-color:#fff5b1'>**image 파일과 이름**</span>이 같아야 하고 확장자는 <span style='background-color:#fff5b1'>**txt**</span>로 한다.
    
![image.png](attachment:image.png)

- YOLO object detection annotation 파일 형식
    - 공백을 구분자로 하는 csv 파일 형식
    - 한줄에 한개 object의 정보를 작성한다.
    - `label center_x, center_y, bbox_width, bbox_height`  
        - bbox 위치정보는 image의 size 에 대한 비율로 정의한다.
        - 예)
            ```
            0.0    0.329934450147929     0.5672360751315789    0.25200291331360947   0.15803108815789477
            0.0    0.8026219956360947    0.5186609456578948    0.0975965039940829    0.08419689131578946
            ```
        

## <span style='background-color:#fff5b1'>학습 설정 yaml 파일 작성
- 다음 항목들을 작성한다.
    - train: train dataset 저장경로
    - test: test dataset 저장경로
    - val: validation dataset 저장경로
        - train/test/validation 저장경로는 절대경로로 작성한다.
    -  <span style='background-color:#fff5b1'>nc: class 개수
    - names: class name  <span style='background-color:#fff5b1'>**작성 형태는 꼭!! 지켜줘야 한다.**</span>
        ```yaml
        train: C:\datasets\train
        test: C:\datasets\test
        val: C:\datasets\val
        nc: 2
        names: ["car", "bus"]
        #혹은
        names:
            - 'car'
            - 'bus'
        ```
        
> yaml: https://yaml.org/

## <span style='background-color:#fff5b1'>Training 하기 
### CLI

<span style='background-color:#fff5b1'><b style='font-size:1.3em'>1.  학습시작</b>    
```bash
yolo detect train data=custom_data.yaml model=yolov8n.pt epochs=100 imgsz=640
```
- model: train 지정
- data: 학습관련 설정 파일 경로로 다음을 설정한다.
    - train/test/validation dataset 저장경로
    - class 개수
    - class name
- model: fine tuning 시 시작 Pretrained Model 파일 경로 (학습시킬 모델파일)
- args
    - epochs: 반복횟수
    - imgsz: input image size
    - https://docs.ultralytics.com/cfg/
    
<span style='background-color:#fff5b1'><b style='font-size:1.3em'>2. 이어서 학습하기</b>    
```bash
yolo detect train resume model=last.pt
```
- model: <span style='background-color:#fff5b1'>마지막으로 저장된 파일 경로

### <span style='background-color:#fff5b1'>Python

1. YOLO객체 생성
    - Fine tuning할 경우 pretrained 모델파일 저장 경로를 지정한다.
    - 새로 학습할 경우 모델구조를 정의한 yaml 설정파일 저장 경로를 지정한다.
2. train
    - train 설정 yaml 파일의 위치와 train을 위한 정보들을 매개변수로 전달한다.

# 과일, 야채 image detection

In [7]:
# 디렉토리, 파일 경로 변수
import yaml
from pathlib import Path
G_DRIVE = Path('/content/drive/MyDrive/Playdata/8. Yolo')
DATA_SOURCE_DIR_PATH = G_DRIVE / 'Fruits and Vegi.v13-verison8.yolov8.zip'
DATA_TARGET_DIR_PATH = Path('./data')

In [9]:
# 구글 드라이브의 데이터 파일을 Local VM으로 복사 한 뒤 압축
import shutil
import os

os.makedirs(DATA_TARGET_DIR_PATH, exist_ok = True)

shutil.copy(DATA_SOURCE_DIR_PATH, DATA_TARGET_DIR_PATH / 'data.zip')

PosixPath('data/data.zip')

In [10]:
from zipfile import ZipFile
with ZipFile(DATA_TARGET_DIR_PATH / 'data.zip') as zipFile:
  zipFile.extractall(DATA_TARGET_DIR_PATH)

In [11]:
fruits = ['Apple', 'Banana', 'Beetroot', 'Bitter_Gourd', 'Bottle_Gourd', 'Cabbage', 'Capsicum', 'Carrot', 'Cauliflower', 'Cherry', 'Chilli', 'Coconut', 'Cucumber', 
          'EggPlant', 'Ginger', 'Grape', 'Green_Orange', 'Kiwi', 'Maize', 'Mango', 'Melon', 'Okra', 'Onion', 'Orange', 'Peach', 'Pear', 'Peas',
          'Pineapple', 'Pomegranate', 'Potato', 'Radish', 'Strawberry', 'Tomato', 'Turnip', 'Watermelon']

In [None]:
len(fruits)

35

## Dataset 다운 및 복사

- https://universe.roboflow.com/bohni-tech/fruits-and-vegi/

## yaml 설정파일 작성

In [None]:
!pip install pyyaml



In [12]:
# python: dictionary <----------> yaml
# 출력할 데이터를 dictionary로 구성
import yaml
import os
data = {
    'train': '/content/data/train',
    'val': '/content/data/valid',
    'test': '/content/data/test',
    'nc': 35,
    'names': fruits
}

os.makedirs('data', exist_ok = True) # 저장할 디렉토리 생성

with open('data/custom_data.yaml', 'wt') as fw:
    yaml.dump(data, fw) # data로 쓸거고, fw를 사용할것이다.

In [None]:
with open('data/custom_data.yaml', 'rt') as fr:
    d = yaml.safe_load(fr)
    print(type(d))
    print(d)

<class 'dict'>
{'names': ['Apple', 'Banana', 'Beetroot', 'Bitter_Gourd', 'Bottle_Gourd', 'Cabbage', 'Capsicum', 'Carrot', 'Cauliflower', 'Cherry', 'Chilli', 'Coconut', 'Cucumber', 'EggPlant', 'Ginger', 'Grape', 'Green_Orange', 'Kiwi', 'Maize', 'Mango', 'Melon', 'Okra', 'Onion', 'Orange', 'Peach', 'Pear', 'Peas', 'Pineapple', 'Pomegranate', 'Potato', 'Radish', 'Strawberry', 'Tomato', 'Turnip', 'Watermelon'], 'nc': 35, 'test': '/Users/seokminlee/Desktop/mose/8. Yolo/test', 'train': '/Users/seokminlee/Desktop/mose/8. Yolo/train', 'val': '/Users/seokminlee/Desktop/mose/8. Yolo/val'}


## 학습

In [19]:
# 현재 작업 경로를 Google drive 로 변경.
!pwd # 경로 확인
# os.chdir('/content')

/content


In [None]:
!nvidia-smi

In [None]:
# yolov8 설치
!pip install ultralytics

In [21]:
from ultralytics import YOLO

model = YOLO('yolov8n.pt')
# 학습 - train() 메소드 호출
model.train(data = 'data/custom_data.yaml', 
            epochs = 1, 
            patience = 20, # 지정한 epoch 만큼 성능이 개선이 안되면 조기종료.
            batch = 16, # batch size (dafault)
            imgsz = 640, # input batch size (default)
            optimizer = 'Adam')

Downloading https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n.pt to yolov8n.pt...


  0%|          | 0.00/6.23M [00:00<?, ?B/s]

Ultralytics YOLOv8.0.52 🚀 Python-3.9.16 torch-1.13.1+cu116 CUDA:0 (Tesla T4, 15102MiB)
[34m[1myolo/engine/trainer: [0mtask=detect, mode=train, model=yolov8n.pt, data=data/custom_data.yaml, epochs=1, patience=20, batch=16, imgsz=640, save=True, save_period=-1, cache=False, device=None, workers=8, project=None, name=None, exist_ok=False, pretrained=False, optimizer=Adam, verbose=True, seed=0, deterministic=True, single_cls=False, image_weights=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, show=False, save_txt=False, save_conf=False, save_crop=False, hide_labels=False, hide_conf=False, vid_stride=1, line_thickness=3, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False, boxes=True, format=torchscript, keras=False, optimize=False, int8=False, dynamic=False,

  0%|          | 0.00/755k [00:00<?, ?B/s]

Overriding model.yaml nc=80 with nc=35

                   from  n    params  module                                       arguments                     
  0                  -1  1       464  ultralytics.nn.modules.Conv                  [3, 16, 3, 2]                 
  1                  -1  1      4672  ultralytics.nn.modules.Conv                  [16, 32, 3, 2]                
  2                  -1  1      7360  ultralytics.nn.modules.C2f                   [32, 32, 1, True]             
  3                  -1  1     18560  ultralytics.nn.modules.Conv                  [32, 64, 3, 2]                
  4                  -1  2     49664  ultralytics.nn.modules.C2f                   [64, 64, 2, True]             
  5                  -1  1     73984  ultralytics.nn.modules.Conv                  [64, 128, 3, 2]               
  6                  -1  2    197632  ultralytics.nn.modules.C2f                   [128, 128, 2, True]           
  7                  -1  1    295424  ultralytic

In [None]:
      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size
        1/1      2.73G      1.454      3.328      1.644         79        640: 100%|██████████| 411/411 [09:13<00:00,  1.35s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 5/5 [00:09<00:00,  1.89s/it] # 50 ~ 95프로에서 나온 평균
                   all        147        613      0.309      0.159     0.0846      0.044

- <b style='font-size:1.5em'>결과</b>
```bash
Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size
    100/100      39.5G     0.7396     0.5254      1.153         98        640: 100%|██████████| 137/137 [01:04<00:00,  2.12it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 2/2 [00:06<00:00,  3.05s/it]
                   all        147        613      0.768      0.846      0.877      0.652

100 epochs completed in 3.049 hours.
Optimizer stripped from runs/detect/train3/weights/last.pt, 52.0MB
Optimizer stripped from runs/detect/train3/weights/best.pt, 52.0MB

Validating runs/detect/train3/weights/best.pt...
Ultralytics YOLOv8.0.50 🚀 Python-3.8.10 torch-1.13.1+cu116 CUDA:0 (NVIDIA A100-SXM4-40GB, 40514MiB)
Model summary (fused): 218 layers, 25860025 parameters, 0 gradients, 78.8 GFLOPs
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 2/2 [00:05<00:00,  3.00s/it]
                   all        147        613      0.803      0.826      0.876      0.652
                 Apple        147         22      0.806      0.864      0.897      0.772
                Banana        147          7      0.777      0.857      0.917      0.582
          Bitter_Gourd        147          4      0.683       0.25      0.351      0.125
               Cabbage        147         38       0.85      0.896      0.953      0.731
                Carrot        147          9          1      0.967      0.995      0.531
                Cherry        147          2      0.814          1      0.995      0.895
               Coconut        147          1      0.379          1      0.995      0.995
              Cucumber        147         13      0.901      0.846       0.86      0.648
                Ginger        147         14      0.902      0.929      0.952      0.592
                 Grape        147          6       0.85          1      0.995      0.752
          Green_Orange        147         12      0.894          1      0.995      0.913
                  Kiwi        147         13      0.681      0.988      0.936      0.702
                 Mango        147         33      0.966      0.879      0.964      0.749
                 Melon        147         31      0.845      0.529      0.709      0.394
                  Okra        147          2      0.921        0.5      0.502      0.301
                 Onion        147         15      0.798      0.867      0.938      0.618
                Orange        147         14      0.933      0.857      0.932      0.805
                 Peach        147         16      0.941      0.688      0.904      0.748
                  Pear        147          9       0.93          1      0.995      0.798
                  Peas        147          1      0.408          1      0.995      0.895
             Pineapple        147         30      0.844      0.733      0.812      0.463
           Pomegranate        147         18      0.928          1      0.995      0.836
                Potato        147        208      0.767      0.659      0.764      0.419
            Strawberry        147         19      0.937          1      0.995      0.807
                Tomato        147         50      0.796       0.74       0.88      0.583
                Turnip        147          3      0.297      0.333       0.45      0.203
            Watermelon        147         23      0.829      0.913      0.973      0.738
Speed: 1.3ms preprocess, 1.7ms inference, 0.0ms loss, 0.9ms postprocess per image
Results saved to runs/detect/train3
```


## 평가
- YOLO객체 생성시 저장된 모델파일 경로를 넣어 생성한다.
    - 학습할때 사용한 yaml파일 학습때와 동일한 경로에 있어야 한다.
    - train/validation/test 데이터셋이 yaml에 등록된 경로에 있어야 한다.
- `YOLO.val()` 이용
    - DetMetrics 타입 객체에 결과를 담아 반환

In [None]:
best_model_path = '/content/runs/detect/train/weights/best.pt'

best_model = YOLO(best_model_path)
metrics = best_model.val()

In [23]:
print(metrics.box.map) # map50 ~ 95

0.04402817515069767


In [24]:
print(metrics.box.map50)

0.08463139018548108


In [25]:
print(metrics.box.map75)

0.04010952686825973


In [26]:
print(metrics.box.maps) # 클래스별 ap score를 확인

[   0.089144   0.0010612    0.044028    0.069385    0.044028    0.038932    0.044028   0.0079777    0.044028  0.00012495    0.044028  0.00058187   0.0057591    0.044028           0     0.04675     0.12598    0.064969    0.044028     0.16044    0.044274   0.0018084    0.014379    0.033966    0.098479    0.020178
   0.0048037    0.029938     0.07741     0.14285    0.044028    0.054906    0.043294  0.00028567    0.011081]


In [28]:
for label, ap in zip(fruits, metrics.box.maps):
  print(label, ':', ap)

Apple : 0.08914415007655715
Banana : 0.001061157341763486
Beetroot : 0.04402817515069767
Bitter_Gourd : 0.06938531337698783
Bottle_Gourd : 0.04402817515069767
Cabbage : 0.038931629490050676
Capsicum : 0.04402817515069767
Carrot : 0.007977709401709401
Cauliflower : 0.04402817515069767
Cherry : 0.00012495289126366604
Chilli : 0.04402817515069767
Coconut : 0.0005818713450292396
Cucumber : 0.005759105634366257
EggPlant : 0.04402817515069767
Ginger : 0.0
Grape : 0.04675035757267292
Green_Orange : 0.12597658869974088
Kiwi : 0.06496895163377812
Maize : 0.04402817515069767
Mango : 0.16044147403670397
Melon : 0.04427418866465559
Okra : 0.0018083706511497124
Onion : 0.014378856792371275
Orange : 0.03396581373262143
Peach : 0.09847916974595292
Pear : 0.020177540846156007
Peas : 0.004803696991663647
Pineapple : 0.02993790032059011
Pomegranate : 0.07741042756262739
Potato : 0.14285462522373066
Radish : 0.04402817515069767
Strawberry : 0.054905612795455284
Tomato : 0.04329415999881422
Turnip : 0.000

# 추론

In [9]:
from glob import glob
TEST_FILES_PATH = glob('test_image/fruits_veg/*.jpg')
TEST_FILES_PATH

['test_image/fruits_veg/melon2.jpg',
 'test_image/fruits_veg/melon1.jpg',
 'test_image/fruits_veg/banana2.jpg',
 'test_image/fruits_veg/capsicum2.jpg',
 'test_image/fruits_veg/capsicum1.jpg',
 'test_image/fruits_veg/pineapple.jpg',
 'test_image/fruits_veg/grape.jpg',
 'test_image/fruits_veg/apple.jpg',
 'test_image/fruits_veg/pomegranate.jpg',
 'test_image/fruits_veg/banana.jpg',
 'test_image/fruits_veg/eggplant.jpg',
 'test_image/fruits_veg/pineapple2.jpg']

In [11]:
from ultralytics import YOLO

best_model = YOLO('runs1/detect/train/weights/best.pt')
for path in TEST_FILES_PATH:
    result = best_model(path, save = True, save_txt = True)


image 1/1 /Users/seokminlee/Desktop/mose/8. Yolo/test_image/fruits_veg/melon2.jpg: 640x640 1 Melon, 641.3ms
Speed: 0.4ms preprocess, 641.3ms inference, 1.9ms postprocess per image at shape (1, 3, 640, 640)
Results saved to [1mruns/detect/predict10[0m
1 label saved to runs/detect/predict10/labels

image 1/1 /Users/seokminlee/Desktop/mose/8. Yolo/test_image/fruits_veg/melon1.jpg: 640x608 1 Melon, 625.3ms
Speed: 0.7ms preprocess, 625.3ms inference, 0.8ms postprocess per image at shape (1, 3, 640, 640)
Results saved to [1mruns/detect/predict10[0m
2 labels saved to runs/detect/predict10/labels

image 1/1 /Users/seokminlee/Desktop/mose/8. Yolo/test_image/fruits_veg/banana2.jpg: 640x640 1 Banana, 495.7ms
Speed: 0.9ms preprocess, 495.7ms inference, 1.1ms postprocess per image at shape (1, 3, 640, 640)
Results saved to [1mruns/detect/predict10[0m
3 labels saved to runs/detect/predict10/labels

[W NNPACK.cpp:53] Could not initialize NNPACK! Reason: Unsupported hardware.
image 1/1 /Users/s