# 이안류 분류

In [None]:
import ultralytics
# Run the Ultralytics built-in check before doing anything else
# (presumably reports the installed software/hardware setup - confirm in the Ultralytics docs).
ultralytics.checks()

In [6]:
import os
import random
import shutil
from tqdm.notebook import tqdm
import cv2
import glob
import json
import gc

# 데이터 전처리

In [7]:
# Create the train / valid / test folders, each with images/ and labels/ inside.

data_root = '/Users/kimhongseok/cv_79_projects/part2/19/data'
file_root = os.path.join(data_root, 'Training')
pjt_name = 'rip'

# Every split lives under <file_root>/<pjt_name>/<split>.
train_root = f'{file_root}/{pjt_name}/train'
valid_root = f'{file_root}/{pjt_name}/valid'
test_root = f'{file_root}/{pjt_name}/test'

for folder in [train_root, valid_root, test_root]:
    # makedirs creates the split folder itself as an intermediate directory,
    # and exist_ok=True makes re-running the cell a no-op without a separate
    # (race-prone) os.path.exists() check.
    for s in ['images', 'labels']:
        os.makedirs(f'{folder}/{s}', exist_ok=True)

In [10]:
# Collect the path of every JSON label file (one sub-folder level below labels/).
label_pattern = f'{file_root}/labels/*/*.json'
file_list = glob.glob(label_pattern)

In [5]:
# Bounding-box conversion: turn the corner points read from the JSON label
# into the normalized (center, size) form used for the YOLO label files.

def json_to_yolo_bbox(bbox, w, h):
    """Convert a list of corner points into a normalized YOLO box.

    Only three corners are read: ``bbox[0]`` supplies the left and top
    coordinates, ``bbox[1]`` the right x coordinate and ``bbox[3]`` the
    bottom y coordinate.

    Args:
        bbox: sequence of ``[x, y]`` points; indices 0, 1 and 3 are used.
        w: image width used to normalize x values.
        h: image height used to normalize y values.

    Returns:
        ``[x_center, y_center, width, height]``, each divided by the
        corresponding image dimension.
    """
    first_pt, second_pt, fourth_pt = bbox[0], bbox[1], bbox[3]
    left, top = first_pt[0], first_pt[1]
    right = second_pt[0]
    bottom = fourth_pt[1]

    x_center = ((left + right) / 2) / w
    y_center = ((top + bottom) / 2) / h
    box_w = (right - left) / w
    box_h = (bottom - top) / h

    return [x_center, y_center, box_w, box_h]

In [None]:
# Number of label files collected so far (shown as the cell output).
len(file_list)

In [None]:
# Use multiple threads to get through the label files faster
import concurrent.futures
import warnings

# Silence warnings (no category filter is given, so every warning is ignored)
warnings.filterwarnings(action='ignore')

def process_chunk(chunk):
    """Convert a batch of JSON label files into YOLO ``.txt`` label files.

    For every path in *chunk* the JSON is parsed, each entry of
    ``annotations.drawing`` is converted with ``json_to_yolo_bbox`` and the
    resulting lines are written to a ``.txt`` file next to the JSON file.
    Files whose ``bounding_count`` is not positive, or that yield no lines,
    produce no output file.

    Args:
        chunk: iterable of paths to JSON label files.

    Returns:
        None.
    """
    print('시작')
    cls = 0  # every box is written with class id 0
    for file in tqdm(chunk):
        # Read and close the JSON before writing the label file.
        with open(file, 'r', encoding='utf-8') as f:
            json_data = json.load(f)

        # 'resolution' is a "width,height" string.
        width, height = map(int, json_data['image_info']['resolution'].split(','))
        annotations = json_data['annotations']
        if annotations['bounding_count'] <= 0:
            continue

        lines = []
        for b in annotations['drawing']:
            yolo_bbox = json_to_yolo_bbox(b, width, height)
            bbox_string = ' '.join(str(x) for x in yolo_bbox)
            lines.append(f'{cls} {bbox_string}')

        # dict.fromkeys drops duplicate boxes but, unlike a set, keeps the
        # original order, so the written file is identical on every run.
        unique_lines = list(dict.fromkeys(lines))
        if not unique_lines:
            continue

        # Swap only the extension; str.replace('json', 'txt') would also
        # rewrite any 'json' occurring elsewhere in the path.
        label_path = os.path.splitext(file)[0] + '.txt'
        with open(label_path, 'w', encoding='utf-8') as t:
            t.write('\n'.join(unique_lines))

# Convert the label files concurrently, one chunk of file_list per worker thread.
num_workers = 5
# Derive the chunk size from the actual number of files instead of hard-coding
# it, so no file is skipped and no empty chunk is submitted when the dataset
# size changes. -(-a // b) is ceiling division; max(1, ...) keeps range()
# valid when file_list is empty.
chunk_size = max(1, -(-len(file_list) // num_workers))

with concurrent.futures.ThreadPoolExecutor(max_workers=num_workers) as executor:
    futures = [
        executor.submit(process_chunk, file_list[start:start + chunk_size])
        for start in range(0, len(file_list), chunk_size)
    ]

    for future in concurrent.futures.as_completed(futures):
        # result() re-raises any exception raised inside the worker thread.
        future.result()
        print('끝')

In [None]:
# Split the generated .txt label files into test / valid / train sets and copy
# every label together with its image into the matching split folder.
random.seed(2024)
# glob returns paths in arbitrary order; sort first so the seeded shuffle
# produces the same split on every run.
file_list = sorted(glob.glob(f'{file_root}/labels/*/*.txt'))

random.shuffle(file_list)
test_ratio = 0.1
num_file = len(file_list)
# The test set and the valid set each take this many files.
num_holdout = int(num_file * test_ratio)

test_list = file_list[:num_holdout]
valid_list = file_list[num_holdout:num_holdout * 2]
train_list = file_list[num_holdout * 2:]


def _copy_split(label_paths, split_root):
    """Copy each label file and its image into split_root/labels and split_root/images."""
    for label_path in label_paths:
        txt_name = os.path.basename(label_path)
        shutil.copyfile(label_path, f'{split_root}/labels/{txt_name}')

        # The image path is derived from the label path by substituting
        # labels -> images, TL -> TS and JSON -> 이미지, then swapping only
        # the file extension to .jpg.
        mirrored = label_path.replace('labels', 'images').replace('TL', 'TS').replace('JSON', '이미지')
        img_path = os.path.splitext(mirrored)[0] + '.jpg'
        img_name = os.path.basename(img_path)
        shutil.copyfile(img_path, f'{split_root}/images/{img_name}')


for split_list, split_root in (
    (test_list, test_root),
    (valid_list, valid_root),
    (train_list, train_root),
):
    _copy_split(split_list, split_root)

# Config 생성

In [15]:
# Project root folder; the dataset config file is written here.
pjt_root = '/Users/kimhongseok/cv_79_projects/part2/19'

In [21]:
import yaml

# Dataset description: split folders, number of classes and class names.
data = {
    'train': train_root,
    'val': valid_root,
    'test': test_root,
    'nc': 1,
    'names': ['yes'],
}

# Write the description to rip.yaml in the project root.
config_path = f'{pjt_root}/rip.yaml'
with open(config_path, 'w') as f:
    yaml.dump(data, f)

# training

In [1]:
import ultralytics
from ultralytics import YOLO

In [2]:
# Project locations, declared again for the training part of the notebook.
pjt_root = '/Users/kimhongseok/cv_79_projects/part2/19'

# The data folder sits directly under the project root.
data_root = f'{pjt_root}/data'
file_root = f'{data_root}/Training'
pjt_name = 'rip'

In [3]:
%cd /Users/kimhongseok/cv_79_projects/part2/19

/Users/kimhongseok/cv_79_projects/part2/19


In [4]:
import torch
print(torch.backends.mps.is_available())  # check whether MPS can be used
print(torch.backends.mps.is_built())      # check whether MPS support was built in

True
True


In [5]:
# Load the yolov8s.pt weights and start training on the dataset described by rip.yaml.
model = YOLO('yolov8s.pt')
result = model.train(
    data='rip.yaml',
    epochs=1,
    batch=64,
    imgsz=224,
    device='mps',
    patience=30,
    name='rip_yolo_v8_small',
)

New https://pypi.org/project/ultralytics/8.2.88 available 😃 Update with 'pip install -U ultralytics'
Ultralytics YOLOv8.2.66 🚀 Python-3.11.7 torch-2.4.0 MPS (Apple M1 Pro)
[34m[1mengine/trainer: [0mtask=detect, mode=train, model=yolov8s.pt, data=rip.yaml, epochs=1, time=None, patience=30, batch=64, imgsz=224, save=True, save_period=-1, cache=False, device=mps, workers=8, project=None, name=rip_yolo_v8_small8, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False, embed=None, show=False, save_frames=False,

  self.scaler = torch.cuda.amp.GradScaler(enabled=self.amp)
[34m[1mtrain: [0mScanning /Users/kimhongseok/cv_79_projects/part2/19/data/Training/rip/train/labels.cache... 42843 images, 0 backgrounds, 0 corrupt: 100%|██████████| 42843/42843 [00:00<?, ?it/s]
INFO:albumentations.check_version:A new version of Albumentations is available: 1.4.14 (you have 1.4.12). Upgrade using: pip install -U albumentations. To disable automatic update checks, set the environment variable NO_ALBUMENTATIONS_UPDATE to 1.


[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01), CLAHE(p=0.01, clip_limit=(1, 4.0), tile_grid_size=(8, 8))


[34m[1mval: [0mScanning /Users/kimhongseok/cv_79_projects/part2/19/data/Training/rip/valid/labels.cache... 5355 images, 0 backgrounds, 0 corrupt: 100%|██████████| 5355/5355 [00:00<?, ?it/s]


Plotting labels to runs/detect/rip_yolo_v8_small8/labels.jpg... 
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.002, momentum=0.9) with parameter groups 57 weight(decay=0.0), 64 weight(decay=0.0005), 63 bias(decay=0.0)
[34m[1mTensorBoard: [0mmodel graph visualization added ✅
Image sizes 224 train, 224 val
Using 0 dataloader workers
Logging results to [1mruns/detect/rip_yolo_v8_small8[0m
Starting training for 1 epochs...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


        1/1         0G      3.917      6.395      2.187        156        224:   3%|▎         | 20/670 [00:39<21:36,  1.99s/it]


KeyboardInterrupt: 