# 데이터파일 확보 및 압축풀기

In [1]:
import os
import zipfile

In [2]:
with zipfile.ZipFile('./data_source/SELECTSTAR.zip') as target_file:
    target_file.extractall('./data_source/SELECTSTAR/')

# Json 파일에서 데이터를 추출하여 YOLO용 txt파일 만들기

In [3]:
import json

In [4]:
json_dir = './data_source/SELECTSTAR/3_button_json/'
json_list = [name for name in os.listdir(json_dir)]
json_list.sort()

In [5]:
label_dir = './data_source/SELECTSTAR/button_labels/'
if not os.path.exists(label_dir):
    os.makedirs(label_dir)

In [6]:
for name in json_list:
    with open(json_dir + name) as file:
        obj = json.load(file)

        label = '0' if obj['label']=='false' else '1'
        w = obj['bbox']['x2'] - obj['bbox']['x1']
        h = obj['bbox']['y2'] - obj['bbox']['y1']
        x = obj['bbox']['x2'] - w/2
        y = obj['bbox']['y2'] - h/2

        label_filename = label_dir + os.path.splitext(name)[0] + '.txt'
        wfile = open(label_filename, 'w')
        wfile.write(f"0 {x:f} {y:f} {w:f} {h:f}\n")        
        wfile.close()

# 데이터 파일을 훈련용, 검증용, 테스트용으로 분할하기
- 분할 시, 불량과 양품의 비율을 맞추어야 하므로 good/bad로 분류한 후 분할하고, 다시 합치도록 함

In [7]:
import glob
import shutil

In [8]:
if not os.path.exists('./data_source/GB/bad/'):
    os.makedirs('./data_source/GB/bad/')

if not os.path.exists('./data_source/GB/good/'):
    os.makedirs('./data_source/GB/good/')

In [9]:
bad_list = glob.glob('./data_source/SELECTSTAR/2_button_image/Button_F*.jpg')

for src_path in bad_list:
    dst_path = './data_source/GB/bad/' + os.path.basename(src_path)
    shutil.copy2(src_path, dst_path)

In [10]:
good_list = glob.glob('./data_source/SELECTSTAR/2_button_image/Button_T*.jpg')

for src_path in good_list:
    dst_path = './data_source/GB/good/' + os.path.basename(src_path)
    shutil.copy2(src_path, dst_path)

- 데이터 분할을 위한 splitfolders 라이브러리 설치 후 분할 작업 수행

In [11]:
!pip install split-folders

Collecting split-folders
  Using cached split_folders-0.5.1-py3-none-any.whl (8.4 kB)
Installing collected packages: split-folders
Successfully installed split-folders-0.5.1


In [12]:
import splitfolders as sf

In [13]:
sf.ratio("./data_source/GB/", output="./dataset/", seed=1357, ratio=(.8, .1, .1))

In [14]:
if not os.path.exists('./dataset/test/images/'):
    os.makedirs('./dataset/test/images/')

if not os.path.exists('./dataset/train/images/'):
    os.makedirs('./dataset/train/images/')

if not os.path.exists('./dataset/val/images/'):
    os.makedirs('./dataset/val/images/')

In [15]:
if not os.path.exists('./dataset/test/labels/'):
    os.makedirs('./dataset/test/labels/')

if not os.path.exists('./dataset/train/labels/'):
    os.makedirs('./dataset/train/labels/')

if not os.path.exists('./dataset/val/labels/'):
    os.makedirs('./dataset/val/labels/')

In [16]:
dest_dir = './dataset/test/images/'
file_list = glob.glob('./dataset/test/bad/*.*')

for src_path in file_list:
    shutil.move(src_path, dest_dir + os.path.basename(src_path))

file_list = glob.glob('./dataset/test/good/*.*')

for src_path in file_list:
    shutil.move(src_path, dest_dir + os.path.basename(src_path))

if os.path.exists('./dataset/test/bad/'):
    os.rmdir('./dataset/test/bad/')
if os.path.exists('./dataset/test/good/'):
    os.rmdir('./dataset/test/good/')

In [17]:
dest_dir = './dataset/train/images/'
file_list = glob.glob('./dataset/train/bad/*.*')

for src_path in file_list:
    shutil.move(src_path, dest_dir + os.path.basename(src_path))

file_list = glob.glob('./dataset/train/good/*.*')

for src_path in file_list:
    shutil.move(src_path, dest_dir + os.path.basename(src_path))

if os.path.exists('./dataset/train/bad/'):
    os.rmdir('./dataset/train/bad/')
if os.path.exists('./dataset/train/good/'):
    os.rmdir('./dataset/train/good/')

In [18]:
dest_dir = './dataset/val/images/'
file_list = glob.glob('./dataset/val/bad/*.*')

for src_path in file_list:
    shutil.move(src_path, dest_dir + os.path.basename(src_path))

file_list = glob.glob('./dataset/val/good/*.*')

for src_path in file_list:
    shutil.move(src_path, dest_dir + os.path.basename(src_path))

if os.path.exists('./dataset/val/bad/'):
    os.rmdir('./dataset/val/bad/')
if os.path.exists('./dataset/val/good/'):
    os.rmdir('./dataset/val/good/')

In [33]:
src_dir = './data_source/SELECTSTAR/button_labels/'
dest_dir = './dataset/test/labels/'
file_list = os.listdir('./dataset/test/images/')

for src_path in file_list:
    shutil.move(src_dir + src_path.replace('.jpg', '.txt'), dest_dir)

In [34]:
src_dir = './data_source/SELECTSTAR/button_labels/'
dest_dir = './dataset/train/labels/'
file_list = os.listdir('./dataset/train/images/')

for src_path in file_list:
    shutil.move(src_dir + src_path.replace('.jpg', '.txt'), dest_dir)

In [35]:
src_dir = './data_source/SELECTSTAR/button_labels/'
dest_dir = './dataset/val/labels/'
file_list = os.listdir('./dataset/val/images/')

for src_path in file_list:
    shutil.move(src_dir + src_path.replace('.jpg', '.txt'), dest_dir)

# YAML 파일 만들기

In [36]:
import yaml

In [37]:
data = {
    'train': './dataset/train/images/',
    'val': './dataset/val/images/',
    'test': './dataset/test/images/',
    'names': ['bad', 'good'],
    'nc': 2
}

In [38]:
with open('./fab.yaml', 'w') as f:
    yaml.dump(data, f)

In [39]:
with open('./fab.yaml', 'r') as f:
    aquarium_yaml = yaml.safe_load(f)
    display(aquarium_yaml)

{'names': ['bad', 'good'],
 'nc': 2,
 'test': './dataset/test/images/',
 'train': './dataset/train/images/',
 'val': './dataset/val/images/'}

# YOLO 모델 설치

In [26]:
!pip install ultralytics

Collecting ultralytics
  Using cached ultralytics-8.0.172-py3-none-any.whl (614 kB)
Collecting torchvision>=0.9.0
  Using cached torchvision-0.15.2-cp310-cp310-manylinux1_x86_64.whl (6.0 MB)
Collecting numpy>=1.22.2
  Using cached numpy-1.25.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (18.2 MB)
Collecting pillow>=7.1.2
  Using cached Pillow-10.0.0-cp310-cp310-manylinux_2_28_x86_64.whl (3.4 MB)
Collecting py-cpuinfo
  Using cached py_cpuinfo-9.0.0-py3-none-any.whl (22 kB)
Collecting tqdm>=4.64.0
  Using cached tqdm-4.66.1-py3-none-any.whl (78 kB)
Collecting pandas>=1.1.4
  Using cached pandas-2.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (12.7 MB)
Collecting opencv-python>=4.6.0
  Using cached opencv_python-4.8.0.76-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (61.7 MB)
Collecting seaborn>=0.11.0
  Using cached seaborn-0.12.2-py3-none-any.whl (293 kB)
Collecting matplotlib>=3.2.2
  Using cached matplotlib-3.7.2-cp310-cp310-manylinux_2_17_x

In [27]:
import ultralytics

In [28]:
ultralytics.checks()

Ultralytics YOLOv8.0.172 🚀 Python-3.10.12 torch-2.0.1+cu117 CUDA:0 (NVIDIA GeForce RTX 4080 Laptop GPU, 12010MiB)
Setup complete ✅ (32 CPUs, 15.2 GB RAM, 109.5/937.3 GB disk)


# 사전학습 모델 설정

In [40]:
from ultralytics import YOLO

In [41]:
model = YOLO('yolov8n.pt')

In [43]:
yaml_path = os.getcwd()+'/fab.yaml'
model.train(data=yaml_path, epochs=100, patience=30, batch=32, imgsz=416)

Ultralytics YOLOv8.0.172 🚀 Python-3.10.12 torch-2.0.1+cu117 CUDA:0 (NVIDIA GeForce RTX 4080 Laptop GPU, 12010MiB)
[34m[1mengine/trainer: [0mtask=detect, mode=train, model=yolov8n.pt, data=/home/seokhwan/workspace/fabr/fab.yaml, epochs=100, patience=30, batch=32, imgsz=416, save=True, save_period=-1, cache=False, device=None, workers=8, project=None, name=None, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, show=False, save_txt=False, save_conf=False, save_crop=False, show_labels=True, show_conf=True, vid_stride=1, stream_buffer=False, line_width=None, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False

ultralytics.utils.metrics.DetMetrics object with attributes:

ap_class_index: array([0, 1])
box: ultralytics.utils.metrics.Metric object
confusion_matrix: <ultralytics.utils.metrics.ConfusionMatrix object at 0x7f0740141720>
fitness: 0.9589062415119242
keys: ['metrics/precision(B)', 'metrics/recall(B)', 'metrics/mAP50(B)', 'metrics/mAP50-95(B)']
maps: array([     0.9831,     0.92669])
names: {0: 'bad', 1: 'good'}
plot: False
results_dict: {'metrics/precision(B)': 0.9894232352579517, 'metrics/recall(B)': 1.0, 'metrics/mAP50(B)': 0.995, 'metrics/mAP50-95(B)': 0.9548958239021379, 'fitness': 0.9589062415119242}
save_dir: PosixPath('runs/detect/train3')
speed: {'preprocess': 0.06121397018432617, 'inference': 0.5631804466247559, 'loss': 0.0002741813659667969, 'postprocess': 0.19423961639404297}

In [44]:
results = model.predict(source='./dataset/test/images/', save=True)


image 1/20 /home/seokhwan/workspace/fabr/dataset/test/images/Button_F_B_14.jpg: 416x416 1 good, 2.2ms
image 2/20 /home/seokhwan/workspace/fabr/dataset/test/images/Button_F_B_18.jpg: 416x416 1 bad, 2.2ms
image 3/20 /home/seokhwan/workspace/fabr/dataset/test/images/Button_F_B_25.jpg: 416x416 1 bad, 3.4ms
image 4/20 /home/seokhwan/workspace/fabr/dataset/test/images/Button_F_B_26.jpg: 416x416 1 bad, 3.3ms
image 5/20 /home/seokhwan/workspace/fabr/dataset/test/images/Button_F_B_46.jpg: 416x416 1 bad, 2.2ms
image 6/20 /home/seokhwan/workspace/fabr/dataset/test/images/Button_F_W_50.jpg: 416x416 1 bad, 2.2ms
image 7/20 /home/seokhwan/workspace/fabr/dataset/test/images/Button_F_W_54.jpg: 416x416 1 bad, 2.2ms
image 8/20 /home/seokhwan/workspace/fabr/dataset/test/images/Button_F_W_73.jpg: 416x416 1 bad, 2.1ms
image 9/20 /home/seokhwan/workspace/fabr/dataset/test/images/Button_F_W_83.jpg: 416x416 1 bad, 2.2ms
image 10/20 /home/seokhwan/workspace/fabr/dataset/test/images/Button_F_W_94.jpg: 416x416 