**운전자 이상행동 데이터셋 다운로드 및 변환**


In [None]:
import os
import shutil
import json
import random
from tqdm import tqdm
from google.colab import files
from google.colab import drive

In [None]:
# Mount Google Drive at /content/drive; every dataset path used below lives
# under /content/drive/MyDrive.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Change into the image folder first: unzip extracts into the current working
# directory. -qq suppresses unzip's per-file output.
%cd /content/drive/MyDrive/CAB/CAB_dataset/JPEGImages
!unzip -qq "/content/drive/MyDrive/CAB/CAB_dataset/JPEGImages/images_bbox.zip"

/content/drive/MyDrive/CAB/CAB_dataset/JPEGImages


In [None]:
# Change into the annotation folder first: unzip extracts into the current
# working directory. -qq suppresses unzip's per-file output.
%cd /content/drive/MyDrive/CAB/CAB_dataset/Annotations
!unzip -qq "/content/drive/MyDrive/CAB/CAB_dataset/Annotations/label_bbox.zip"

/content/drive/MyDrive/CAB/CAB_dataset/temp
unzip:  cannot find or open /content/drive/MyDrive/CAB/CAB_dataset/temp/졸음운전 예방을 위한 운전자 상태 정보 영상, /content/drive/MyDrive/CAB/CAB_dataset/temp/졸음운전 예방을 위한 운전자 상태 정보 영상.zip or /content/drive/MyDrive/CAB/CAB_dataset/temp/졸음운전 예방을 위한 운전자 상태 정보 영상.ZIP.


파일 경로 변환
- /content/drive/MyDrive/CAB/CAB_dataset/JPEGImages 하위 디렉토리에 있는 이미지를 /content/drive/MyDrive/CAB/CAB_dataset/JPEGImages로 옮김
- /content/drive/MyDrive/CAB/CAB_dataset/Annotations 하위 디렉토리에 있는 JSON 파일을 /content/drive/MyDrive/CAB/CAB_dataset/Annotations로 옮김

In [None]:
def move_file(base_dir):
    """Flatten ``base_dir`` by moving nested .jpg/.json files to its top level.

    Walks ``base_dir`` recursively and moves every file whose name ends in
    ``.jpg`` or ``.json`` (compared case-insensitively) directly into
    ``base_dir``. Files that already sit at the top level are left untouched.

    Args:
        base_dir: Directory to flatten. Sub-directories are not removed.

    Note:
        Name collisions between files from different sub-directories are not
        resolved here; what happens then is whatever ``shutil.move`` does on
        the current platform.
    """
    for root, _dirs, filenames in os.walk(base_dir):
        for filename in filenames:
            # The leading dot matters: a bare 'json' suffix would also match
            # unrelated names that merely end in those four letters.
            if not filename.lower().endswith(('.jpg', '.json')):
                continue

            source_path = os.path.join(root, filename)
            destination_path = os.path.join(base_dir, filename)

            # Already at the top level; moving a file onto itself is pointless.
            if source_path == destination_path:
                continue

            shutil.move(source_path, destination_path)

jpeg_dir = "/content/drive/MyDrive/CAB/CAB_dataset/JPEGImages"
annot_dir = "/content/drive/MyDrive/CAB/CAB_dataset/Annotations"

# Flatten the image folder first, then the annotation folder.
for dataset_dir in (jpeg_dir, annot_dir):
    move_file(dataset_dir)

json 파일 형태를 YOLOv5 훈련에 사용할 수 있도록 txt로 변환한다.

In [None]:
# Annotation object name -> YOLO class id. Ids are assigned in tuple order.
class_mapping = {
    label: class_id
    for class_id, label in enumerate(
        ("Face", "Leye", "Reye", "Mouth", "Cigar", "Phone")
    )
}

In [None]:
source_path = "/content/drive/MyDrive/CAB/CAB_dataset/Annotations/"
dest_path = "/content/drive/MyDrive/CAB/CAB_dataset/TextAnnotations/"

# open(..., "w") does not create parent directories, so make sure the output
# folder exists before the first label file is written.
os.makedirs(dest_path, exist_ok=True)

# Names of the JSON annotation files located directly in source_path.
json_files = [f for f in os.listdir(source_path) if f.endswith('.json')]

for json_file in json_files:
    file_path = os.path.join(source_path, json_file)

    with open(file_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    # Drop only the final extension. split(".")[0] would cut a name that
    # contains extra dots at the first one, and the label file would then no
    # longer share its stem with the image file.
    file_name = os.path.splitext(data["FileInfo"]["FileName"])[0]

    # Image size; the box coordinates below are divided by these values.
    img_width = data["FileInfo"]["Width"]
    img_height = data["FileInfo"]["Height"]

    # One output line per visible bounding box of this image.
    yolo_data = []

    for obj, bbox in data["ObjectInfo"]["BoundingBox"].items():
        if not bbox["isVisible"]:
            continue

        # Raises KeyError for an object name missing from class_mapping.
        class_id = class_mapping[obj]
        # Position is unpacked as corner coordinates.
        xmin, ymin, xmax, ymax = bbox["Position"]

        # YOLO boxes are centre x/y plus width/height, each normalised by the
        # image size.
        x_center = (xmin + xmax) / 2 / img_width
        y_center = (ymin + ymax) / 2 / img_height
        width = (xmax - xmin) / img_width
        height = (ymax - ymin) / img_height

        # Line format: "<class id> <x_center> <y_center> <width> <height>".
        yolo_data.append(f"{class_id} {x_center} {y_center} {width} {height}")

    output_file = os.path.join(dest_path, file_name + ".txt")

    # A file is written even when no box is visible (it is then empty).
    with open(output_file, "w") as f:
        f.writelines(line + "\n" for line in yolo_data)

다음으로, 모든 Image의 절대 경로가 적힌 리스트를 만든다.



In [None]:
base_path = "/content/drive/MyDrive/CAB/CAB_dataset/JPEGImages"

# Full path of every entry directly inside base_path whose name ends in ".jpg".
img_files = [
    os.path.join(base_path, name)
    for name in os.listdir(base_path)
    if name.endswith(".jpg")
]

yolo 학습을 위해 annotation 파일과 image 파일을 한 폴더로 옮긴다.

In [None]:
img_source_path = "/content/drive/MyDrive/CAB/CAB_dataset/JPEGImages/"
txt_source_path = "/content/drive/MyDrive/CAB/CAB_dataset/TextAnnotations/"
dest_path = "/content/drive/MyDrive/CAB/CAB_dataset/TxtImg"

# shutil.move treats a destination that does not exist as the target *file
# name*. Without this directory every file would be renamed onto the same
# path, each one replacing the previous.
os.makedirs(dest_path, exist_ok=True)

# Move the images first, then the label files, into the shared folder.
for source_dir, extension in ((img_source_path, ".jpg"), (txt_source_path, ".txt")):
    for f in os.listdir(source_dir):
        if f.endswith(extension):
            shutil.move(os.path.join(source_dir, f), dest_path)

전체 이미지를 6:2:2 비율로 나누어 train set, validation set, test set을 생성한다.




In [None]:
data_root = "/content/drive/MyDrive/CAB/CAB_dataset/"

# One output folder per dataset split, all directly under data_root.
train_root, val_root, test_root = (
    os.path.join(data_root, split_name) for split_name in ("train", "val", "test")
)

# Create the folders; exist_ok keeps a re-run of this cell from failing.
for split_root in (train_root, val_root, test_root):
    os.makedirs(split_root, exist_ok=True)

In [None]:
# Shuffle in place so the split below is random (no seed is set, so the
# assignment differs between runs).
random.shuffle(img_files)

# The images and their .txt labels were moved into TxtImg, so resolve every
# entry of img_files against that folder by file name. The paths recorded
# when the list was built may no longer exist.
pair_root = os.path.join(data_root, "TxtImg")

# Wrapping the list (not the enumerate object) lets tqdm show the total.
for idx, img_path in enumerate(tqdm(img_files)):
    img_name = os.path.basename(img_path)
    text_name = os.path.splitext(img_name)[0] + ".txt"

    img_src = os.path.join(pair_root, img_name)
    txt_src = os.path.join(pair_root, text_name)

    # Out of every 10 consecutive items: 6 -> train, 2 -> val, 2 -> test.
    bucket = idx % 10
    if bucket < 6:
        split_root = train_root
    elif bucket < 8:
        split_root = val_root
    else:
        split_root = test_root

    # The image and its label go to the same split folder.
    shutil.copy2(img_src, os.path.join(split_root, img_name))
    shutil.copy2(txt_src, os.path.join(split_root, text_name))

12563it [09:05, 23.02it/s]


추후 훈련된 모델을 이용하여 예측할 이미지 폴더를 생성한다.

In [None]:
test_no_txt = os.path.join(data_root, "test_no_txt")
os.makedirs(test_no_txt, exist_ok=True)

# Copy only the .jpg entries of the test split; label files are left behind.
test_images = [name for name in os.listdir(test_root) if name.endswith(".jpg")]
for image_name in test_images:
    shutil.copy2(
        os.path.join(test_root, image_name),
        os.path.join(test_no_txt, image_name),
    )

train, val, test, test_no_txt 폴더에 이미지들이 잘 옮겨졌는지 확인한다.

In [None]:
def count_img_files(directory):
    """Return the number of entries in ``directory`` whose name ends in ".jpg"."""
    return sum(1 for entry in os.listdir(directory) if entry.endswith(".jpg"))

# A notebook cell only echoes the value of its last expression, so print each
# count explicitly; otherwise the train and val numbers are never shown.
print("train:", count_img_files(train_root))
print("val:", count_img_files(val_root))
print("test:", count_img_files(test_root))
# count_img_files(test_no_txt)

**YOLOv5 환경 구성**

In [None]:
!git clone -b v7.0 https://github.com/jetsonai/yolov5
%cd yolov5
%pip install -qr requirements.txt  # install

Cloning into 'yolov5'...
remote: Enumerating objects: 15656, done.[K
remote: Total 15656 (delta 0), reused 0 (delta 0), pack-reused 15656 (from 1)[K
Receiving objects: 100% (15656/15656), 14.50 MiB | 18.42 MiB/s, done.
Resolving deltas: 100% (10695/10695), done.
Note: switching to '915bbf294bb74c859f0b41f1c23bc395014ea679'.

You are in 'detached HEAD' state. You can look around, make experimental
changes and commit them, and you can discard any commits you make in this
state without impacting any branches by switching back to a branch.

If you want to create a new branch to retain commits you create, you may
do so (now or later) by using -c with the switch command. Example:

  git switch -c <new-branch-name>

Or undo this operation with:

  git switch -

Turn off this advice by setting config variable advice.detachedHead to false

/content/yolov5


**Train**

훈련을 위한 설정 파일, yaml 파일을 제작한다.

In [None]:
# Dataset description passed to train.py / val.py via "--data cabdata.yaml":
# the three split folders plus the class-id -> name table.
text_lines = """
train:
  - /content/drive/MyDrive/CAB/CAB_dataset/train
val:
  - /content/drive/MyDrive/CAB/CAB_dataset/val
test:
  - /content/drive/MyDrive/CAB/CAB_dataset/test

# Classes
names:
  0: Face
  1: Leye
  2: Reye
  3: Mouth
  4: Cigar
  5: Phone
"""

yaml_path = "/content/yolov5/cabdata.yaml"
with open(yaml_path, 'w') as yaml_file:
    yaml_file.write(text_lines)

In [None]:
# Train starting from the yolov5s.pt weights on the dataset described by
# cabdata.yaml: image size 480, batch size 32, 20 epochs, with --cache enabled.
!python train.py --img 480 --batch 32 --epochs 20 --data cabdata.yaml --weights yolov5s.pt --cache

[1;30;43m스트리밍 출력 내용이 길어서 마지막 5000줄이 삭제되었습니다.[0m
  with torch.cuda.amp.autocast(amp):
  with torch.cuda.amp.autocast(amp):
  with torch.cuda.amp.autocast(amp):
  with torch.cuda.amp.autocast(amp):
  with torch.cuda.amp.autocast(amp):
  with torch.cuda.amp.autocast(amp):
  with torch.cuda.amp.autocast(amp):
  with torch.cuda.amp.autocast(amp):
  with torch.cuda.amp.autocast(amp):
  with torch.cuda.amp.autocast(amp):
  with torch.cuda.amp.autocast(amp):
  with torch.cuda.amp.autocast(amp):
  with torch.cuda.amp.autocast(amp):
  with torch.cuda.amp.autocast(amp):
  with torch.cuda.amp.autocast(amp):
  with torch.cuda.amp.autocast(amp):
  with torch.cuda.amp.autocast(amp):
  with torch.cuda.amp.autocast(amp):
  with torch.cuda.amp.autocast(amp):
  with torch.cuda.amp.autocast(amp):
  with torch.cuda.amp.autocast(amp):
  with torch.cuda.amp.autocast(amp):
  with torch.cuda.amp.autocast(amp):
  with torch.cuda.amp.autocast(amp):
  with torch.cuda.amp.autocast(amp):
  with torch.cuda.amp.aut

In [None]:
# Download the best checkpoint through the browser.
# NOTE(review): "exp4" looks like the run folder of one particular session;
# confirm the folder name under runs/train before running this cell.
files.download('/content/yolov5/runs/train/exp4/weights/best.pt')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

**TEST**

In [None]:
# Evaluate the trained weights on the "test" entry of cabdata.yaml.
# NOTE(review): "exp4" looks like the run folder of one particular session;
# confirm the folder name under runs/train before running this cell.
!python val.py --task "test" --img 480 --batch 32 --data cabdata.yaml --weights /content/yolov5/runs/train/exp4/weights/best.pt

[34m[1mval: [0mdata=cabdata.yaml, weights=['/content/yolov5/runs/train/exp4/weights/best.pt'], batch_size=32, imgsz=480, conf_thres=0.001, iou_thres=0.6, max_det=300, task=test, device=, workers=8, single_cls=False, augment=False, verbose=False, save_txt=False, save_hybrid=False, save_conf=False, save_json=False, project=runs/val, name=exp, exist_ok=False, half=False, dnn=False
YOLOv5 🚀 v7.0-0-g915bbf29 Python-3.10.12 torch-2.5.0+cu121 CUDA:0 (Tesla T4, 15102MiB)

  ckpt = torch.load(attempt_download(w), map_location='cpu')  # load
Fusing layers... 
Model summary: 157 layers, 7026307 parameters, 0 gradients, 15.8 GFLOPs
[34m[1mtest: [0mScanning /content/drive/MyDrive/CAB/CAB_dataset/test... 2512 images, 0 backgrounds, 0 corrupt: 100% 2512/2512 [02:21<00:00, 17.72it/s] 
[34m[1mtest: [0mNew cache created: /content/drive/MyDrive/CAB/CAB_dataset/test.cache
                 Class     Images  Instances          P          R      mAP50   mAP50-95:   1% 1/79 [00:02<03:26,  2.65s/it]Ex