In [1]:
%pip install ultralytics
import os

import cv2
import numpy as np
from tqdm import tqdm
import shutil

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 24.2 -> 24.3.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [5]:
from concurrent.futures import ProcessPoolExecutor
from multiprocessing import cpu_count
from pathlib import Path
from tqdm import tqdm
import shutil
import os
import cv2

BASE_DIR = '/content/drive/MyDrive/2학년/2학기/deeplearning/dataset'

def create_dataset_structure():
   """데이터셋 디렉터리 구조 생성"""
   dataset_path = os.path.join(BASE_DIR, 'dataset')
   for split in ['train', 'val']:
       for subdir in ['images', 'labels']:
           os.makedirs(os.path.join(dataset_path, split, subdir), exist_ok=True)

def process_single_file(args):
   """단일 annotation 파일을 YOLO 형식으로 변환"""
   txt_file, annot_path, img_path, save_path, classes = args

   img_file = txt_file.replace('.txt', '.jpg')
   img = cv2.imread(os.path.join(img_path, img_file))
   if img is None:
       return

   height, width = img.shape[:2]
   yolo_annots = []

   with open(os.path.join(annot_path, txt_file), 'r') as f:
       for line in f:
           try:
               x, y, w, h, _, category, _, _ = map(float, line.strip().split(',')[:8])
               category = str(int(category))

               if category not in classes:
                   continue

               x_center = (x + w/2) / width
               y_center = (y + h/2) / height
               w = w / width
               h = h / height

               yolo_annots.append(f"{classes[category]} {x_center} {y_center} {w} {h}\n")
           except:
               continue

   if yolo_annots:
       with open(os.path.join(save_path, txt_file), 'w') as f:
           f.writelines(yolo_annots)

def convert_visdrone_to_yolo(annot_path, img_path, save_path):
   """VisDrone 데이터를 YOLO 형식으로 변환"""
   classes = {str(i): i-1 for i in range(1, 11)}
   os.makedirs(save_path, exist_ok=True)

   txt_files = list(Path(annot_path).glob("*.txt"))
   args_list = [(txt_file.name, annot_path, img_path, save_path, classes) for txt_file in txt_files]

   with ProcessPoolExecutor(max_workers=cpu_count()) as executor:
       list(tqdm(executor.map(process_single_file, args_list),
                total=len(txt_files),
                desc="Converting annotations"))

def copy_single_file(args):
   """단일 이미지 파일 복사"""
   img_path, dst_img_path = args
   shutil.copy2(img_path, dst_img_path)

def copy_images_parallel(src_img_path, dst_img_path):
   """이미지 파일 병렬 복사"""
   os.makedirs(dst_img_path, exist_ok=True)
   images = list(Path(src_img_path).glob("*.jpg"))

   args_list = [(img_path, dst_img_path) for img_path in images]

   with ProcessPoolExecutor(max_workers=cpu_count()) as executor:
       list(tqdm(executor.map(copy_single_file, args_list),
                total=len(images),
                desc="Copying images"))

def process_dataset():
   """전체 데이터셋 처리"""
   create_dataset_structure()

   # Training set 처리
   print("Processing training set...")
   train_annot = os.path.join(BASE_DIR, 'VisDrone2019-DET-train/annotations')
   train_img = os.path.join(BASE_DIR, 'VisDrone2019-DET-train/images')
   train_save = os.path.join(BASE_DIR, 'dataset/train/labels')

   convert_visdrone_to_yolo(train_annot, train_img, train_save)
   copy_images_parallel(train_img, os.path.join(BASE_DIR, 'dataset/train/images'))

   # Validation set 처리
   print("\nProcessing validation set...")
   val_annot = os.path.join(BASE_DIR, 'VisDrone2019-DET-val/annotations')
   val_img = os.path.join(BASE_DIR, 'VisDrone2019-DET-val/images')
   val_save = os.path.join(BASE_DIR, 'dataset/val/labels')

   convert_visdrone_to_yolo(val_annot, val_img, val_save)
   copy_images_parallel(val_img, os.path.join(BASE_DIR, 'dataset/val/images'))

if __name__ == "__main__":
   process_dataset()

Processing training set...


Converting annotations: 0it [00:00, ?it/s]
Copying images: 0it [00:00, ?it/s]



Processing validation set...


Converting annotations: 0it [00:00, ?it/s]
Copying images: 0it [00:00, ?it/s]


In [None]:
from concurrent.futures import ThreadPoolExecutor
import zipfile
import os
import shutil
import os

BASE_DIR = 'C:\Users\sonym\20242R0136COSE47402\pretrained'

def extract_file(zip_file, target_dir, file_name):
    """단일 파일 압축 해제 함수"""
    # 디렉토리인 경우 생성만 하고 반환
    if file_name.endswith('/'):
        target_path = os.path.join(target_dir, file_name)
        os.makedirs(target_path, exist_ok=True)
        return

    target_path = os.path.join(target_dir, file_name)
    os.makedirs(os.path.dirname(target_path), exist_ok=True)

    try:
        with zip_file.open(file_name) as source, open(target_path, 'wb') as target:
            shutil.copyfileobj(source, target)
    except:
        print(f"Error extracting: {file_name}")

def restore_dataset_from_backup():
    """병렬 압축 해제"""
    backup_file = os.path.join(BASE_DIR, 'processed_dataset_backup.zip')
    extract_dir = os.path.join(BASE_DIR, 'dataset')

    if not os.path.exists(backup_file):
        print("Backup file not found")
        return

    print("Restoring dataset from backup...")
    os.makedirs(extract_dir, exist_ok=True)

    with zipfile.ZipFile(backup_file, 'r') as zip_file:
        # 먼저 디렉토리 생성
        file_list = zip_file.namelist()
        directories = [f for f in file_list if f.endswith('/')]
        for d in directories:
            os.makedirs(os.path.join(extract_dir, d), exist_ok=True)

        # 파일만 추출
        files = [f for f in file_list if not f.endswith('/')]

        # 병렬 압축 해제
        with ThreadPoolExecutor() as executor:
            list(tqdm(
                executor.map(lambda f: extract_file(zip_file, extract_dir, f), files),
                total=len(files),
                desc="Extracting files"
            ))
    print("Dataset restored successfully")
restore_dataset_from_backup()

Backup file not found


In [2]:
import os

BASE_DIR = 'C:/Users/sonym/20242R0136COSE47402/pretrained'

In [3]:
def create_yaml():
    yaml_content = f"""
path: {BASE_DIR}  # dataset root dir
train: train/images
val: val/images

# Classes
names:
  0: pedestrian
  1: people
  2: bicycle
  3: car
  4: van
  5: truck
  6: tricycle
  7: awning-tricycle
  8: bus
  9: motor
"""
    with open(os.path.join(BASE_DIR, 'visdrone.yaml'), 'w') as f:
        f.write(yaml_content)

create_yaml()

In [None]:
from ultralytics import YOLO

# nano 모델로 시작
model = YOLO('yolov8n.pt')

# 학습 시작
results = model.train(
   data=os.path.join(BASE_DIR, 'visdrone.yaml'),
   epochs=30,
   patience=5,
   imgsz=640,  
   batch=16,  
)

New https://pypi.org/project/ultralytics/8.3.47 available  Update with 'pip install -U ultralytics'
Ultralytics 8.3.40  Python-3.12.4 torch-2.5.1+cu118 CUDA:0 (NVIDIA GeForce RTX 4080, 16376MiB)
[34m[1mengine\trainer: [0mtask=detect, mode=train, model=yolov8n.pt, data=C:/Users/sonym/20242R0136COSE47402/pretrained\visdrone.yaml, epochs=30, time=None, patience=5, batch=16, imgsz=640, save=True, save_period=-1, cache=False, device=None, workers=8, project=None, name=train6, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=False, agnostic_nms=False, classes=None,

[34m[1mtrain: [0mScanning C:\Users\sonym\20242R0136COSE47402\pretrained\train\labels.cache... 6471 images, 0 backgrounds, 0 corrupt: 100%|██████████| 6471/6471 [00:00<?, ?it/s]




[34m[1mval: [0mScanning C:\Users\sonym\20242R0136COSE47402\pretrained\val\labels.cache... 548 images, 0 backgrounds, 0 corrupt: 100%|██████████| 548/548 [00:00<?, ?it/s]


Plotting labels to runs\detect\train6\labels.jpg... 
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.000714, momentum=0.9) with parameter groups 57 weight(decay=0.0), 64 weight(decay=0.0005), 63 bias(decay=0.0)
[34m[1mTensorBoard: [0mmodel graph visualization added 
Image sizes 640 train, 640 val
Using 8 dataloader workers
Logging results to [1mruns\detect\train6[0m
Starting training for 30 epochs...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       1/30      8.54G      1.845      2.391       1.04        461        640: 100%|██████████| 405/405 [00:36<00:00, 11.21it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 18/18 [00:02<00:00,  7.13it/s]

                   all        548      38759      0.291      0.172      0.138     0.0747






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       2/30      9.92G      1.682      1.522     0.9941        600        640: 100%|██████████| 405/405 [00:33<00:00, 12.06it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 18/18 [00:02<00:00,  7.65it/s]

                   all        548      38759      0.246      0.205      0.168     0.0926






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       3/30      9.35G      1.642      1.421     0.9825        702        640: 100%|██████████| 405/405 [00:32<00:00, 12.33it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 18/18 [00:02<00:00,  7.77it/s]

                   all        548      38759      0.273      0.222      0.192      0.107






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       4/30      6.48G      1.608      1.345     0.9743        385        640: 100%|██████████| 405/405 [00:32<00:00, 12.64it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 18/18 [00:02<00:00,  7.78it/s]


                   all        548      38759      0.297      0.241      0.219       0.12

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       5/30       8.7G      1.592      1.309      0.969        469        640: 100%|██████████| 405/405 [00:31<00:00, 12.69it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 18/18 [00:02<00:00,  7.49it/s]


                   all        548      38759      0.321      0.252      0.226      0.126

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       6/30      8.66G      1.573      1.268     0.9606        584        640: 100%|██████████| 405/405 [00:32<00:00, 12.34it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 18/18 [00:02<00:00,  8.02it/s]

                   all        548      38759      0.319      0.253      0.227      0.127






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       7/30      7.34G      1.551       1.24     0.9599        862        640: 100%|██████████| 405/405 [00:31<00:00, 12.76it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 18/18 [00:02<00:00,  8.00it/s]


                   all        548      38759      0.306      0.266       0.23      0.126

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       8/30      9.73G       1.54      1.218     0.9527        601        640: 100%|██████████| 405/405 [00:32<00:00, 12.54it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 18/18 [00:02<00:00,  8.12it/s]

                   all        548      38759      0.326      0.264      0.244      0.137






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       9/30      8.58G      1.531      1.205      0.953        474        640: 100%|██████████| 405/405 [00:32<00:00, 12.59it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 18/18 [00:02<00:00,  8.22it/s]

                   all        548      38759      0.356      0.273      0.257      0.146






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      10/30      9.55G      1.518       1.18     0.9508        665        640: 100%|██████████| 405/405 [00:32<00:00, 12.61it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 18/18 [00:02<00:00,  7.80it/s]


                   all        548      38759      0.342       0.28      0.254      0.143

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      11/30      9.25G      1.503      1.168     0.9464        361        640: 100%|██████████| 405/405 [00:32<00:00, 12.34it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 18/18 [00:02<00:00,  8.20it/s]


                   all        548      38759      0.346      0.281       0.26      0.147

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      12/30      10.9G      1.498      1.155     0.9454        378        640: 100%|██████████| 405/405 [00:32<00:00, 12.65it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 18/18 [00:02<00:00,  8.02it/s]

                   all        548      38759      0.374      0.283       0.27      0.152






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      13/30      6.16G      1.496      1.143     0.9436        537        640: 100%|██████████| 405/405 [00:31<00:00, 12.72it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 18/18 [00:02<00:00,  8.33it/s]


                   all        548      38759      0.359      0.282      0.266      0.152

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      14/30      8.13G      1.472      1.127     0.9382        674        640: 100%|██████████| 405/405 [00:32<00:00, 12.60it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 18/18 [00:02<00:00,  8.26it/s]


                   all        548      38759      0.383      0.287      0.275      0.157

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      15/30      10.7G      1.477      1.123     0.9391        454        640: 100%|██████████| 405/405 [00:32<00:00, 12.54it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 18/18 [00:02<00:00,  7.52it/s]


                   all        548      38759      0.383      0.293      0.283      0.161

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      16/30      10.4G       1.47      1.113     0.9369        607        640: 100%|██████████| 405/405 [00:32<00:00, 12.30it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 18/18 [00:02<00:00,  7.96it/s]

                   all        548      38759      0.389      0.286      0.283      0.161






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      17/30      7.51G      1.455      1.096     0.9361        660        640: 100%|██████████| 405/405 [00:32<00:00, 12.46it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 18/18 [00:02<00:00,  8.33it/s]

                   all        548      38759      0.384      0.294      0.283       0.16






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      18/30      8.39G       1.46      1.092     0.9332        552        640: 100%|██████████| 405/405 [00:32<00:00, 12.41it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 18/18 [00:02<00:00,  8.25it/s]


                   all        548      38759       0.39      0.294      0.287      0.163

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      19/30      6.68G       1.45      1.086     0.9317        309        640: 100%|██████████| 405/405 [00:32<00:00, 12.61it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 18/18 [00:02<00:00,  8.24it/s]


                   all        548      38759      0.405      0.297       0.29      0.165

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      20/30      7.22G      1.443      1.073     0.9326        639        640: 100%|██████████| 405/405 [00:32<00:00, 12.60it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 18/18 [00:02<00:00,  8.33it/s]

                   all        548      38759      0.402      0.298       0.29      0.165





Closing dataloader mosaic

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      21/30       8.5G      1.413      1.058     0.9306        370        640: 100%|██████████| 405/405 [00:30<00:00, 13.45it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 18/18 [00:02<00:00,  8.16it/s]

                   all        548      38759      0.394      0.294      0.284      0.164






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      22/30      6.99G      1.395      1.034     0.9274        319        640: 100%|██████████| 405/405 [00:29<00:00, 13.57it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 18/18 [00:02<00:00,  7.79it/s]

                   all        548      38759      0.395      0.287      0.287      0.165






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      23/30      7.52G      1.389      1.023     0.9256        409        640: 100%|██████████| 405/405 [00:30<00:00, 13.25it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 18/18 [00:02<00:00,  6.83it/s]

                   all        548      38759      0.394      0.293      0.289      0.166






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      24/30       8.5G      1.383      1.012     0.9248        275        640: 100%|██████████| 405/405 [00:31<00:00, 12.87it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 18/18 [00:02<00:00,  6.99it/s]

                   all        548      38759      0.389        0.3      0.291      0.166






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      25/30      9.18G      1.377      1.002     0.9232        244        640: 100%|██████████| 405/405 [00:31<00:00, 12.99it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 18/18 [00:02<00:00,  7.09it/s]


                   all        548      38759       0.42      0.299      0.299       0.17

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      26/30      7.57G      1.371     0.9966     0.9217        323        640: 100%|██████████| 405/405 [00:31<00:00, 12.84it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 18/18 [00:02<00:00,  7.35it/s]


                   all        548      38759      0.402      0.301      0.297      0.169

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      27/30      8.76G      1.365     0.9851     0.9194        431        640: 100%|██████████| 405/405 [00:31<00:00, 12.85it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 18/18 [00:02<00:00,  7.41it/s]


                   all        548      38759      0.396      0.309        0.3      0.172

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      28/30      6.73G      1.361     0.9835     0.9171        273        640: 100%|██████████| 405/405 [00:31<00:00, 12.76it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 18/18 [00:02<00:00,  7.30it/s]


                   all        548      38759      0.409      0.305      0.301      0.173

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      29/30       7.7G      1.355     0.9765     0.9166        287        640: 100%|██████████| 405/405 [00:31<00:00, 12.92it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 18/18 [00:02<00:00,  7.39it/s]


                   all        548      38759      0.406      0.304      0.299      0.172

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      30/30      8.49G      1.347     0.9666     0.9152        216        640: 100%|██████████| 405/405 [00:31<00:00, 12.89it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 18/18 [00:02<00:00,  7.35it/s]


                   all        548      38759      0.408      0.304      0.301      0.174

30 epochs completed in 0.296 hours.
Optimizer stripped from runs\detect\train6\weights\last.pt, 6.2MB
Optimizer stripped from runs\detect\train6\weights\best.pt, 6.2MB

Validating runs\detect\train6\weights\best.pt...
Ultralytics 8.3.40  Python-3.12.4 torch-2.5.1+cu118 CUDA:0 (NVIDIA GeForce RTX 4080, 16376MiB)
Model summary (fused): 168 layers, 3,007,598 parameters, 0 gradients, 8.1 GFLOPs


                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 18/18 [00:08<00:00,  2.06it/s]


                   all        548      38759      0.409      0.303      0.301      0.174
            pedestrian        520       8844      0.408      0.314      0.309       0.13
                people        482       5125      0.478      0.206      0.248     0.0873
               bicycle        364       1287      0.207       0.08     0.0626      0.024
                   car        515      14064      0.579      0.733      0.727      0.489
                   van        421       1975       0.44      0.335      0.339      0.233
                 truck        266        750      0.462      0.284      0.295      0.199
              tricycle        337       1045        0.4      0.211       0.21      0.109
       awning-tricycle        220        532      0.211      0.141      0.109     0.0718
                   bus        131        251      0.503      0.375      0.402      0.278
                 motor        485       4886      0.401       0.35      0.312      0.123
Speed: 0.2ms preproce

In [None]:
import os
import torch
import torch.nn as nn
from ultralytics import YOLO
import torch.nn.functional as F


# CBAM 모듈 예시 (채널+공간 어텐션)
class CBAMBlock(nn.Module):
    def __init__(self, channels, reduction=16):
        super().__init__()
        # Channel Attention
        self.channel_att = nn.Sequential(
            nn.AdaptiveAvgPool2d(1),
            nn.Conv2d(channels, channels // reduction, 1),
            nn.ReLU(inplace=True),
            nn.Conv2d(channels // reduction, channels, 1)
        )
        # Spatial Attention
        self.spatial_att = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3),
            nn.Sigmoid()
        )

    def forward(self, x):
        # Channel Attention
        avg_out = self.channel_att(x)
        max_out = self.channel_att(torch.max(x, dim=1, keepdim=True)[0])
        channel_att = torch.sigmoid(avg_out + max_out)
        x = x * channel_att

        # Spatial Attention
        avg_out = torch.mean(x, dim=1, keepdim=True)
        max_out, _ = torch.max(x, dim=1, keepdim=True)
        spatial_in = torch.cat([avg_out, max_out], dim=1)
        spatial_att = self.spatial_att(spatial_in)
        x = x * spatial_att
        return x


# 개선된 FEM 모듈 예시
class FeatureEnhancementModule(nn.Module):
    def __init__(self, in_channels):
        super().__init__()
        # 채널 축소 및 그룹 컨볼루션 활용
        reduced_channels = in_channels // 4
        self.reduce = nn.Conv2d(in_channels, reduced_channels, 1, bias=False)

        self.conv3x3 = nn.Conv2d(reduced_channels, reduced_channels, 3, padding=1, groups=reduced_channels, bias=False)
        self.conv5x5 = nn.Conv2d(reduced_channels, reduced_channels, 5, padding=2, groups=reduced_channels, bias=False)

        self.bn = nn.BatchNorm2d(reduced_channels * 2)  # concat 후 batchnorm
        self.cbam = CBAMBlock(reduced_channels * 2, reduction=16)

        self.expand = nn.Conv2d(reduced_channels * 2, in_channels, 1, bias=False)

    def forward(self, x):
        x = self.reduce(x)
        feat3x3 = self.conv3x3(x)
        feat5x5 = self.conv5x5(x)
        feat_cat = torch.cat([feat3x3, feat5x5], dim=1)
        feat_cat = self.bn(feat_cat)
        feat_att = self.cbam(feat_cat)
        return self.expand(feat_att)


# CUDA 최적화 설정
torch.backends.cudnn.benchmark = True

# 모델 로드 (yolov8m.pt 대신 다른 모델도 시도 가능)
model = YOLO('yolov8n.pt')

# in_channels 확인 후 FEM 모듈 삽입
in_channels = 768  # yolov8m neck 마지막 출력 채널(모델 구조 print 참고)
fem = FeatureEnhancementModule(in_channels)

neck_last_layer = model.model.model[-2]
detect_layer = model.model.model[-1]

model.model.model = nn.Sequential(
    *list(model.model.model[:-2]),
    nn.Sequential(
        neck_last_layer,
        fem
    ),
    detect_layer
)

# 모든 파라미터 학습 가능하도록 설정
for param in model.model.parameters():
    param.requires_grad = True

# 경로 설정 (사용자 환경에 맞게 수정)
BASE_DIR = 'C:/Users/sonym/20242R0136COSE47402/pretrained'
VISDRONE_YAML = os.path.join(BASE_DIR, 'visdrone.yaml')

# Augmentation 활성화 (data.yaml에서 mosaic, mixup 등 설정하거나 train 파라미터로 지정)
# 예: model.train(augment=True) 또는 data.yaml 내 augmentation 항목 조정

# 학습 시작
# epochs 증가(예: 50으로), scheduler 사용 등의 미세 조정 가능
results = model.train(
    data=VISDRONE_YAML,
    epochs=30,      
    patience=5,
    imgsz=1024,
    batch=16,
    name='visdrone_detection_fem_cbam',
    freeze=0,
    half=True,       
    verbose=True,       
    augment=True        # 기본 augmentation 활성화
)

# 이후 필요하다면 일정 에폭 후 Backbone Freeze/Unfreeze 전략, LR scheduler 조정, 데이터 증강 다양화 등을 추가 실험


New https://pypi.org/project/ultralytics/8.3.47 available  Update with 'pip install -U ultralytics'
Ultralytics 8.3.40  Python-3.12.4 torch-2.5.1+cu118 CUDA:0 (NVIDIA GeForce RTX 4080, 16376MiB)
[34m[1mengine\trainer: [0mtask=detect, mode=train, model=yolov8n.pt, data=C:/Users/sonym/20242R0136COSE47402/pretrained\visdrone.yaml, epochs=30, time=None, patience=5, batch=16, imgsz=1024, save=True, save_period=-1, cache=False, device=None, workers=8, project=None, name=visdrone_detection_fem_cbam3, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=0, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=True, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=True, agnostic_nms=Fa

[34m[1mtrain: [0mScanning C:\Users\sonym\20242R0136COSE47402\pretrained\train\labels.cache... 6471 images, 0 backgrounds, 0 corrupt: 100%|██████████| 6471/6471 [00:00<?, ?it/s]




[34m[1mval: [0mScanning C:\Users\sonym\20242R0136COSE47402\pretrained\val\labels.cache... 548 images, 0 backgrounds, 0 corrupt: 100%|██████████| 548/548 [00:00<?, ?it/s]


Plotting labels to runs\detect\visdrone_detection_fem_cbam3\labels.jpg... 
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.000714, momentum=0.9) with parameter groups 57 weight(decay=0.0), 64 weight(decay=0.0005), 63 bias(decay=0.0)
[34m[1mTensorBoard: [0mmodel graph visualization added 
Image sizes 1024 train, 1024 val
Using 8 dataloader workers
Logging results to [1mruns\detect\visdrone_detection_fem_cbam3[0m
Starting training for 30 epochs...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       1/30     0.543G      1.643      2.256      1.077        476       1024: 100%|██████████| 405/405 [04:05<00:00,  1.65it/s] 
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 18/18 [00:05<00:00,  3.04it/s]

                   all        548      38759      0.262       0.24      0.204      0.116






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       2/30        12G      1.519      1.465      1.031        611       1024: 100%|██████████| 405/405 [03:14<00:00,  2.08it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 18/18 [00:03<00:00,  4.72it/s]

                   all        548      38759      0.341      0.303      0.263      0.151






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       3/30      15.4G      1.491      1.353      1.019        710       1024: 100%|██████████| 405/405 [01:53<00:00,  3.57it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 18/18 [00:03<00:00,  4.88it/s]

                   all        548      38759      0.372      0.324       0.29      0.169






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       4/30      20.5G      1.464      1.275       1.01        393       1024: 100%|██████████| 405/405 [02:17<00:00,  2.95it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 18/18 [00:03<00:00,  4.59it/s]

                   all        548      38759      0.387      0.333      0.312      0.181






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       5/30      26.8G      1.454      1.236      1.005        475       1024: 100%|██████████| 405/405 [02:39<00:00,  2.54it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 18/18 [00:03<00:00,  4.69it/s]

                   all        548      38759      0.412      0.336      0.322      0.191






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       6/30      14.9G       1.44       1.19     0.9973        600       1024: 100%|██████████| 405/405 [01:39<00:00,  4.09it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 18/18 [00:03<00:00,  5.38it/s]

                   all        548      38759      0.435      0.342      0.338        0.2






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       7/30      21.6G      1.423      1.164     0.9945        912       1024: 100%|██████████| 405/405 [02:23<00:00,  2.83it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 18/18 [00:03<00:00,  5.40it/s]

                   all        548      38759      0.442      0.342      0.334      0.198






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       8/30      21.3G      1.412      1.146     0.9879        644       1024: 100%|██████████| 405/405 [06:31<00:00,  1.04it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 18/18 [00:25<00:00,  1.43s/it]

                   all        548      38759      0.441      0.353       0.35      0.206






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       9/30      18.9G      1.408      1.128     0.9879        501       1024: 100%|██████████| 405/405 [02:00<00:00,  3.37it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 18/18 [00:05<00:00,  3.22it/s]

                   all        548      38759      0.445       0.36      0.358      0.213






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      10/30        26G      1.399      1.107      0.987        718       1024: 100%|██████████| 405/405 [02:58<00:00,  2.27it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 18/18 [00:06<00:00,  2.62it/s]

                   all        548      38759      0.427      0.377      0.365      0.218






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      11/30      23.4G      1.379       1.09     0.9796        377       1024: 100%|██████████| 405/405 [03:24<00:00,  1.98it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 18/18 [00:04<00:00,  4.41it/s]

                   all        548      38759      0.462      0.364       0.37      0.224






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      12/30      24.9G      1.381      1.081     0.9794        386       1024: 100%|██████████| 405/405 [02:43<00:00,  2.48it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 18/18 [00:03<00:00,  5.08it/s]

                   all        548      38759      0.471      0.379       0.38      0.226






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      13/30      19.5G      1.377      1.068     0.9769        560       1024: 100%|██████████| 405/405 [02:32<00:00,  2.65it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 18/18 [00:04<00:00,  3.84it/s]

                   all        548      38759      0.456      0.378      0.377      0.228






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      14/30      22.6G      1.357       1.05     0.9719        693       1024: 100%|██████████| 405/405 [02:03<00:00,  3.27it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 18/18 [00:03<00:00,  5.35it/s]

                   all        548      38759       0.49      0.384       0.39      0.236






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      15/30      17.1G      1.361      1.045     0.9728        473       1024: 100%|██████████| 405/405 [09:34<00:00,  1.42s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 18/18 [00:22<00:00,  1.24s/it]

                   all        548      38759       0.47      0.389      0.389      0.235






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      16/30      20.3G      1.359      1.039     0.9713        658       1024: 100%|██████████| 405/405 [05:00<00:00,  1.35it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 18/18 [00:10<00:00,  1.68it/s]

                   all        548      38759      0.484      0.386      0.395       0.24






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      17/30      26.5G      1.345      1.025     0.9705        709       1024: 100%|██████████| 405/405 [01:40<00:00,  4.03it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 18/18 [00:03<00:00,  5.80it/s]

                   all        548      38759      0.487      0.382      0.392      0.237






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      18/30      26.5G      1.349      1.019     0.9671        579       1024: 100%|██████████| 405/405 [02:53<00:00,  2.33it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 18/18 [00:03<00:00,  5.94it/s]

                   all        548      38759       0.48      0.393      0.402      0.243






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      19/30      19.1G       1.34      1.013     0.9661        314       1024: 100%|██████████| 405/405 [01:21<00:00,  5.00it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 18/18 [00:03<00:00,  5.83it/s]

                   all        548      38759      0.491      0.396      0.405      0.242






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      20/30      15.5G      1.336      1.003     0.9669        656       1024: 100%|██████████| 405/405 [01:00<00:00,  6.68it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 18/18 [00:03<00:00,  5.80it/s]

                   all        548      38759      0.507      0.389      0.402      0.242





Closing dataloader mosaic

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      21/30      21.6G      1.324     0.9968     0.9701        382       1024: 100%|██████████| 405/405 [01:25<00:00,  4.71it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 18/18 [00:03<00:00,  5.81it/s]

                   all        548      38759      0.506      0.389      0.404      0.245






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      22/30      18.5G       1.31     0.9732     0.9666        348       1024: 100%|██████████| 405/405 [01:15<00:00,  5.39it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 18/18 [00:05<00:00,  3.03it/s]

                   all        548      38759      0.476      0.396      0.401      0.242






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      23/30      18.9G      1.305     0.9643     0.9639        409       1024: 100%|██████████| 405/405 [01:08<00:00,  5.92it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 18/18 [00:03<00:00,  5.71it/s]

                   all        548      38759      0.511      0.393      0.407      0.248






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      24/30      21.8G      1.299     0.9574     0.9621        277       1024: 100%|██████████| 405/405 [01:09<00:00,  5.81it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 18/18 [00:03<00:00,  5.92it/s]

                   all        548      38759      0.508      0.395      0.408      0.248






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      25/30      22.5G      1.296     0.9483     0.9624        244       1024: 100%|██████████| 405/405 [02:27<00:00,  2.75it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 18/18 [00:09<00:00,  1.92it/s]

                   all        548      38759      0.511      0.399      0.412      0.251






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      26/30      23.4G       1.29     0.9404     0.9602        328       1024: 100%|██████████| 405/405 [01:36<00:00,  4.18it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 18/18 [00:04<00:00,  4.48it/s]

                   all        548      38759      0.507      0.403      0.416      0.254






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      27/30      20.6G      1.283     0.9305      0.957        449       1024: 100%|██████████| 405/405 [01:08<00:00,  5.87it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 18/18 [00:03<00:00,  5.82it/s]

                   all        548      38759      0.513      0.408      0.418      0.253






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      28/30      19.8G      1.281     0.9275     0.9563        274       1024: 100%|██████████| 405/405 [01:18<00:00,  5.15it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 18/18 [00:08<00:00,  2.06it/s]

                   all        548      38759      0.521      0.399      0.417      0.254






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      29/30      20.8G      1.275       0.92     0.9546        296       1024: 100%|██████████| 405/405 [01:27<00:00,  4.63it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 18/18 [00:03<00:00,  5.37it/s]

                   all        548      38759      0.511      0.404      0.418      0.254






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      30/30      23.5G      1.271      0.915     0.9539        224       1024: 100%|██████████| 405/405 [01:19<00:00,  5.09it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 18/18 [00:02<00:00,  6.02it/s]

                   all        548      38759      0.526      0.402       0.42      0.256






30 epochs completed in 1.343 hours.
Optimizer stripped from runs\detect\visdrone_detection_fem_cbam3\weights\last.pt, 6.3MB
Optimizer stripped from runs\detect\visdrone_detection_fem_cbam3\weights\best.pt, 6.3MB

Validating runs\detect\visdrone_detection_fem_cbam3\weights\best.pt...
Ultralytics 8.3.40  Python-3.12.4 torch-2.5.1+cu118 CUDA:0 (NVIDIA GeForce RTX 4080, 16376MiB)
Model summary (fused): 168 layers, 3,007,598 parameters, 0 gradients, 8.1 GFLOPs


                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 18/18 [00:11<00:00,  1.52it/s]


                   all        548      38759      0.497      0.393      0.406       0.25
            pedestrian        520       8844      0.532      0.426      0.453      0.217
                people        482       5125      0.553      0.299      0.357      0.144
               bicycle        364       1287      0.364       0.17      0.167     0.0769
                   car        515      14064      0.664       0.79      0.803      0.574
                   van        421       1975      0.525      0.448       0.45      0.327
                 truck        266        750      0.469      0.382      0.374      0.264
              tricycle        337       1045      0.476      0.269      0.289      0.169
       awning-tricycle        220        532      0.277      0.203      0.165      0.112
                   bus        131        251      0.583      0.514      0.551      0.414
                 motor        485       4886      0.529      0.431      0.449      0.205
Speed: 0.1ms preproce