### 1. 데이터 구조 확인 및 불러오기

In [20]:
import pandas as pd

train_df = pd.read_csv('train.csv')
test_df = pd.read_csv('test.csv')

# 상위 5개 데이터 확인
print("Train Sample:")
print(train_df.head())

print("\nTest Sample:")
print(test_df.head())

Train Sample:
   id          img_path            artist
0   0  ./train/0000.jpg   Diego Velazquez
1   1  ./train/0001.jpg  Vincent van Gogh
2   2  ./train/0002.jpg      Claude Monet
3   3  ./train/0003.jpg       Edgar Degas
4   4  ./train/0004.jpg  Hieronymus Bosch

Test Sample:
           id               img_path
0  TEST_00000  ./test/TEST_00000.jpg
1  TEST_00001  ./test/TEST_00001.jpg
2  TEST_00002  ./test/TEST_00002.jpg
3  TEST_00003  ./test/TEST_00003.jpg
4  TEST_00004  ./test/TEST_00004.jpg


### 2. 폴더 구조 생성 및 이미지 복사

In [21]:
import os
import shutil
from sklearn.model_selection import train_test_split

# 이미지 기본 경로
base_dir = './'
train_img_dir = os.path.join(base_dir, 'train_images')  # ./train_images
os.makedirs(train_img_dir, exist_ok=True)

# 새로 만들 YOLO 학습용 폴더 구조
yolo_train_dir = './yolo_dataset/train'
yolo_val_dir = './yolo_dataset/val'

# 디렉토리 생성
for d in [yolo_train_dir, yolo_val_dir]:
    os.makedirs(d, exist_ok=True)

# train/val 나누기
train_split, val_split = train_test_split(train_df, test_size=0.2, stratify=train_df['artist'], random_state=42)

# 이미지 복사 함수
def copy_images(split_df, split_dir):
    for _, row in split_df.iterrows():
        label = row['artist']
        src_path = row['img_path']
        dst_dir = os.path.join(split_dir, label)
        os.makedirs(dst_dir, exist_ok=True)
        dst_path = os.path.join(dst_dir, os.path.basename(src_path))
        shutil.copy(src_path, dst_path)

copy_images(train_split, yolo_train_dir)
copy_images(val_split, yolo_val_dir)

### 3. YOLOv11s 분류 모델 학습

In [22]:
from ultralytics import YOLO

# YOLOv11s classification 모델 불러오기
model = YOLO('yolo11s-cls.pt')  # YOLOv11s는 초경량 모델

# 학습
model.train(
    data='yolo_dataset',        # train/val 폴더 들어있는 디렉토리
    epochs=20,
    imgsz=224,
    batch=32,
    project='art_classify',     # 저장될 디렉토리 이름
    name='yolo11s-train',
    patience=10                 # 조기종료
)

Downloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11s-cls.pt to 'yolo11s-cls.pt'...


100%|██████████| 13.0M/13.0M [00:00<00:00, 31.2MB/s]


Ultralytics 8.3.115 🚀 Python-3.9.22 torch-2.7.0 CPU (Apple M3 Pro)
[34m[1mengine/trainer: [0mtask=classify, mode=train, model=yolo11s-cls.pt, data=yolo_dataset, epochs=20, time=None, patience=10, batch=32, imgsz=224, save=True, save_period=-1, cache=False, device=None, workers=8, project=art_classify, name=yolo11s-train, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False, embed=None, show=False, save_frames=False, save_txt=False, save_conf=False, save_crop=False, show_labels=True, show_conf=True, show_boxes=True, line_wi

[34m[1mtrain: [0mScanning /Users/kimdohyeon/건양대학교병원_바이오헬스/Biomedical_AI_Train/Dacon_art/yolo_dataset/train... 4728 images, 0 corrupt: 100%|██████████| 4728/4728 [00:00<00:00, 7119.18it/s]

[34m[1mtrain: [0mNew cache created: /Users/kimdohyeon/건양대학교병원_바이오헬스/Biomedical_AI_Train/Dacon_art/yolo_dataset/train.cache
[34m[1mval: [0mFast image access ✅ (ping: 0.0±0.0 ms, read: 1001.7±466.0 MB/s, size: 287.6 KB)



[34m[1mval: [0mScanning /Users/kimdohyeon/건양대학교병원_바이오헬스/Biomedical_AI_Train/Dacon_art/yolo_dataset/val... 1183 images, 0 corrupt: 100%|██████████| 1183/1183 [00:00<00:00, 8822.77it/s]


[34m[1mval: [0mNew cache created: /Users/kimdohyeon/건양대학교병원_바이오헬스/Biomedical_AI_Train/Dacon_art/yolo_dataset/val.cache
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.000185, momentum=0.9) with parameter groups 39 weight(decay=0.0), 40 weight(decay=0.0005), 40 bias(decay=0.0)




[34m[1mTensorBoard: [0mmodel graph visualization added ✅
Image sizes 224 train, 224 val
Using 0 dataloader workers
Logging results to [1mart_classify/yolo11s-train[0m
Starting training for 20 epochs...

      Epoch    GPU_mem       loss  Instances       Size


       1/20         0G      3.618         24        224: 100%|██████████| 148/148 [05:21<00:00,  2.18s/it]
               classes   top1_acc   top5_acc: 100%|██████████| 19/19 [00:41<00:00,  2.16s/it]

                   all      0.311      0.599

      Epoch    GPU_mem       loss  Instances       Size



       2/20         0G      2.308         24        224: 100%|██████████| 148/148 [05:17<00:00,  2.15s/it]
               classes   top1_acc   top5_acc: 100%|██████████| 19/19 [00:40<00:00,  2.15s/it]

                   all      0.566      0.835

      Epoch    GPU_mem       loss  Instances       Size



       3/20         0G      1.552         24        224: 100%|██████████| 148/148 [05:20<00:00,  2.17s/it]
               classes   top1_acc   top5_acc: 100%|██████████| 19/19 [00:41<00:00,  2.16s/it]

                   all      0.682      0.897

      Epoch    GPU_mem       loss  Instances       Size



       4/20         0G      1.155         24        224: 100%|██████████| 148/148 [05:14<00:00,  2.13s/it]
               classes   top1_acc   top5_acc: 100%|██████████| 19/19 [00:39<00:00,  2.09s/it]

                   all       0.71      0.924

      Epoch    GPU_mem       loss  Instances       Size



       5/20         0G     0.8789         24        224: 100%|██████████| 148/148 [04:37<00:00,  1.88s/it]
               classes   top1_acc   top5_acc: 100%|██████████| 19/19 [00:30<00:00,  1.61s/it]

                   all      0.734      0.937

      Epoch    GPU_mem       loss  Instances       Size



       6/20         0G     0.7122         24        224: 100%|██████████| 148/148 [03:59<00:00,  1.62s/it]
               classes   top1_acc   top5_acc: 100%|██████████| 19/19 [00:30<00:00,  1.62s/it]

                   all      0.756      0.944

      Epoch    GPU_mem       loss  Instances       Size



       7/20         0G     0.5875         24        224: 100%|██████████| 148/148 [03:58<00:00,  1.61s/it]
               classes   top1_acc   top5_acc: 100%|██████████| 19/19 [00:30<00:00,  1.59s/it]

                   all      0.759      0.948

      Epoch    GPU_mem       loss  Instances       Size



       8/20         0G     0.5029         24        224: 100%|██████████| 148/148 [03:56<00:00,  1.60s/it]
               classes   top1_acc   top5_acc: 100%|██████████| 19/19 [00:30<00:00,  1.63s/it]

                   all      0.774       0.95

      Epoch    GPU_mem       loss  Instances       Size



       9/20         0G     0.4102         24        224: 100%|██████████| 148/148 [03:56<00:00,  1.60s/it]
               classes   top1_acc   top5_acc: 100%|██████████| 19/19 [00:30<00:00,  1.60s/it]

                   all      0.775      0.952

      Epoch    GPU_mem       loss  Instances       Size



      10/20         0G     0.3769         24        224: 100%|██████████| 148/148 [03:56<00:00,  1.60s/it]
               classes   top1_acc   top5_acc: 100%|██████████| 19/19 [00:30<00:00,  1.58s/it]

                   all      0.779      0.951

      Epoch    GPU_mem       loss  Instances       Size



      11/20         0G      0.323         24        224: 100%|██████████| 148/148 [03:54<00:00,  1.58s/it]
               classes   top1_acc   top5_acc: 100%|██████████| 19/19 [00:29<00:00,  1.58s/it]

                   all      0.776      0.946

      Epoch    GPU_mem       loss  Instances       Size



      12/20         0G     0.2846         24        224: 100%|██████████| 148/148 [03:54<00:00,  1.58s/it]
               classes   top1_acc   top5_acc: 100%|██████████| 19/19 [00:30<00:00,  1.58s/it]

                   all      0.784      0.948

      Epoch    GPU_mem       loss  Instances       Size



      13/20         0G     0.2618         24        224: 100%|██████████| 148/148 [03:54<00:00,  1.58s/it]
               classes   top1_acc   top5_acc: 100%|██████████| 19/19 [00:30<00:00,  1.58s/it]

                   all      0.799      0.959

      Epoch    GPU_mem       loss  Instances       Size



      14/20         0G     0.2361         24        224: 100%|██████████| 148/148 [03:53<00:00,  1.58s/it]
               classes   top1_acc   top5_acc: 100%|██████████| 19/19 [00:30<00:00,  1.59s/it]

                   all      0.788      0.959

      Epoch    GPU_mem       loss  Instances       Size



      15/20         0G     0.2139         24        224: 100%|██████████| 148/148 [03:57<00:00,  1.60s/it]
               classes   top1_acc   top5_acc: 100%|██████████| 19/19 [00:30<00:00,  1.59s/it]

                   all      0.801      0.958

      Epoch    GPU_mem       loss  Instances       Size



      16/20         0G     0.1858         24        224: 100%|██████████| 148/148 [03:52<00:00,  1.57s/it]
               classes   top1_acc   top5_acc: 100%|██████████| 19/19 [00:29<00:00,  1.57s/it]

                   all      0.803      0.957

      Epoch    GPU_mem       loss  Instances       Size



      17/20         0G     0.1667         24        224: 100%|██████████| 148/148 [03:52<00:00,  1.57s/it]
               classes   top1_acc   top5_acc: 100%|██████████| 19/19 [00:29<00:00,  1.57s/it]

                   all      0.801      0.959

      Epoch    GPU_mem       loss  Instances       Size



      18/20         0G     0.1596         24        224: 100%|██████████| 148/148 [03:53<00:00,  1.58s/it]
               classes   top1_acc   top5_acc: 100%|██████████| 19/19 [00:29<00:00,  1.56s/it]

                   all      0.815      0.959

      Epoch    GPU_mem       loss  Instances       Size



      19/20         0G     0.1668         24        224: 100%|██████████| 148/148 [03:56<00:00,  1.60s/it]
               classes   top1_acc   top5_acc: 100%|██████████| 19/19 [00:30<00:00,  1.60s/it]

                   all      0.811      0.959

      Epoch    GPU_mem       loss  Instances       Size



      20/20         0G     0.1543         24        224: 100%|██████████| 148/148 [03:56<00:00,  1.60s/it]
               classes   top1_acc   top5_acc: 100%|██████████| 19/19 [00:29<00:00,  1.58s/it]

                   all       0.81      0.958

20 epochs completed in 1.593 hours.
Optimizer stripped from art_classify/yolo11s-train/weights/last.pt, 11.1MB
Optimizer stripped from art_classify/yolo11s-train/weights/best.pt, 11.1MB

Validating art_classify/yolo11s-train/weights/best.pt...
Ultralytics 8.3.115 🚀 Python-3.9.22 torch-2.7.0 CPU (Apple M3 Pro)
YOLO11s-cls summary (fused): 47 layers, 5,498,178 parameters, 0 gradients, 12.0 GFLOPs





[34m[1mtrain:[0m /Users/kimdohyeon/건양대학교병원_바이오헬스/Biomedical_AI_Train/Dacon_art/yolo_dataset/train... found 4728 images in 50 classes ✅ 
[34m[1mval:[0m /Users/kimdohyeon/건양대학교병원_바이오헬스/Biomedical_AI_Train/Dacon_art/yolo_dataset/val... found 1183 images in 50 classes ✅ 
[34m[1mtest:[0m None...


               classes   top1_acc   top5_acc: 100%|██████████| 19/19 [00:28<00:00,  1.51s/it]


                   all      0.815      0.959
Speed: 0.0ms preprocess, 17.7ms inference, 0.0ms loss, 0.0ms postprocess per image
Results saved to [1mart_classify/yolo11s-train[0m


ultralytics.utils.metrics.ClassifyMetrics object with attributes:

confusion_matrix: <ultralytics.utils.metrics.ConfusionMatrix object at 0x30a5ff7c0>
curves: []
curves_results: []
fitness: 0.8867286741733551
keys: ['metrics/accuracy_top1', 'metrics/accuracy_top5']
results_dict: {'metrics/accuracy_top1': 0.814877450466156, 'metrics/accuracy_top5': 0.9585798978805542, 'fitness': 0.8867286741733551}
save_dir: PosixPath('art_classify/yolo11s-train')
speed: {'preprocess': 0.00015359847740805923, 'inference': 17.746346716820813, 'loss': 1.3910399334893318e-05, 'postprocess': 3.1591712851242644e-05}
task: 'classify'
top1: 0.814877450466156
top5: 0.9585798978805542

### 4. test

In [24]:
from ultralytics import YOLO

# 학습된 모델 불러오기
model = YOLO('/Users/kimdohyeon/건양대학교병원_바이오헬스/Biomedical_AI_Train/Dacon_art/art_classify/yolo11s-train/weights/best.pt')

In [25]:
# test 이미지 폴더 경로
results = model.predict(source="/Users/kimdohyeon/건양대학교병원_바이오헬스/Biomedical_AI_Train/Dacon_art/test", save=False, imgsz=224)


inference results will accumulate in RAM unless `stream=True` is passed, causing potential out-of-memory
errors for large sources or long-running streams and videos. See https://docs.ultralytics.com/modes/predict/ for help.

Example:
    results = model(source=..., stream=True)  # generator of Results objects
    for r in results:
        boxes = r.boxes  # Boxes object for bbox outputs
        masks = r.masks  # Masks object for segment masks outputs
        probs = r.probs  # Class probabilities for classification outputs

image 1/12670 /Users/kimdohyeon/건양대학교병원_바이오헬스/Biomedical_AI_Train/Dacon_art/test/TEST_00000.jpg: 224x224 Jan van Eyck 0.16, Kazimir Malevich 0.14, Henri Rousseau 0.13, Gustave Courbet 0.11, Raphael 0.11, 10.9ms
image 2/12670 /Users/kimdohyeon/건양대학교병원_바이오헬스/Biomedical_AI_Train/Dacon_art/test/TEST_00001.jpg: 224x224 Edgar Degas 0.44, Amedeo Modigliani 0.35, Pablo Picasso 0.12, Diego Rivera 0.03, Gustav Klimt 0.01, 9.3ms
image 3/12670 /Users/kimdohyeon/건양대학교병원_바이오헬스/

In [26]:
# 클래스 이름 리스트
class_names = model.names

# 이미지별 예측 클래스 (가장 확률 높은 클래스 index → 이름으로 변환)
predicted_labels = [class_names[int(r.probs.top1)] for r in results]

### 5. Submission.csv

In [27]:
import pandas as pd

test_df = pd.read_csv("/Users/kimdohyeon/건양대학교병원_바이오헬스/Biomedical_AI_Train/Dacon_art/test.csv")
test_df["artist"] = predicted_labels

submission = test_df[["id", "artist"]]
submission.to_csv("submission_yolov11s.csv", index=False)