<a href="https://colab.research.google.com/github/aiqicheng/EmotionTeller/blob/main/DetectionYOLO.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Mount Google Drive at /content/drive; the data paths defined below live under this mount point.
# google.colab is only importable inside a Colab runtime.
from google.colab import drive
drive.mount('/content/drive')


# --- install and import packages ---
!pip install -q ultralytics
import ast
import shutil, os, json, urllib.request
import warnings
from pathlib import Path

import numpy as np
import pandas as pd
import torch
from sklearn.model_selection import train_test_split
from tqdm import tqdm
from ultralytics import YOLO


# --- define paths ---

# Working directory on the mounted Drive; change to your path (keep the trailing "/").
data_folder = "/content/drive/MyDrive/emo/"
# Folder containing the images, and the metadata CSV that describes them.
image_folder = data_folder+"ImageData/"
df_path = data_folder+"train_meta.csv"
df = pd.read_csv(df_path)
# df_path_test = data_folder+"test_meta.csv"
if not os.path.exists(image_folder):
  # The os module has no warn(); warnings.warn emits the intended non-fatal warning
  # instead of raising AttributeError when the folder is missing.
  warnings.warn("image folder not found")
# Folder for model weights, the YOLO-format dataset and run outputs.
# data_folder already ends with "/", so no extra separator is added here.
model_folder = data_folder+"BaselineModels/"



Mounted at /content/drive
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m1.1/1.1 MB[0m [31m66.8 MB/s[0m eta [36m0:00:00[0m
[?25hCreating new Ultralytics Settings v0.0.6 file ‚úÖ 
View Ultralytics Settings with 'yolo settings' or at '/root/.config/Ultralytics/settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.


# Detect Faces

## set up dataset and config file

In [None]:
# Columns read from df: file_name (str) and objects (a string holding a dict literal with a 'bbox' key).
# Relative file_name values are resolved against IMAGE_ROOT, so point it at your image folder.
IMAGE_ROOT = Path(image_folder)
# Root folder of the YOLO-format dataset written by the cells below.
DATASET = Path(model_folder + "ImageData_yolo")

# --- transform objects column ---
def _bboxes_from_objects(objects_repr):
    """Parse one 'objects' cell with ast.literal_eval and return its 'bbox' entry."""
    parsed = ast.literal_eval(objects_repr)
    return parsed['bbox']

# Boxes are presumably lists of [x%, y%, w%, h%] -- confirm against the CSV.
df['bboxes_pct_labelled'] = df['objects'].apply(_bboxes_from_objects)

# --- prepare yolo file structure ---
def to_yolo_rows(bboxes_pct):
    """Convert percentage boxes into YOLO label lines for the single class ``face`` (id 0).

    Args:
        bboxes_pct: iterable of ``(x, y, w, h)`` boxes expressed in percent of the
            image size, where ``x, y`` is the box origin that width/height are added
            to. ``None`` or an empty iterable yields no rows.

    Returns:
        list[str]: one ``"0 xc yc w h"`` line per kept box, with centre and size
        normalised to ``[0, 1]`` and printed with six decimals.

    Boxes are clipped to the image by clamping their *corners*, so the resulting
    centre/size always describe a box that lies inside the image. (Clamping the
    centre and the size independently can leave a box sticking out of the image.)
    Boxes with no area left after clipping are dropped.
    """
    rows = []
    if bboxes_pct is None:
        return rows

    def _clamp01(value):
        # Restrict a normalised coordinate to the valid [0, 1] range.
        return min(max(value, 0.0), 1.0)

    for (x_pct, y_pct, w_pct, h_pct) in bboxes_pct:
        x, y, w, h = x_pct/100.0, y_pct/100.0, w_pct/100.0, h_pct/100.0
        # Clip both corners to the image, then derive centre and size from them.
        x1, y1 = _clamp01(x), _clamp01(y)
        x2, y2 = _clamp01(x + w), _clamp01(y + h)
        w, h = x2 - x1, y2 - y1
        if w <= 0.0 or h <= 0.0:
            # Nothing of the box is inside the image (or it was degenerate to begin with).
            continue
        xc, yc = (x1 + x2)/2.0, (y1 + y2)/2.0
        rows.append(f"0 {xc:.6f} {yc:.6f} {w:.6f} {h:.6f}")  # single class: face -> id 0
    return rows

def make_split_folders(root):
    """Create the images/ and labels/ train and val subfolders under ``root``.

    ``root`` must support the ``/`` path operator (e.g. a ``pathlib.Path``).
    Folders that already exist are left as they are.
    """
    split_dirs = ("images/train", "images/val", "labels/train", "labels/val")
    for rel_dir in split_dirs:
        target = Path(root / rel_dir)
        target.mkdir(parents=True, exist_ok=True)

def copy_and_write_label(img_src, label_rows, split, root=DATASET):
    """Copy one image into the YOLO dataset and write its label file.

    Args:
        img_src: image path; a relative path is resolved against IMAGE_ROOT.
        label_rows: label lines, joined with newlines into ``<stem>.txt``.
        split: split subfolder name used under ``images/`` and ``labels/``.
        root: dataset root containing the ``images/<split>`` and ``labels/<split>`` folders.

    A missing source image only prints a warning; nothing is copied or written for it.
    """
    src = Path(img_src)
    src = src if src.is_absolute() else IMAGE_ROOT / src
    if src.exists():
        # copy2 copies the file and attempts to preserve its metadata.
        shutil.copy2(src, root / f"images/{split}/{src.name}")
        label_file = root / f"labels/{split}/{src.stem}.txt"
        label_file.write_text("\n".join(label_rows))
    else:
        print(f"[warn] missing image: {src}")

# Build the YOLO dataset only once: the whole step is skipped when the DATASET folder already exists.
if not os.path.exists(DATASET):

  DATASET.mkdir(parents=True, exist_ok=True)

  make_split_folders(DATASET)

  # sklearn's train_test_split takes train_size / random_state; the previous
  # frac_train / seed keywords are not part of its signature and raise TypeError.
  train_df, val_df = train_test_split(df, train_size=0.8, random_state=42)

  for split_name, d in [("train", train_df), ("val", val_df)]:
      # iterrows() is a generator, so pass total explicitly to get a real progress bar.
      for _, row in tqdm(d.iterrows(), total=len(d), desc=f"Processing {split_name} images"):
          file_name = row["file_name"]
          gt_boxes  = row.get("bboxes_pct_labelled") or []
          yolo_rows = to_yolo_rows(gt_boxes)
          copy_and_write_label(file_name, yolo_rows, split_name, root=DATASET)

In [None]:
# --- make yaml file ---
# Dataset config passed as data=... to the model.val / model.train calls in this notebook.
# It is written only when absent; an existing faces.yaml is kept untouched.
yaml_path = DATASET.joinpath("faces.yaml")
yaml_missing = not yaml_path.exists()
if yaml_missing:
  faces_yaml = f"""
  path: {DATASET}
  train: images/train
  val: images/val
  names: [face]
  """
  yaml_path.write_text(faces_yaml)
# Show the file as it is on disk.
print(yaml_path.read_text())


path: /content/drive/MyDrive/emo/BaselineModels/ImageData_yolo
train: images/train
val: images/val
names: [face]



## inference with pretrained yolov8-face

In [None]:
# --- prepare or download model ---
weights = model_folder+"yolov8n-face.pt"
if not os.path.exists(weights):
  # Fetch the YOLOv8n face weights from the lindevs/yolov8-face GitHub releases.
  url = "https://github.com/lindevs/yolov8-face/releases/latest/download/yolov8n-face-lindevs.pt"
  print("Downloading YOLOv8-Face weights...")
  # Download under a temporary name and rename on success, so an interrupted
  # download never leaves a truncated file that the exists() check would accept.
  partial_weights = weights + ".part"
  urllib.request.urlretrieve(url, partial_weights)
  os.replace(partial_weights, weights)

model = YOLO(weights)

# --- run inference on a folder or a file ---
pred = model.predict(
    source=str(DATASET/"images/val"),
    imgsz=640,
    conf=0.25,
    save=True,         # saves annotated images under <project>/<name>
    project=model_folder+"runs",
    name="detect_yolo8n_face_val_predict",
    exist_ok=True,     # reuse the same output folder on re-runs
    save_txt=False
)




image 1/36 /content/drive/MyDrive/emo/BaselineModels/ImageData_yolo/images/val/05333b2b99254bb98670bf10f98089d4.jpg: 256x640 11 faces, 303.5ms
image 2/36 /content/drive/MyDrive/emo/BaselineModels/ImageData_yolo/images/val/12_Group_Group_12_Group_Group_12_141.jpg: 640x640 4 faces, 280.0ms
image 3/36 /content/drive/MyDrive/emo/BaselineModels/ImageData_yolo/images/val/12_Group_Group_12_Group_Group_12_238.jpg: 480x640 3 faces, 175.4ms
image 4/36 /content/drive/MyDrive/emo/BaselineModels/ImageData_yolo/images/val/12_Group_Group_12_Group_Group_12_633.jpg: 512x640 4 faces, 203.1ms
image 5/36 /content/drive/MyDrive/emo/BaselineModels/ImageData_yolo/images/val/12_Group_Group_12_Group_Group_12_81.jpg: 416x640 15 faces, 166.3ms
image 6/36 /content/drive/MyDrive/emo/BaselineModels/ImageData_yolo/images/val/17_Ceremony_Ceremony_17_288.jpg: 448x640 28 faces, 163.7ms
image 7/36 /content/drive/MyDrive/emo/BaselineModels/ImageData_yolo/images/val/20_Family_Group_Family_Group_20_945.jpg: 480x640 7 face

In [None]:
# Evaluate the currently loaded model on the split declared in faces.yaml.
# NOTE(review): conf=0.25 is passed here, while the model.val(...) calls that follow
# fine-tuning in this notebook do not pass conf; the mAP values may therefore not be
# directly comparable -- confirm which threshold is intended.
metrics = model.val(
    data=str(DATASET/"faces.yaml"),  # points to YAML with val path
    imgsz=640,
    split="val",    # explicitly specify which split to use (YOLOv8>=v8.1)
    conf=0.25,
    project=model_folder+"runs", # Save evaluation results to the same project folder
    name="detect_yolo8n_face_val_eval", # Save evaluation results to the same named folder
    exist_ok=True   # reuse the folder on re-runs instead of creating <name>2, <name>3, ...
)

print(metrics.box.map50)      # mAP@0.5
print(metrics.box.map)        # mAP@0.5:0.95

Ultralytics 8.3.224 üöÄ Python-3.12.12 torch-2.8.0+cu126 CPU (Intel Xeon CPU @ 2.20GHz)
[34m[1mval: [0mFast image access ‚úÖ (ping: 1.3¬±0.8 ms, read: 19.1¬±15.8 MB/s, size: 78.8 KB)
[K[34m[1mval: [0mScanning /content/drive/MyDrive/emo/BaselineModels/ImageData_yolo/labels/val.cache... 36 images, 0 backgrounds, 0 corrupt: 100% ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ 36/36 21.1Kit/s 0.0s
[K                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100% ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ 3/3 0.2it/s 12.0s
                   all         36        223        0.7      0.711      0.746      0.235
Speed: 9.6ms preprocess, 285.9ms inference, 0.0ms loss, 0.6ms postprocess per image
Results saved to [1m/content/drive/MyDrive/emo/BaselineModels/runs/detect_yolo8n_face_infer2[0m
0.7456185627673727
0.23461991854227673


## finetune with train split

In [None]:
# Start again from the same pretrained YOLOv8n face weights used for the baseline.
weights = model_folder+"yolov8n-face.pt"
if not os.path.exists(weights):
  # Fetch the YOLOv8n face weights from the lindevs/yolov8-face GitHub releases.
  url = "https://github.com/lindevs/yolov8-face/releases/latest/download/yolov8n-face-lindevs.pt"
  print("Downloading YOLOv8-Face weights...")
  # Download under a temporary name and rename on success, so an interrupted
  # download never leaves a truncated file that the exists() check would accept.
  partial_weights = weights + ".part"
  urllib.request.urlretrieve(url, partial_weights)
  os.replace(partial_weights, weights)

model = YOLO(weights)


# Fine-tune on the train split declared in faces.yaml.
results = model.train(
    data=str(DATASET/"faces.yaml"),
    epochs=100,
    imgsz=640,
    batch=16,
    lr0=0.01,            # starting LR; you can keep defaults too
    patience=20,         # early stopping
    optimizer="SGD",     # NOTE(review): Ultralytics' default is believed to be "auto" -- confirm in the train settings docs
    pretrained=True,
    project=model_folder+"runs",
    name="detect_finetune_yolo8n_face_train",
    exist_ok=True        # reuse the run folder on re-runs
)
# evaluate
metrics = model.val(
    data=str(DATASET/"faces.yaml"),
    imgsz=640,
    project=model_folder+"runs",
    name="detect_finetune_yolo8n_face_train_eval",
    exist_ok=True
)
print(metrics)  # includes mAP50 and mAP50-95


Ultralytics 8.3.224 üöÄ Python-3.12.12 torch-2.8.0+cu126 CUDA:0 (NVIDIA A100-SXM4-40GB, 40507MiB)
[34m[1mengine/trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=16, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, compile=False, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=/content/drive/MyDrive/emo/BaselineModels/ImageData_yolo/faces.yaml, degrees=0.0, deterministic=True, device=None, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=100, erasing=0.4, exist_ok=True, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=640, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.01, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=/content/drive/MyDrive/emo//BaselineModels/yolov8n-face.pt, momentum=0.937, mosaic=1.0, multi_scale=False, name=detect_finetune_yolo8n

In [None]:
# best_weights_path was never defined in this notebook, so this cell failed on a fresh kernel.
# The YOLOv8 fine-tuning run uses project=model_folder+"runs" and
# name="detect_finetune_yolo8n_face_train" with exist_ok=True, so its folder name is fixed;
# Ultralytics stores the best checkpoint of a run under weights/best.pt.
best_weights_path = os.path.join(
    model_folder, "runs", "detect_finetune_yolo8n_face_train", "weights", "best.pt"
)
finetuned_preds = YOLO(best_weights_path).predict(
    source=str(DATASET/"images/val"),
    imgsz=640,
    conf=0.25,
    save=False,
    verbose=False # Suppress detailed output
)
# Print a short summary instead of echoing the Results list, whose repr dumps whole image arrays.
print(f"{len(finetuned_preds)} images, {sum(len(r.boxes) for r in finetuned_preds)} faces detected")

[ultralytics.engine.results.Results object with attributes:
 
 boxes: ultralytics.engine.results.Boxes object
 keypoints: None
 masks: None
 names: {0: 'face'}
 obb: None
 orig_img: array([[[ 93, 125, 144],
         [ 95, 127, 146],
         [ 95, 127, 146],
         ...,
         [  3,   5,   6],
         [  3,   5,   6],
         [  2,   4,   5]],
 
        [[ 92, 124, 143],
         [ 94, 126, 145],
         [ 95, 127, 146],
         ...,
         [  7,   9,  10],
         [  6,   8,   9],
         [  6,   8,   9]],
 
        [[ 90, 122, 141],
         [ 92, 124, 143],
         [ 94, 126, 145],
         ...,
         [  8,  10,  11],
         [  8,  10,  11],
         [  7,   9,  10]],
 
        ...,
 
        [[  5,   8,  12],
         [  5,   8,  12],
         [  5,   8,  12],
         ...,
         [ 10,  14,  19],
         [ 11,  15,  20],
         [ 11,  15,  20]],
 
        [[  5,   8,  12],
         [  5,   8,  12],
         [  5,   8,  12],
         ...,
         [ 10,  14, 

## compare with yolo11, general purpose detection

In [None]:
# --- inference only ---
weights11 = model_folder+"yolov11n-face.pt"
# The check must use weights11: weights_yolo11n was never defined and raised NameError.
if not os.path.exists(weights11):
  # Fetch the YOLO11n face weights from the YapaLab/yolo-face GitHub releases.
  URL="https://github.com/YapaLab/yolo-face/releases/download/v0.0.0/yolov11n-face.pt"
  # Download under a temporary name and rename on success, so an interrupted
  # download never leaves a truncated file that the exists() check would accept.
  partial_weights11 = weights11 + ".part"
  urllib.request.urlretrieve(URL, partial_weights11)
  os.replace(partial_weights11, weights11)

model = YOLO(weights11)

# Same evaluation settings as the YOLOv8 baseline, so the two can be compared.
metrics = model.val(
    data=str(DATASET/"faces.yaml"),  # points to YAML with val path
    imgsz=640,
    split="val",    # explicitly specify which split to use (YOLOv8>=v8.1)
    conf=0.25,
    project=model_folder+"runs", # Save evaluation results to the same project folder
    name="detect_yolo11n_face_val_eval", # Save evaluation results to the same named folder
    exist_ok=True   # reuse the folder on re-runs instead of creating <name>2, <name>3, ...
)

print(metrics.box.map50)      # mAP@0.5
print(metrics.box.map)        # mAP@0.5:0.95

Ultralytics 8.3.224 üöÄ Python-3.12.12 torch-2.8.0+cu126 CUDA:0 (NVIDIA A100-SXM4-40GB, 40507MiB)
YOLO11n summary (fused): 100 layers, 2,582,347 parameters, 0 gradients, 6.3 GFLOPs
[34m[1mval: [0mFast image access ‚úÖ (ping: 0.3¬±0.1 ms, read: 48.0¬±24.1 MB/s, size: 69.5 KB)
[K[34m[1mval: [0mScanning /content/drive/MyDrive/emo/BaselineModels/ImageData_yolo/labels/val.cache... 36 images, 0 backgrounds, 0 corrupt: 100% ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ 36/36 71.8Kit/s 0.0s
[K                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100% ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ 3/3 1.9it/s 1.6s
                   all         36        223      0.675      0.699      0.686      0.222
Speed: 4.1ms preprocess, 2.9ms inference, 0.0ms loss, 1.9ms postprocess per image
Results saved to [1m/content/drive/MyDrive/emo/BaselineModels/runs/detect_yolo11n_val_eval3[0m
0.6862596875818596
0.22196242702274227


In [None]:
# --- finetune ---
weights11 = model_folder+"yolov11n-face.pt"
# The check must use weights11: weights_yolo11n was never defined and raised NameError.
if not os.path.exists(weights11):
  # Fetch the YOLO11n face weights from the YapaLab/yolo-face GitHub releases.
  URL="https://github.com/YapaLab/yolo-face/releases/download/v0.0.0/yolov11n-face.pt"
  # Download under a temporary name and rename on success, so an interrupted
  # download never leaves a truncated file that the exists() check would accept.
  partial_weights11 = weights11 + ".part"
  urllib.request.urlretrieve(URL, partial_weights11)
  os.replace(partial_weights11, weights11)

model = YOLO(weights11)


# Fine-tune on the train split declared in faces.yaml, with the same settings as the YOLOv8 run.
results = model.train(
    data=str(DATASET/"faces.yaml"),
    epochs=100,
    imgsz=640,
    batch=16,
    lr0=0.01,            # starting LR; you can keep defaults too
    patience=20,         # early stopping
    optimizer="SGD",     # NOTE(review): Ultralytics' default is believed to be "auto" -- confirm in the train settings docs
    pretrained=True,
    project=model_folder+"runs",
    name="detect_finetune_yolo11n_face_train",
    verbose = False,
    exist_ok=True        # reuse the run folder on re-runs
)
# evaluate
metrics = model.val(
    data=str(DATASET/"faces.yaml"),
    imgsz=640,
    project=model_folder+"runs",
    name="detect_finetune_yolo11n_face_train_eval",
    exist_ok=True
)
print(metrics)  # includes mAP50 and mAP50-95


Ultralytics 8.3.224 üöÄ Python-3.12.12 torch-2.8.0+cu126 CUDA:0 (NVIDIA A100-SXM4-40GB, 40507MiB)
[34m[1mengine/trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=16, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, compile=False, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=/content/drive/MyDrive/emo/BaselineModels/ImageData_yolo/faces.yaml, degrees=0.0, deterministic=True, device=None, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=100, erasing=0.4, exist_ok=True, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=640, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.01, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=/content/drive/MyDrive/emo//BaselineModels/yolov11n-face.pt, momentum=0.937, mosaic=1.0, multi_scale=False, name=detect_finetune_yolo1

# Detect Certain Faces
Now that we've seen that the model is likely to detect faces, how well does it detect certain faces?