## Stratifying the test dataset

In [6]:
import os
import shutil
import glob
import numpy as np
from tqdm import tqdm

# ================= CONFIGURATION =================
# 1. Input paths (the main test set).
# SOURCE_LABEL_DIR is searched for '*.txt' label files; SOURCE_IMAGE_DIR is where the
# image with the same base name is looked up. Both currently point at the same folder,
# which should contain ALL test labels (am + pm + off combined).
SOURCE_LABEL_DIR = '/mnt/Documents/Dad/github/DUP/DATA/Euljiro/4_peak_n_offpeak_combined/test'
SOURCE_IMAGE_DIR = '/mnt/Documents/Dad/github/DUP/DATA/Euljiro/4_peak_n_offpeak_combined/test'

# 2. Output path.
# WARNING: split_dataset() deletes this folder (if present) before recreating it.
OUTPUT_ROOT = '/mnt/Documents/Dad/github/DUP/DATA/Euljiro/4_peak_n_offpeak_combined/stratified_test_sets'

# 3. Thresholds (adjusted so that every bucket receives some frames).
# Density = number of label boxes parsed from a frame's label file (all classes).
# Each (low, high) pair is compared with both ends inclusive.
DENSITY_THRESHOLDS = {
    'density_low':  (0, 2),    # 0 to 2 boxes
    'density_med':  (3, 6),    # 3 to 6 boxes
    'density_high': (7, 999)   # 7 up to 999 boxes (effectively no upper limit)
}

# Occlusion = mean IoU over every pair of boxes in the frame (0.0 when fewer than 2 boxes).
# Each (low, high) pair is a range of that mean IoU; the low bound is inclusive.
OCCLUSION_THRESHOLDS = {
    'occlusion_low':  (0.0, 0.02),  # mean pairwise IoU below 0.02
    'occlusion_med':  (0.02, 0.15), # mean pairwise IoU from 0.02 up to 0.15
    'occlusion_high': (0.15, 1.0)   # mean pairwise IoU from 0.15 up to the 1.0 cap
}
# =================================================

def xywh2xyxy(x, y, w, h):
    """Convert a centre-format box into corner format.

    Args:
        x: Horizontal coordinate of the box centre.
        y: Vertical coordinate of the box centre.
        w: Full box width.
        h: Full box height.

    Returns:
        list: ``[x1, y1, x2, y2]`` where ``(x1, y1)`` is the centre shifted back by
        half the size and ``(x2, y2)`` is the centre shifted forward by half the size.
    """
    half_w = w / 2
    half_h = h / 2
    return [x - half_w, y - half_h, x + half_w, y + half_h]

def compute_iou(box1, box2):
    """Return the intersection-over-union of two corner-format boxes.

    Args:
        box1: Indexable ``[x1, y1, x2, y2]`` for the first box.
        box2: Indexable ``[x1, y1, x2, y2]`` for the second box.

    Returns:
        ``intersection / union`` of the two boxes, or ``0`` when the union area is
        not positive (e.g. both boxes are degenerate).
    """
    # Overlap extent along each axis, clamped at zero for disjoint boxes.
    overlap_w = max(0, min(box1[2], box2[2]) - max(box1[0], box2[0]))
    overlap_h = max(0, min(box1[3], box2[3]) - max(box1[1], box2[1]))
    intersection = overlap_w * overlap_h

    first_area = (box1[2] - box1[0]) * (box1[3] - box1[1])
    second_area = (box2[2] - box2[0]) * (box2[3] - box2[1])
    union = first_area + second_area - intersection

    if union > 0:
        return intersection / union
    return 0

def get_frame_metrics(label_path):
    """Measure how crowded and how overlapped the boxes in one label file are.

    Every line with at least five whitespace-separated fields contributes one box:
    the first field is ignored and the next four are read as ``x y w h`` (centre
    format) and converted with ``xywh2xyxy``. Shorter lines are skipped.

    Args:
        label_path: Path to the label text file.

    Returns:
        tuple: ``(density, mean_iou)`` where ``density`` is the number of boxes read
        and ``mean_iou`` is the mean IoU over every unordered pair of boxes. When the
        file does not exist the result is ``(0, 0.0)``; with fewer than two boxes the
        mean IoU is ``0.0``.
    """
    if not os.path.exists(label_path):
        return 0, 0.0

    boxes = []
    with open(label_path, 'r') as f:
        for raw_line in f:
            fields = raw_line.strip().split()
            if len(fields) < 5:
                continue
            cx, cy, bw, bh = map(float, fields[1:5])
            boxes.append(xywh2xyxy(cx, cy, bw, bh))

    density = len(boxes)
    if density < 2:
        # No pair exists, so there is nothing to overlap.
        return density, 0.0

    # With two or more boxes there is always at least one pair to average.
    pair_ious = [
        compute_iou(boxes[i], boxes[j])
        for i in range(density)
        for j in range(i + 1, density)
    ]
    return density, np.mean(pair_ious)

def _copy_frame_pair(src_img, label_file, category_dir):
    """Copy one image and its label file into ``category_dir``, keeping their names."""
    shutil.copy(src_img, os.path.join(category_dir, os.path.basename(src_img)))
    shutil.copy(label_file, os.path.join(category_dir, os.path.basename(label_file)))


def split_dataset():
    """Copy every test frame into one density bucket and one occlusion bucket.

    For each ``*.txt`` label file in ``SOURCE_LABEL_DIR`` the matching image is
    looked up in ``SOURCE_IMAGE_DIR`` (same base name, trying ``.jpg``, ``.png``
    and ``.jpeg`` in that order). The frame's box count and mean pairwise IoU come
    from ``get_frame_metrics``; the image/label pair is then copied into the first
    matching folder of ``DENSITY_THRESHOLDS`` and the first matching folder of
    ``OCCLUSION_THRESHOLDS`` under ``OUTPUT_ROOT``.

    Bucket rules:
        * Density ranges are inclusive at both ends.
        * Occlusion ranges are ``low <= mean_iou < high``, except that the bucket
          holding the largest upper bound also accepts values at or above that
          bound, so a frame with mean IoU of exactly 1.0 is not dropped.

    Side effects:
        ``OUTPUT_ROOT`` is deleted (if it exists) and rebuilt from scratch, and a
        per-category frame count is printed at the end. Label files without a
        matching image are skipped and their number is reported.
    """
    # Clean/Create Output Dirs
    if os.path.exists(OUTPUT_ROOT):
        shutil.rmtree(OUTPUT_ROOT)

    all_cats = list(DENSITY_THRESHOLDS.keys()) + list(OCCLUSION_THRESHOLDS.keys())
    for cat in all_cats:
        os.makedirs(os.path.join(OUTPUT_ROOT, cat), exist_ok=True)

    # Process Files
    label_files = glob.glob(os.path.join(SOURCE_LABEL_DIR, "*.txt"))
    print(f"Stratifying {len(label_files)} files to {OUTPUT_ROOT}...")

    stats = {cat: 0 for cat in all_cats}
    skipped = 0

    # Largest configured IoU bound; the bucket that owns it is closed on the right.
    top_iou_bound = max((high for _, high in OCCLUSION_THRESHOLDS.values()), default=None)

    for label_file in tqdm(label_files):
        density, mean_iou = get_frame_metrics(label_file)

        # Strip only the final extension; str.replace would also rewrite any
        # '.txt' / '.jpg' that happens to appear elsewhere in the name or path.
        stem = os.path.splitext(os.path.basename(label_file))[0]

        src_img = None
        for ext in ('.jpg', '.png', '.jpeg'):
            candidate = os.path.join(SOURCE_IMAGE_DIR, stem + ext)
            if os.path.exists(candidate):
                src_img = candidate
                break
        if src_img is None:
            skipped += 1
            continue

        # Density Logic
        for cat, (low, high) in DENSITY_THRESHOLDS.items():
            if low <= density <= high:
                # The copy keeps the image's real extension, so a PNG is no
                # longer written under a '.jpg' name.
                _copy_frame_pair(src_img, label_file, os.path.join(OUTPUT_ROOT, cat))
                stats[cat] += 1
                break

        # Occlusion Logic
        for cat, (low, high) in OCCLUSION_THRESHOLDS.items():
            in_range = low <= mean_iou < high
            at_cap = high == top_iou_bound and mean_iou >= high
            if in_range or at_cap:
                _copy_frame_pair(src_img, label_file, os.path.join(OUTPUT_ROOT, cat))
                stats[cat] += 1
                break

    print("\n=== NEW DISTRIBUTION ===")
    print(f"{'Category':<20} | {'Frames'}")
    print("-" * 30)
    for cat in DENSITY_THRESHOLDS.keys():
        print(f"{cat:<20} | {stats[cat]}")
    print("-" * 30)
    for cat in OCCLUSION_THRESHOLDS.keys():
        print(f"{cat:<20} | {stats[cat]}")

    if skipped:
        print(f"\nSkipped {skipped} label file(s) with no matching image in {SOURCE_IMAGE_DIR}")

# Run the stratification when this cell/script is executed directly.
# Note: split_dataset() removes and rebuilds OUTPUT_ROOT on every run.
if __name__ == "__main__":
    split_dataset()

Stratifying 697 files to /mnt/Documents/Dad/github/DUP/DATA/Euljiro/4_peak_n_offpeak_combined/stratified_test_sets...


  0%|          | 0/697 [00:00<?, ?it/s]

100%|██████████| 697/697 [00:00<00:00, 2987.29it/s]


=== NEW DISTRIBUTION ===
Category             | Frames
------------------------------
density_low          | 494
density_med          | 179
density_high         | 24
------------------------------
occlusion_low        | 480
occlusion_med        | 194
occlusion_high       | 23





## Restructuring each category into YOLO `images/` and `labels/` folders

In [7]:
import os
import shutil
import glob
from tqdm import tqdm

# ================= CONFIGURATION =================
# Root folder holding one subfolder per category (same location as OUTPUT_ROOT in
# the stratification cell). Each category subfolder is reorganised in place.
STRATIFIED_ROOT = '/mnt/Documents/Dad/github/DUP/DATA/Euljiro/4_peak_n_offpeak_combined/stratified_test_sets'

# Category subfolder names to process; folders that do not exist are skipped.
CATEGORIES = [
    'density_low', 'density_med', 'density_high',
    'occlusion_low', 'occlusion_med', 'occlusion_high'
]
# =================================================

def restructure_folders():
    """Split each category folder into ``images/`` and ``labels/`` subfolders.

    For every name in ``CATEGORIES`` that exists under ``STRATIFIED_ROOT``, the two
    subfolders are created if needed, files matching ``*.jpg``, ``*.png`` and
    ``*.jpeg`` directly inside the category folder are moved into ``images/``, and
    ``*.txt`` files are moved into ``labels/``. Missing category folders are
    skipped. Progress is printed per category.
    """
    print(f"Fixing folder structure in: {STRATIFIED_ROOT}")

    for cat in CATEGORIES:
        cat_dir = os.path.join(STRATIFIED_ROOT, cat)
        if not os.path.exists(cat_dir):
            continue

        # Create 'images' and 'labels' subfolders
        images_dir = os.path.join(cat_dir, 'images')
        labels_dir = os.path.join(cat_dir, 'labels')
        for sub_dir in (images_dir, labels_dir):
            os.makedirs(sub_dir, exist_ok=True)

        # (glob pattern, destination) pairs: images first, then labels.
        move_plan = [(pattern, images_dir) for pattern in ['*.jpg', '*.png', '*.jpeg']]
        move_plan.append(('*.txt', labels_dir))

        for pattern, dest_dir in move_plan:
            for src_path in glob.glob(os.path.join(cat_dir, pattern)):
                shutil.move(src_path, os.path.join(dest_dir, os.path.basename(src_path)))

        print(f"  Processed {cat}: Separated images & labels.")

    print("\n✅ Done! Folders are now compatible with YOLO.")

# Run the folder restructuring when this cell/script is executed directly.
if __name__ == "__main__":
    restructure_folders()

Fixing folder structure in: /mnt/Documents/Dad/github/DUP/DATA/Euljiro/4_peak_n_offpeak_combined/stratified_test_sets
  Processed density_low: Separated images & labels.
  Processed density_med: Separated images & labels.
  Processed density_high: Separated images & labels.
  Processed occlusion_low: Separated images & labels.
  Processed occlusion_med: Separated images & labels.
  Processed occlusion_high: Separated images & labels.

✅ Done! Folders are now compatible with YOLO.


## Running evaluation

In [16]:
import os
import subprocess
import yaml
import pandas as pd
import glob

# ================= CONFIGURATION =================
# Root folder of the stratified sets; each '<category>/images' folder below it is evaluated.
STRATIFIED_ROOT = '/mnt/Documents/Dad/github/DUP/DATA/Euljiro/4_peak_n_offpeak_combined/stratified_test_sets'
# Model checkpoint handed to val.py through --weights.
WEIGHTS_PATH = '/mnt/Documents/Dad/github/DUP/yolov9_udp/results_T_ITS_2026/Euljiro/v9_320_peak_n_off_peak_TrainVal_epoch_70_seed0/weights/last.pt'

# Categories to evaluate, in the order they appear in the printed table.
CATEGORIES = [
    'density_low', 'density_med', 'density_high',
    'occlusion_low', 'occlusion_med', 'occlusion_high'
]
# =================================================

def create_yaml(category, root_path):
    """Write a dataset YAML for one category and return its file name.

    The file ``data_test_<category>.yaml`` is written to the current working
    directory. ``path`` is the absolute category folder, and ``train``, ``val`` and
    ``test`` all point at the absolute ``<category>/images`` folder. Class names
    are fixed to ``{0: 'U', 1: 'D', 2: 'P'}``.

    Args:
        category: Name of the category subfolder under ``root_path``.
        root_path: Folder containing the category subfolders.

    Returns:
        str: Relative path of the YAML file that was written.
    """
    category_dir = os.path.abspath(os.path.join(root_path, category))
    abs_img_dir = os.path.abspath(os.path.join(root_path, category, 'images'))

    content = {'path': category_dir}
    # All three splits use the same image folder.
    for split in ('train', 'val', 'test'):
        content[split] = abs_img_dir
    content['names'] = {0: 'U', 1: 'D', 2: 'P'}

    yaml_path = f"data_test_{category}.yaml"
    with open(yaml_path, 'w') as f:
        yaml.dump(content, f)
    return yaml_path

def parse_map_from_csv(run_name):
    """Read mAP@0.5 from ``runs/val/<run_name>/results.csv`` if that file exists.

    The value is taken from the first data row of the first column whose name
    contains ``'mAP_0.5'`` but not ``'0.95'``. If no column matches, the value at
    column index 6 of the first row is used instead.

    Args:
        run_name: Name of the run folder under ``runs/val``.

    Returns:
        float or None: The parsed value, or ``None`` when the file is missing or
        anything goes wrong while reading or converting it.
    """
    csv_path = f"runs/val/{run_name}/results.csv"
    if not os.path.exists(csv_path):
        return None

    try:
        df = pd.read_csv(csv_path)
        # First column that names mAP@0.5 but not mAP@0.5:0.95.
        map50_col = next(
            (col for col in df.columns if 'mAP_0.5' in col and '0.95' not in col),
            None,
        )
        if map50_col is not None:
            return float(df.iloc[0][map50_col])
        # Fallback if column names differ: positional lookup.
        return float(df.iloc[0, 6])
    except Exception:
        return None

def parse_map_from_stdout(output_text):
    """Extract a score from captured console output.

    Scans the text line by line for the first line that has more than five
    whitespace-separated tokens, starts with the token ``'all'``, and whose sixth
    token parses as a float.

    Args:
        output_text: Full captured output as a single string.

    Returns:
        float or None: The sixth token of the first qualifying line, or ``None``
        when no line qualifies.
    """
    for row in output_text.split('\n'):
        tokens = row.split()
        if len(tokens) <= 5 or tokens[0] != 'all':
            continue
        try:
            return float(tokens[5])
        except ValueError:
            # Not a numeric summary row; keep looking.
            continue
    return None

# Evaluate the checkpoint on every stratified category and print one table row each.
# For each category: build a dataset YAML, run val.py as a subprocess, then read
# mAP@0.5 from results.csv if present, otherwise from the captured console output.
print(f"{'CATEGORY':<20} | {'mAP@0.5 (%)':<15} | {'Frames'}")
print("-" * 50)

for category in CATEGORIES:
    # 1. Setup
    img_dir = os.path.join(STRATIFIED_ROOT, category, 'images')
    # isdir (not exists) so a stray file named 'images' is reported as empty
    # instead of crashing os.listdir.
    frame_files = os.listdir(img_dir) if os.path.isdir(img_dir) else []
    if not frame_files:
        print(f"{category:<20} | {'N/A (Empty)':<15} | 0")
        continue

    num_frames = len(frame_files)
    yaml_file = create_yaml(category, STRATIFIED_ROOT)
    run_name = f"eval_{category}"

    # 2. Run Command
    cmd = [
        "python", "val.py",
        "--data", yaml_file,
        "--weights", WEIGHTS_PATH,
        "--img", "320",
        "--batch", "32",
        "--device", "0",
        "--task", "val",
        "--name", run_name,
        "--exist-ok",
        "--save-txt", # Force saving results
        "--verbose"   # Ensure table is printed
    ]

    try:
        # Capture stderr AND stdout
        res = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True)

        # 3. Attempt Parsing (CSV first, then Text)
        # Only trust results from a run that exited cleanly: with --exist-ok the
        # run folder is reused, so a leftover results.csv from an earlier run
        # could otherwise be reported for a run that actually crashed.
        score = None
        if res.returncode == 0:
            score = parse_map_from_csv(run_name)
            if score is None:
                score = parse_map_from_stdout(res.stdout)

        if score is not None:
            print(f"{category:<20} | {score*100:<15.2f} | {num_frames}")
        else:
            print(f"{category:<20} | {'0.00 (Failed)':<15} | {num_frames}")
            # Show why it failed instead of silently reporting a zero.
            print(f"  val.py exit code {res.returncode}; last output:")
            print(res.stdout[-500:])

    except Exception as e:
        print(f"{category:<20} | {'Error':<15} | {num_frames}")
        # Surface the reason (e.g. interpreter or val.py not found).
        print(f"  {type(e).__name__}: {e}")

print("-" * 50)

CATEGORY             | mAP@0.5 (%)     | Frames
--------------------------------------------------
density_low          | 97.60           | 494
density_med          | 97.50           | 179
density_high         | 96.70           | 24
occlusion_low        | 97.70           | 480
occlusion_med        | 97.60           | 194
occlusion_high       | 96.20           | 23
--------------------------------------------------
