<a href="https://colab.research.google.com/github/felipe-aveiro/MMHD-project/blob/main/MMHD_project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 🛠️🗃️ **Install dependencies, import libraries, and remove "sample_data" folder**

In [None]:
# Setup cell: installs Ultralytics, removes Colab's "sample_data" folder,
# imports every library used by the notebook and reports the compute device.
print("START!\n")
# Silently install and remove files
# (output of both shell commands is redirected to /dev/null, so a failed
# install or a missing folder produces no visible message)
!pip install ultralytics > /dev/null 2>&1
!rm -r sample_data > /dev/null 2>&1

# Silently import everything
# (stdout and stderr are redirected to None while the imports run, hiding
# anything the libraries print at import time)
import contextlib
with contextlib.redirect_stdout(None), contextlib.redirect_stderr(None):
    import ultralytics
    from ultralytics import YOLO
    from google.colab import files
    import matplotlib.pyplot as plt
    from PIL import Image
    import os
    import random
    import shutil
    import time
    from tqdm import tqdm
    import torch

# Report whether CUDA is usable and the name of GPU 0, or "CPU" when it is not
print("📢 CUDA available:", torch.cuda.is_available())
print("🔍 Device:", torch.cuda.get_device_name(0) if torch.cuda.is_available() else "CPU")


print("\n✅ Installations and imports done!")

# 💾📥 **Download MID-3K dataset**

In [None]:
# Clone the project repository (it holds the MID-3K folder used as BASE_DIR
# later on) into the current working directory.
# NOTE(review): the message below is printed unconditionally, so it also
# appears when the clone fails (for example when the folder already exists
# from a previous run of this cell) — confirm whether a check is wanted.
!git clone https://github.com/felipe-aveiro/MMHD-project.git

print("\n✅ Repository was successfully cloned!")

# ⚙️🛠️ **Split dataset into train (65%), validation (15%), and test (20%)**

In [None]:
# Base directory: the MID-3K folder inside the cloned repository.
# The dataset split and the YAML configuration paths are built from it.
BASE_DIR = "/content/MMHD-project/MID-3K"

def process_dataset(BASE_DIR, modality, seed=None):
    """Split one modality of the dataset into train/val/test folders.

    Images are read from
    ``<BASE_DIR>/dataset/<modality>/whole-dataset/<modality>/images`` and the
    matching ``.txt`` labels from the sibling ``labels`` folder. The image
    list is shuffled and divided 65% / 15% / remainder (about 20%), then each
    image/label pair is copied to
    ``<BASE_DIR>/dataset/<modality>/<split>/<modality>/{images,labels}``.
    Existing split folders are deleted and recreated first.

    Args:
        BASE_DIR: Root folder that contains the ``dataset`` directory.
        modality: Sub-dataset name used in the folder layout
            (the notebook calls this with "rgb" and "thermal").
        seed: Optional seed for the shuffle. With the default ``None`` the
            module-level ``random`` generator is used, exactly as before, so
            every run yields a different split unless the caller seeded it.

    Raises:
        FileNotFoundError: If the source images folder does not exist
            (raised by ``os.listdir``).
    """
    print(f"\n🚀 Processing modality: {modality.upper()}")

    # Paths to the images and labels of the selected modality
    modality_root = os.path.join(BASE_DIR, "dataset", modality)
    images_dir = os.path.join(modality_root, "whole-dataset", modality, "images")
    labels_dir = os.path.join(modality_root, "whole-dataset", modality, "labels")

    # Build output directories for a given split (train, val, test)
    def build_output_path(split):
        split_root = os.path.join(modality_root, split, modality)
        return {
            "images": os.path.join(split_root, "images"),
            "labels": os.path.join(split_root, "labels"),
        }

    # Collect all PNG images. The listing is sorted first because os.listdir
    # returns entries in arbitrary order; a fixed starting order is what makes
    # a seeded shuffle reproducible. The extension check is case-insensitive,
    # matching get_random_image.
    image_files = sorted(
        f for f in os.listdir(images_dir) if f.lower().endswith(".png")
    )
    shuffler = random if seed is None else random.Random(seed)
    shuffler.shuffle(image_files)

    # Split ratios (the test set receives whatever remains, about 20%)
    train_percentage = 0.65
    val_percentage = 0.15

    # Calculate the split sizes
    total = len(image_files)
    train_split = int(total * train_percentage)
    val_split = int(total * val_percentage)

    # File list for each split; slicing to the end guarantees total coverage
    split_files = {
        "train": image_files[:train_split],
        "val": image_files[train_split:train_split + val_split],
        "test": image_files[train_split + val_split:],
    }

    print(f"📊 Total {modality} images: {total}\n")

    print(f"🟢 Train: {len(split_files['train'])}")
    print(f"🔵 Validation: {len(split_files['val'])}")
    print(f"🟣 Test: {len(split_files['test'])}")

    # Delete and recreate output folders
    # (an immutable tuple is used as the default instead of a shared list)
    def prepare_output_folders(splits=("train", "val", "test")):
        for split in splits:
            for subpath in build_output_path(split).values():
                if os.path.exists(subpath):
                    shutil.rmtree(subpath)
                os.makedirs(subpath, exist_ok=True)

    prepare_output_folders()

    # Copy images and corresponding labels to each split folder
    def copy_files(file_list, split):
        paths = build_output_path(split)
        print(f"\n📦 Copying {split} set ({len(file_list)} files):")
        for img_file in tqdm(file_list, desc=f"→ {split.capitalize()} progress", unit=" images"):
            label_file = os.path.splitext(img_file)[0] + ".txt"
            src_img = os.path.join(images_dir, img_file)
            src_lbl = os.path.join(labels_dir, label_file)

            # Only complete image/label pairs are copied
            if os.path.exists(src_img) and os.path.exists(src_lbl):
                shutil.copy2(src_img, os.path.join(paths["images"], img_file))
                shutil.copy2(src_lbl, os.path.join(paths["labels"], label_file))
            else:
                print(f"⚠️ Skipped (missing image or label): {img_file}")

    # Run the file copying process and measure time
    start_time = time.perf_counter()
    for split, file_list in split_files.items():
        copy_files(file_list, split)
    elapsed = time.perf_counter() - start_time
    print(f"\n\n🔔 {modality.upper()} dataset split completed in {elapsed:.2f} seconds!")

# Run the split once per modality: RGB first, then thermal
for dataset_modality in ("rgb", "thermal"):
    process_dataset(BASE_DIR, dataset_modality)

# ⚙️🧰 **Generate dataset configuration files (rgb.yaml & thermal.yaml)**

In [None]:
# --- RGB dataset configuration: target file and its text ---
rgb_yaml_path = os.path.join(BASE_DIR, "dataset", "rgb.yaml")
rgb_yaml_content = f"""
path: {BASE_DIR}/dataset/rgb
train: train/rgb
val: val/rgb
test: test/rgb
names:
  0: human
"""

# --- Thermal dataset configuration: target file and its text ---
thermal_yaml_path = os.path.join(BASE_DIR, "dataset", "thermal.yaml")
thermal_yaml_content = f"""
path: {BASE_DIR}/dataset/thermal
train: train/thermal
val: val/thermal
test: test/thermal
names:
  0: human
"""

# Write both files; strip() drops the blank lines that surround each template
for yaml_path, yaml_content in (
    (rgb_yaml_path, rgb_yaml_content),
    (thermal_yaml_path, thermal_yaml_content),
):
    with open(yaml_path, "w") as yaml_file:
        yaml_file.write(yaml_content.strip())

# Show where the configuration files were written
print("✅ Checkpoint:\n")
for yaml_path in (rgb_yaml_path, thermal_yaml_path):
    print("\t📄 ", yaml_path)

# 🚀🧠🌈🖼️ **Create YOLO RGB model, train and download**

In [None]:
# Remove the output folder of any previous RGB run
if os.path.exists("RML-project-MMHD/rgb"):
    shutil.rmtree("RML-project-MMHD/rgb")

# Training parameters
# Number of epochs to train
num_epochs_rgb = 50

# Train on the first GPU when CUDA is available; otherwise fall back to the
# CPU instead of failing on a runtime without a GPU
train_device_rgb = 0 if torch.cuda.is_available() else "cpu"

# Load the YOLOv8 small model for the RGB modality
# (other sizes, e.g.: yolov8n.pt, yolov8s.pt, yolov8m.pt ...)
model_rgb = YOLO('yolov8s.pt')

rgb_yaml_path = os.path.join(BASE_DIR, "dataset", "rgb.yaml")

# Start training
# https://docs.ultralytics.com/usage/cfg/#train-settings
# NOTE(review): the model is created from the 'yolov8s.pt' checkpoint while
# pretrained=False is passed below — confirm in the train settings docs which
# of the two decides whether pretrained weights are used.
model_rgb.train(
    pretrained = False, # whether to use pretrained weights
    data = rgb_yaml_path, # dataset config file
    epochs = num_epochs_rgb, # number of epochs
    device = train_device_rgb, # GPU 0 if available, else CPU
    patience = num_epochs_rgb, # patience equal to the epoch count, to disable early stopping
    imgsz = 640, # image size used for resizing, default 640
    save = True, # save checkpoints and final model weights
    project='RML-project-MMHD', # project name
    name = 'rgb', # run name inside the project (the modality)
    plots = True # save plots of training and validation metrics
)

# 🚀🧠🔥🌡️ **Create YOLO thermal model, train and download**

In [None]:
# Remove the output folder of any previous thermal run
if os.path.exists("RML-project-MMHD/thermal"):
    shutil.rmtree("RML-project-MMHD/thermal")

# Training parameters
# Number of epochs to train
num_epochs_thermal = 50

# Train on the first GPU when CUDA is available; otherwise fall back to the
# CPU instead of failing on a runtime without a GPU
train_device_thermal = 0 if torch.cuda.is_available() else "cpu"

# Load the YOLOv8 small model for the thermal modality
# (other sizes, e.g.: yolov8n.pt, yolov8s.pt, yolov8m.pt ...)
model_thermal = YOLO('yolov8s.pt')

thermal_yaml_path = os.path.join(BASE_DIR, "dataset", "thermal.yaml")

# Start training
# https://docs.ultralytics.com/usage/cfg/#train-settings
# NOTE(review): the model is created from the 'yolov8s.pt' checkpoint while
# pretrained=False is passed below — confirm in the train settings docs which
# of the two decides whether pretrained weights are used.
model_thermal.train(
    pretrained = False, # whether to use pretrained weights
    data = thermal_yaml_path, # dataset config file
    epochs = num_epochs_thermal, # number of epochs
    device = train_device_thermal, # GPU 0 if available, else CPU
    patience = num_epochs_thermal, # patience equal to the epoch count, to disable early stopping
    imgsz = 640, # image size used for resizing, default 640
    save = True, # save checkpoints and final model weights
    project='RML-project-MMHD', # project name
    name = 'thermal', # run name inside the project (the modality)
    plots = True # save plots of training and validation metrics
)

# 🧪🎯🌈🖼️ **RGB model validation with test set**

In [None]:
# https://docs.ultralytics.com/modes/val/#arguments-for-yolo-model-validation
# Evaluate the RGB model on the test split.
# The dataset config is passed by its full path: it was written to
# <BASE_DIR>/dataset/rgb.yaml, whereas a bare 'rgb.yaml' names a file that
# this notebook never creates in the working directory.
results_test_rgb = model_rgb.val(
    data=os.path.join(BASE_DIR, "dataset", "rgb.yaml"),
    split='test',
    project='RML-project-MMHD',
    name='test_eval_rgb',
    plots=True
)

# 🧪🎯🔥🌡️ **Thermal model validation with test set**

In [None]:
# https://docs.ultralytics.com/modes/val/#arguments-for-yolo-model-validation
# Evaluate the thermal model on the test split.
# The dataset config is passed by its full path: it was written to
# <BASE_DIR>/dataset/thermal.yaml, whereas a bare 'thermal.yaml' names a file
# that this notebook never creates in the working directory.
results_test_thermal = model_thermal.val(
    data=os.path.join(BASE_DIR, "dataset", "thermal.yaml"),
    split='test',
    project='RML-project-MMHD',
    name='test_eval_thermal',
    plots=True
)

# 🔁🤖 **Alternatively, load previous models**

In [None]:
# ⚙️ OPTIONAL: Load previously trained models from Google Drive if the session was restarted
import os  # imported here as well so the cell also runs in a fresh session

from google.colab import drive
from ultralytics import YOLO

drive.mount('/content/drive')

# Drive folders that may hold a saved "RML-project-MMHD" project. The first is
# the location this cell has always used; the second is where the
# "Save project to Drive" cell of this notebook copies /content
# (MyDrive/RML-2025/<relative path>).
drive_project_roots = (
    "/content/drive/MyDrive/RML-project-MMHD",
    "/content/drive/MyDrive/RML-2025/RML-project-MMHD",
)

# Paths for saved best weights: the first candidate that exists, otherwise the
# original location (so the variables always hold a path string)
rgb_weight_candidates = [
    os.path.join(root, "rgb", "weights", "best.pt") for root in drive_project_roots
]
thermal_weight_candidates = [
    os.path.join(root, "thermal", "weights", "best.pt") for root in drive_project_roots
]
drive_rgb_weights = next(
    (p for p in rgb_weight_candidates if os.path.exists(p)), rgb_weight_candidates[0]
)
drive_thermal_weights = next(
    (p for p in thermal_weight_candidates if os.path.exists(p)), thermal_weight_candidates[0]
)

# Load each model if its weights exist; say so explicitly when they do not,
# instead of silently leaving the model variable untouched
if os.path.exists(drive_rgb_weights):
    model_rgb = YOLO(drive_rgb_weights)
    print("✅ RGB model loaded from Drive!")
else:
    print("⚠️ RGB weights not found on Drive; model_rgb was not loaded.")

if os.path.exists(drive_thermal_weights):
    model_thermal = YOLO(drive_thermal_weights)
    print("✅ Thermal model loaded from Drive!")
else:
    print("⚠️ Thermal weights not found on Drive; model_thermal was not loaded.")

# 👁️‍🗨️🕵️‍♂️ **Predict image (local files or URL image)**

In [None]:
# Folders holding the complete (unsplit) image set of each modality
image_folder_rgb, image_folder_thermal = (
    "/content/MMHD-project/MID-3K/dataset/rgb/whole-dataset/rgb/images",
    "/content/MMHD-project/MID-3K/dataset/thermal/whole-dataset/thermal/images",
)

def get_random_image(image_folder):
    """Pick one PNG file at random from *image_folder*.

    Returns:
        A ``(path, name)`` tuple: the chosen file name joined onto
        *image_folder*, and the bare file name.

    Raises:
        RuntimeError: If no entry of the folder has a name ending in ".png"
            (compared case-insensitively).
    """
    candidates = [
        entry for entry in os.listdir(image_folder)
        if entry.lower().endswith('.png')
    ]

    if candidates:
        chosen_name = random.choice(candidates)
        return os.path.join(image_folder, chosen_name), chosen_name

    raise RuntimeError(f"No PNG images found in {image_folder}")


# Draw one random sample image per modality
random_image_path_rgb, random_image_name_rgb = get_random_image(image_folder_rgb)
random_image_path_thermal, random_image_name_thermal = get_random_image(image_folder_thermal)

print(f"🔍 Selected random RGB image: {random_image_name_rgb}")
print(f"🔍 Selected random thermal image: {random_image_name_thermal}")
print()

# Run each model on its own sample
# Tutorial: https://docs.ultralytics.com/modes/predict/
pred_rgb = model_rgb(random_image_path_rgb)
pred_thermal = model_thermal(random_image_path_thermal)

# Output folder for the annotated images
os.makedirs("predictions", exist_ok=True)

# RGB results: read classes, confidences and normalized xywh boxes, then save
# the annotated image under a running index
id_rgb = 0
for rgb_result in pred_rgb:
    rgb_boxes = rgb_result.boxes
    cls = rgb_boxes.cls.tolist()
    conf = rgb_boxes.conf.tolist()
    boxes = rgb_boxes.xywhn.tolist()
    file = 'predictions/rgb_result_' + str(id_rgb) + '.jpg'
    rgb_result.save(filename=file)
    id_rgb += 1

# Thermal results: same steps with the thermal file prefix
id_thermal = 0
for thermal_result in pred_thermal:
    thermal_boxes = thermal_result.boxes
    cls = thermal_boxes.cls.tolist()
    conf = thermal_boxes.conf.tolist()
    boxes = thermal_boxes.xywhn.tolist()
    file = 'predictions/thermal_result_' + str(id_thermal) + '.jpg'
    thermal_result.save(filename=file)
    id_thermal += 1

# 📁🧾 **ZIP RML-project-MMHD folder and create RML-project-MMHD.zip**

In [None]:
# Archive to create and the folder whose contents go into it
output_filename = '/content/RML-project-MMHD.zip'
folder_to_zip = '/content/RML-project-MMHD'

# make_archive adds the ".zip" extension itself, so it receives the name without it
archive_base_name = output_filename.replace('.zip', '')
shutil.make_archive(archive_base_name, 'zip', folder_to_zip)

print(f'Folder {folder_to_zip} was compacted as {output_filename}')

# 💾🖨️ **Save project to Drive**

In [None]:
# Copy everything under /content (except the excluded folders) to Google Drive
from google.colab import drive
drive.mount('/content/drive')

OUTPUT_DIR = "/content/drive/MyDrive/RML-2025"
os.makedirs(OUTPUT_DIR, exist_ok=True)

SOURCE_DIR = "/content"

# Directory names that are not copied: the mounted Drive itself, Colab's
# sample data and the cloned repository
EXCLUDE_DIRS = ['drive', 'sample_data', 'MMHD-project']

all_files_to_copy = []
# "filenames" is used as the loop name so the "files" module imported from
# google.colab is not overwritten
for root, dirs, filenames in os.walk(SOURCE_DIR):
    # Prune excluded directories in place so os.walk never descends into them.
    # Matching whole directory names (rather than substrings of the path)
    # avoids skipping unrelated folders, and pruning avoids walking the entire
    # mounted Drive just to discard it.
    dirs[:] = [d for d in dirs if d not in EXCLUDE_DIRS]
    for filename in filenames:
        full_path = os.path.join(root, filename)
        rel_path = os.path.relpath(full_path, SOURCE_DIR)
        dest_path = os.path.join(OUTPUT_DIR, rel_path)
        all_files_to_copy.append((full_path, dest_path))

print(f"📦 Copying {len(all_files_to_copy)} files to Drive...\n")
for src, dst in tqdm(all_files_to_copy, desc="🔄 Copying", unit=" file"):
    # Recreate the relative folder structure below OUTPUT_DIR
    os.makedirs(os.path.dirname(dst), exist_ok=True)
    if os.path.exists(dst):
        print(f"⚠️ Overwriting: {dst}")
    shutil.copy2(src, dst)

print(f"\n✅ Content in /content/ was saved in: {OUTPUT_DIR}")

# 📌🎉✅ **ALL DONE!**