# YOLOv11 Medical Object Detection (Optimized)

This notebook is an optimized version for training YOLOv11 on the Thesis Dataset.
**Improvements:**
- Local & Colab compatibility (no hardcoded `/content/` paths).
- Auto-detects GPU/CPU.
- Structured configuration and improved readability.

## 1. Setup Environment

In [None]:
import os
import sys
import shutil
import yaml
import json
import random
import datetime
from pathlib import Path

import cv2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import seaborn as sns
from PIL import Image
from IPython.display import display, Image as IPyImage

# Detect whether the notebook runs on Google Colab or on a local machine.
# The import itself is the probe: IN_COLAB is True only when the
# `google.colab` package can be imported.
try:
    import google.colab
except ImportError:
    IN_COLAB = False
    print("Running in Local environment.")
else:
    IN_COLAB = True
    print("Running in Google Colab environment.")
    # To install the dependencies automatically in Colab / a fresh env,
    # uncomment the line below:
    # !pip install ultralytics roboflow thop

import torch
from ultralytics import YOLO
from roboflow import Roboflow

# Device Configuration
# DEVICE is a string: '0' (first CUDA GPU) when CUDA is available,
# otherwise 'cpu'. It is later handed to the training/validation calls.
DEVICE = 'cpu'
if torch.cuda.is_available():
    DEVICE = '0'
print(f"Using Device: {DEVICE}")
if DEVICE != 'cpu':
    print(f"GPU: {torch.cuda.get_device_name(0)}")

## 2. Configuration
Set your hyperparameters and API keys here.

In [None]:
# === Configuration ===
# Roboflow credentials and dataset coordinates.
# The key is taken from the ROBOFLOW_API_KEY environment variable when it is
# set (and non-empty), so the secret does not have to live in the notebook.
# The literal is kept only as a backward-compatible fallback.
# NOTE(review): a key committed to a notebook should be treated as leaked --
# rotate it and drop the fallback once the environment variable is in use.
ROBOFLOW_API_KEY = os.environ.get("ROBOFLOW_API_KEY") or "e3DAOLcZTZkR7VlO5jY2"
PROJECT_NAME = "mkthesis-v4-kfvrb"
VERSION_NUMBER = 3

# Training hyperparameters (passed to model.train() in the training cell).
MODEL_NAME = "yolo11n.pt"  # Pretrained model to start from
IMG_SIZE = 640             # passed as imgsz
BATCH_SIZE = 16            # passed as batch
EPOCHS = 300               # passed as epochs
PATIENCE = 10              # passed as patience (early-stopping setting)
OPTIMIZER = 'SGD'
LR0 = 0.01                 # passed as lr0
LRF = 0.1                  # passed as lrf

# Output Directory
# Training artifacts are written to PROJECT_DIR / RUN_NAME.
RUN_NAME = 'train_optimized'
PROJECT_DIR = Path('runs/detect')
SAVE_DIR = PROJECT_DIR / RUN_NAME
print(f"Training results will be saved to: {SAVE_DIR}")

## 3. Dataset Preparation

In [None]:
# Initialize Roboflow and resolve the configured project version.
rf = Roboflow(api_key=ROBOFLOW_API_KEY)
project = rf.workspace("pcithesis").project(PROJECT_NAME)
version = project.version(VERSION_NUMBER)

# Download Dataset in the "yolov11" export format.
dataset = version.download("yolov11")

# dataset.location contains the absolute path to the downloaded dataset
DATASET_DIR = Path(dataset.location)
DATA_YAML = DATASET_DIR / "data.yaml"

print(f"Dataset downloaded to: {DATASET_DIR}")
print(f"Data YAML: {DATA_YAML}")

# Verify data.yaml content.
# Read explicitly as UTF-8 so non-ASCII class names are not mis-decoded on
# platforms whose default text encoding is something else.
with open(DATA_YAML, 'r', encoding='utf-8') as f:
    yaml_content = yaml.safe_load(f)

# Printing happens after the file has been closed; only parsing needs it open.
print("\nYAML Content:")
print(yaml_content)

## 4. Model Training

In [None]:
# Load Model (starts from the pretrained weights named in the configuration)
model = YOLO(MODEL_NAME)

# Train Model
# All training arguments are collected in one mapping and then unpacked into
# model.train(), so the full set of options can be inspected in one place.
train_args = {
    # data and schedule
    "data": str(DATA_YAML),
    "epochs": EPOCHS,
    "imgsz": IMG_SIZE,
    "batch": BATCH_SIZE,
    # optimizer settings
    "optimizer": OPTIMIZER,
    "lr0": LR0,
    "lrf": LRF,
    "momentum": 0.937,
    "weight_decay": 0.0005,
    "patience": PATIENCE,
    # hardware and output location
    "device": DEVICE,
    "project": str(PROJECT_DIR),
    "name": RUN_NAME,
    "save": True,
    "plots": True,
    "verbose": True,
    # Overwrite if exists, or set False to increment run name
    "exist_ok": True,
}
results = model.train(**train_args)

## 5. Evaluation & Analysis

In [None]:
# Load Best Model
# Falls back to the in-memory `model` when training did not produce best.pt.
from copy import deepcopy

best_weight_path = SAVE_DIR / 'weights' / 'best.pt'
print(f"Loading best weights from: {best_weight_path}")

if best_weight_path.exists():
    best_model = YOLO(best_weight_path)
else:
    print("Warning: Best weights not found. Using current model state.")
    best_model = model

# Model Info
best_model.info(verbose=True)

# Calculate GFLOPs & Params with thop (skipped when thop is not installed).
# DEVICE is '0' or 'cpu'; torch itself does not accept a bare index string
# such as '0' as a device, so translate it to 'cuda:0' for tensor/model moves.
torch_device = torch.device('cpu' if DEVICE == 'cpu' else f'cuda:{DEVICE}')
try:
    from thop import profile
except ImportError:
    print("thop library not installed. Skipping GFLOPs calculation.")
else:
    try:
        # Profile a copy so the profiler's hooks and the device move cannot
        # affect the model that is validated afterwards.
        profiled_model = deepcopy(best_model.model).to(torch_device)
        dummy_input = torch.randn(1, 3, IMG_SIZE, IMG_SIZE, device=torch_device)
        macs, params = profile(profiled_model, inputs=(dummy_input, ), verbose=False)
        # thop reports multiply-accumulate operations; one MAC counts as two
        # floating-point operations, which is also the convention Ultralytics
        # uses for the GFLOPs figure it prints in model.info().
        gflops = 2 * macs / 1e9
        print(f"GFLOPs: {gflops:.2f}")
        print(f"Parameters: {int(params):,}")
    except Exception as e:
        # Best-effort diagnostic only: never let profiling abort the notebook.
        print(f"Error calculating GFLOPs: {e}")

In [None]:
# Validate on Test Set
# Runs validation of the best model against the 'test' split; outputs go to
# PROJECT_DIR / f"{RUN_NAME}_val".
val_args = {
    "data": str(DATA_YAML),
    "split": 'test',
    "imgsz": IMG_SIZE,
    "batch": BATCH_SIZE,
    "device": DEVICE,
    "project": str(PROJECT_DIR),
    "name": f"{RUN_NAME}_val",
    "plots": True,
}
test_metrics = best_model.val(**val_args)

# Extract Key Metrics
box_stats = test_metrics.box
precision, recall = box_stats.mp, box_stats.mr
map50, map50_95 = box_stats.map50, box_stats.map
# The small epsilon keeps the division defined when precision + recall is 0.
f1_score = 2 * (precision * recall) / (precision + recall + 1e-6)

# Build the report once; the same lines are printed and written to disk.
report_lines = [
    "----- Test Metrics -----",
    f"Precision: {precision:.4f}",
    f"Recall:    {recall:.4f}",
    f"F1 Score:  {f1_score:.4f}",
    f"mAP50:     {map50:.4f}",
    f"mAP50-95:  {map50_95:.4f}",
]

# Console output starts with a blank line to separate it from the val() log.
print("\n" + "\n".join(report_lines))

# Save Metrics to Text File (in the current working directory)
with open("test_metrics.txt", "w") as f:
    f.write("\n".join(report_lines) + "\n")

In [None]:
# Export Per-Class Metrics to CSV
# Writes one row per evaluated class to per_class_metrics.csv (current working
# directory) and displays the table. Failures are reported, not raised.
try:
    box = test_metrics.box
    num_classes = len(box.p)

    # Per the Ultralytics Metric documentation, the per-class arrays (p, r,
    # f1, ap50, ap) hold one entry per class that has results, and
    # ap_class_index gives the real class id of each entry. Using
    # range(num_classes) would mislabel rows whenever a class is absent from
    # the evaluated split, so map through ap_class_index when it lines up.
    class_ids = [int(i) for i in getattr(box, "ap_class_index", [])]
    if len(class_ids) != num_classes:
        # Fallback: keep the positional numbering if the index is unavailable.
        class_ids = list(range(num_classes))
    class_names = [best_model.names[i] for i in class_ids]

    df_metrics = pd.DataFrame({
        "Class ID": class_ids,
        "Class Name": class_names,
        "Precision": box.p,
        "Recall": box.r,
        "F1 Score": box.f1,
        "mAP50": box.ap50,
        "mAP50-95": box.ap,
    })

    csv_path = "per_class_metrics.csv"
    df_metrics.to_csv(csv_path, index=False)
    print(f"✅ Per-class metrics exported to {csv_path}")
    display(df_metrics)
except Exception as e:
    # Best-effort export: report the problem and let the notebook continue.
    print(f"Error exporting per-class metrics: {e}")

In [None]:
# Export Confusion Matrix
# The val() run already stores its own confusion-matrix output in the run
# folder; this step additionally dumps the raw matrix to a CSV file in the
# current working directory. Any failure is reported as a note only.
try:
    cm = test_metrics.confusion_matrix.matrix
    np.savetxt(
        "confusion_matrix.csv",
        cm,
        delimiter=",",
        fmt="%.0f",  # values are written without decimals
    )
except Exception as e:
    print(f"Note: Could not export custom confusion matrix CSV ({e})")
else:
    print("✅ Confusion matrix exported to confusion_matrix.csv")

# Display Training Results (Loss, mAP curves)
# Reads results.csv from the training run directory and draws two panels
# side by side: box loss (train vs. val) and the mAP metrics, both per epoch.
results_csv = SAVE_DIR / 'results.csv'
if not results_csv.exists():
    print("Results CSV not found yet (training might not have started or failed).")
else:
    df_results = pd.read_csv(results_csv)
    # Column headers are stripped of surrounding whitespace before lookup.
    df_results.columns = list(map(str.strip, df_results.columns))

    # (panel title, ((csv column, legend label), ...)) in left-to-right order.
    panels = (
        ('Box Loss', (
            ('train/box_loss', 'Train Box Loss'),
            ('val/box_loss', 'Val Box Loss'),
        )),
        ('mAP Metrics', (
            ('metrics/mAP50(B)', 'mAP50'),
            ('metrics/mAP50-95(B)', 'mAP50-95'),
        )),
    )

    plt.figure(figsize=(12, 5))
    for position, (panel_title, curves) in enumerate(panels, start=1):
        plt.subplot(1, 2, position)
        for column, legend_label in curves:
            plt.plot(df_results['epoch'], df_results[column], label=legend_label)
        plt.legend()
        plt.title(panel_title)
        plt.xlabel('Epoch')

    plt.tight_layout()
    plt.show()

In [None]:
# Export Results to Zip
# Archives the training run directory (SAVE_DIR) into a timestamped zip in the
# current working directory and, on Colab, triggers a browser download.
timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
# Use only the stem of the weights name: this avoids a "yolo11n.pt_..." style
# archive name and keeps the archive in the working directory even if
# MODEL_NAME is given as a path.
zip_filename = f"{Path(MODEL_NAME).stem}_{PROJECT_NAME}_v{VERSION_NUMBER}_{timestamp}"
zip_path = Path(f"{zip_filename}.zip")

print(f"Zipping results to {zip_path}...")

# Create Zip Archive of the Save Directory
try:
    # make_archive returns the name of the archive it actually wrote; report
    # that path instead of a reconstructed one.
    zip_path = Path(shutil.make_archive(zip_filename, 'zip', SAVE_DIR))
    print(f"✅ Zip created successfully: {zip_path.resolve()}")

    # Trigger Download if in Colab
    if IN_COLAB:
        from google.colab import files
        files.download(str(zip_path))
        print("Download triggered.")
    else:
        print(f"File saved locally at: {zip_path.absolute()}")
except Exception as e:
    # Best-effort export: report the problem and let the notebook continue.
    print(f"Error creating or downloading zip: {e}")

## 6. Conclusion
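Training and evaluation are complete. The training run directory (weights, curves, and plots) has been exported as a zip archive; the test metrics, per-class metrics, and confusion-matrix files are written separately to the current working directory.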