# YOLO-OBB Full Fine-Tuning (Strategy B: Unfrozen)

**Strategy B: Full Fine Tuning (Unfrozen):**
Similar to Strategy A, the model is pretrained on synthetic data. However, in this phase, **all layers are unfrozen** (freeze parameter = 0), allowing the gradients to update the entire network, from the backbone to the head, on the real-world subsets. This evaluates whether allowing the model to adjust its deep feature representations to the real domain yields better adaptation than head-only training.

**Structure:**
1. **Setup**: Imports, output folder creation, W&B login, and configuration.
2. **Data Preparation**: Helper functions to split and subset real data.
3. **Training Cells**: 
    - Cells 1-6: Fine-tune synthetic model on 5%, 10%, 20%, 30%, 40%, 50% of real data.
    - Baseline cells: Train from the base model (Baseline) on 40% and on Alpha% of real data.

> **IMPORTANT**: Update `REAL_DATASET_PATH`, `TEST_DATASET_PATH`, `SYNTHETIC_MODEL_PATH`, and `BASE_MODEL_PATH` below before running.

## 1. Environment Setup

In [None]:
!pip install -q ultralytics albumentations opencv-python numpy pyyaml tqdm scikit-learn wandb

In [None]:
# 1. Setup & Imports
# Fix environment dependency issues (typing_extensions mismatch with pydantic/wandb)
# Run this FIRST before importing anything else
import subprocess
import sys



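# NOTE: fix_dependencies() is defined in the "Configuration & Setup" cell below.
# If the imports here fail, uncomment the call in that cell, run it once, then restart your kernel.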

# ============================================================
# AFTER RESTARTING KERNEL, RUN FROM HERE:
# ============================================================

import os
import random
import shutil
import numpy as np
from pathlib import Path
from tqdm import tqdm
import yaml
from ultralytics import YOLO

## 2. Configuration & Setup

In [None]:
# 2. Configuration & Setup
# Fix environment dependency issues (typing_extensions mismatch with pydantic/wandb)
# Run this FIRST before importing anything else
import subprocess
import sys

def fix_dependencies():
    """Upgrade typing_extensions, pydantic and wandb via pip.

    Two ``pip install --upgrade --user`` commands are run, in order, with the
    current interpreter (``sys.executable``): typing_extensions first, then
    pydantic together with wandb. ``subprocess.check_call`` raises
    ``CalledProcessError`` if either command exits with a non-zero status.
    The final messages ask the user to restart the kernel/runtime.
    """
    print("Fixing dependency conflicts...")

    pip_upgrade = [sys.executable, "-m", "pip", "install", "--upgrade", "--user"]
    upgrade_batches = (
        ["typing_extensions>=4.6.0"],   # typing_extensions is upgraded first
        ["pydantic>=2.0", "wandb"],     # then pydantic and wandb
    )
    for packages in upgrade_batches:
        subprocess.check_call(pip_upgrade + packages)

    for message in (
        "Dependencies fixed. Please restart your kernel/runtime!",
        "After restarting, run the rest of your code.",
    ):
        print(message)

# Uncomment and run this once, then restart your kernel
# fix_dependencies()

# ============================================================
# AFTER RESTARTING KERNEL, RUN FROM HERE:
# ============================================================

import os
import random
import shutil
import numpy as np
from pathlib import Path
from tqdm import tqdm
import yaml
from ultralytics import YOLO

# --- CONFIGURATION ---
# [Action Required] Update these paths
REAL_DATASET_PATH = Path("Real_Data/train(80_ REAL DATA)")
TEST_DATASET_PATH = Path("last-test")
SYNTHETIC_MODEL_PATH = "YOLO26l_OBB_E1_Synthesized_only2/weights/best.pt"
# [Action Required] Path to base model for scratch training baseline
BASE_MODEL_PATH = "yolo26l-obb.pt"
# Local path for school server
DRIVE_UPLOAD_FOLDER = Path("YOLO_OBB_FineTuning_Results")

# W&B Configuration
WANDB_PROJECT = "paper_experiment_mixed"
WANDB_ENTITY = "abd-eldjalil-taibi-ensia"  # Or your entity


def _core_train_settings(initial_lr):
    """Return the schedule/optimizer settings common to both training configs.

    Only the initial learning rate (``lr0``) differs between the fine-tuning
    and the scratch/baseline configuration; every other value is shared.
    """
    return {
        "epochs": 100,
        "imgsz": 640,
        "batch": 12,
        "workers": 20,
        "optimizer": "AdamW",
        "lr0": initial_lr,
        "lrf": 0.01,
        "momentum": 0.937,
        "weight_decay": 0.0005,
        "close_mosaic": 15,
        "patience": 25,
        "save_period": 10,
        "val": True,
        "amp": True,
        "device": 0,
    }


# Augmentations (Matches reference training loop) - identical for both configs
_AUGMENTATION_SETTINGS = {
    "hsv_h": 0.015,
    "hsv_s": 0.7,
    "hsv_v": 0.4,
    "degrees": 10.0,
    "translate": 0.1,
    "scale": 0.9,
    "fliplr": 0.5,
    "mosaic": 1.0,
    "mixup": 0.1,
    "erasing": 0.3,
    "perspective": 0.0005,
}

# Training Hyperparameters (Optimized for Fine-tuning)
FINETUNE_CONFIG = {
    # Lower LR for fine-tuning (was 0.001) to preserve features
    **_core_train_settings(0.0005),
    "freeze": 0,           # Unfrozen: Update all layers (Strategy B)
    "warmup_epochs": 1.0,  # Reduced warmup for fine-tuning
    **_AUGMENTATION_SETTINGS,
}

# Training Hyperparameters (From Reference - For Scratch/Baseline Training)
SCRATCH_CONFIG = {
    # Higher LR for scratch training; no "freeze" key is set for this config
    **_core_train_settings(0.001),
    **_AUGMENTATION_SETTINGS,
}

# Image file extensions that the data-preparation helpers search for (via rglob)
IMG_EXTS = [".jpg", ".jpeg", ".png"]
# Seed Python's global `random` module with a fixed value
RANDOM_SEED = 42
random.seed(RANDOM_SEED)

# Create output directory (best weights of each run are copied here)
DRIVE_UPLOAD_FOLDER.mkdir(parents=True, exist_ok=True)

# Login to W&B
import wandb
wandb.login()

print("Setup complete! Ready for training.")

## 3. Data Preparation

In [None]:
import random
import shutil
from pathlib import Path
from ultralytics import YOLO

# --- CONFIGURATION FOR SPLITS ---
# We run this ONCE to ensure every experiment uses the exact same validation set.
INTERNAL_TRAIN_LIST = Path("internal_train_list.txt")
INTERNAL_VAL_LIST = Path("internal_val_list.txt")

def prepare_fixed_splits(real_dataset_path):
    """Create the fixed internal train/val image lists, once.

    Scans ``real_dataset_path / "images"`` recursively for files whose suffix
    is in ``IMG_EXTS``, shuffles them with a fixed seed (42) and splits them
    80/20 into an internal training pool and an internal validation set. The
    absolute image paths are written, one per line, to ``INTERNAL_TRAIN_LIST``
    and ``INTERNAL_VAL_LIST``.

    If both list files already exist the function returns immediately, so the
    split never changes between runs.

    Args:
        real_dataset_path: Dataset root (``Path`` or ``str``) containing an
            ``images`` sub-folder.

    Raises:
        FileNotFoundError: If the ``images`` folder does not exist.
        ValueError: If too few images are found to fill both the training
            pool and the validation set.
    """
    if INTERNAL_TRAIN_LIST.exists() and INTERNAL_VAL_LIST.exists():
        print("✅ Fixed splits already exist. Using existing lists.")
        return

    print("⚠️ Generating new fixed splits from Real Data...")
    images_dir = Path(real_dataset_path) / "images"
    if not images_dir.is_dir():
        # Fail loudly instead of silently writing two empty list files
        raise FileNotFoundError(f"Images directory not found: {images_dir}")

    # 1. Gather all images (set removes duplicates, sorting makes the order
    #    independent of filesystem enumeration order)
    all_images = sorted(
        {p for ext in IMG_EXTS for p in images_dir.rglob(f"*{ext}")}
    )

    # 2. Shuffle with a FIXED SEED.
    #    A dedicated Random(42) instance yields the same permutation as
    #    seeding the global generator with 42, without resetting the global
    #    `random` state as a side effect.
    random.Random(42).shuffle(all_images)

    # 3. Split 80/20 (Internal Train / Internal Val)
    split_idx = int(len(all_images) * 0.8)
    train_pool = all_images[:split_idx]
    val_pool = all_images[split_idx:]
    if not train_pool or not val_pool:
        raise ValueError(
            f"Found {len(all_images)} image(s) in {images_dir}; "
            "need at least 2 to build both a train pool and a val set."
        )

    # 4. Save to disk (absolute paths, one per line)
    with open(INTERNAL_TRAIN_LIST, 'w') as f:
        f.write('\n'.join(str(p.resolve()) for p in train_pool))

    with open(INTERNAL_VAL_LIST, 'w') as f:
        f.write('\n'.join(str(p.resolve()) for p in val_pool))

    print(f"Split Complete: {len(train_pool)} Train, {len(val_pool)} Val.")
    print("Lists saved to disk.")

def create_subset_yaml(percentage, run_name):
    """Write the training subset list and dataset YAML for one run.

    Takes the first ``percentage`` fraction of the lines in
    ``INTERNAL_TRAIN_LIST`` as the training subset, saves it to
    ``{run_name}_train.txt`` and writes ``{run_name}_dataset.yaml`` pointing
    at that subset (train), ``INTERNAL_VAL_LIST`` (val) and
    ``TEST_DATASET_PATH / "images"`` (test). Because the subset is always a
    prefix of the same list, larger percentages contain the smaller ones.

    Args:
        percentage: Fraction of the internal training pool to use, in (0, 1].
        run_name: Prefix used for the generated ``.txt`` and ``.yaml`` files.

    Returns:
        Path of the generated dataset YAML file (relative to the working dir).

    Raises:
        ValueError: If ``percentage`` is outside (0, 1] or selects no images.
        FileNotFoundError: If the fixed split lists have not been generated.
    """
    if not 0 < percentage <= 1:
        raise ValueError(f"percentage must be in (0, 1], got {percentage!r}")
    if not (INTERNAL_TRAIN_LIST.exists() and INTERNAL_VAL_LIST.exists()):
        raise FileNotFoundError(
            f"{INTERNAL_TRAIN_LIST} / {INTERNAL_VAL_LIST} not found. "
            "Run prepare_fixed_splits(REAL_DATASET_PATH) first."
        )

    # 1. Read the Master Train Pool (blank lines are ignored)
    with open(INTERNAL_TRAIN_LIST, 'r') as f:
        full_train = [line.strip() for line in f if line.strip()]

    # 2. Create the Subset (e.g., 10% of the train pool)
    subset_size = int(len(full_train) * percentage)
    if subset_size == 0:
        # An empty train list would only fail later, deep inside training
        raise ValueError(
            f"percentage={percentage} selects 0 of {len(full_train)} training images."
        )
    train_subset = full_train[:subset_size]

    # Save the specific subset list for this run
    train_subset_path = Path(f"{run_name}_train.txt")
    with open(train_subset_path, 'w') as f:
        f.write('\n'.join(train_subset))

    print(f"[{run_name}] Training on {len(train_subset)} images (Subset of {len(full_train)})")

    # 3. Define path for External Test Set
    # We assume your test images are in TEST_DATASET_PATH / "images"
    # If your TEST_DATASET_PATH is already a .txt list, use that instead.
    test_images_dir = TEST_DATASET_PATH / "images"

    # 4. Create YAML
    # TRAIN: The subset we just made
    # VAL: The fixed internal validation list
    # TEST: The external folder
    dataset_config = {
        "path": str(Path.cwd()),
        "train": str(train_subset_path.resolve()),
        "val": str(INTERNAL_VAL_LIST.resolve()),
        "test": str(test_images_dir.resolve()),
        "nc": 1,
        "names": ["object"],
    }
    yaml_path = Path(f"{run_name}_dataset.yaml")
    # Let the YAML library do the quoting so paths containing characters such
    # as ": " or " #" cannot corrupt the file; sort_keys=False keeps the
    # key order above.
    with open(yaml_path, 'w', encoding="utf-8") as f:
        yaml.safe_dump(dataset_config, f, sort_keys=False, allow_unicode=True)

    return yaml_path

def run_finetuning(percentage, run_name_suffix):
    """Fine-tune the synthetic-pretrained checkpoint on a slice of real data.

    Builds the dataset YAML for ``percentage`` of the internal training pool,
    loads the weights at ``SYNTHETIC_MODEL_PATH``, trains with
    ``FINETUNE_CONFIG`` and copies the resulting ``best.pt`` into
    ``DRIVE_UPLOAD_FOLDER`` as ``yolo_obb_ft_<suffix>.pt``.

    Args:
        percentage: Fraction of the internal training pool to train on.
        run_name_suffix: Label appended to ``yolo_obb_ft_`` to form the run name.
    """
    run_name = f"yolo_obb_ft_{run_name_suffix}"

    # Dataset definition built from the fixed splits
    data_config_path = create_subset_yaml(percentage, run_name)

    # Start from the synthetic-data checkpoint and train; the 'val' entry of
    # the YAML (internal validation list) is what training validates against.
    detector = YOLO(SYNTHETIC_MODEL_PATH)
    detector.train(
        data=str(data_config_path),
        project=WANDB_PROJECT,
        name=run_name,
        **FINETUNE_CONFIG
    )

    # Copy the best checkpoint next to the other results
    best_weights = Path(detector.trainer.save_dir) / "weights" / "best.pt"
    if not best_weights.exists():
        print("Warning: Best weights file not found.")
        return

    target_path = DRIVE_UPLOAD_FOLDER / f"{run_name}.pt"
    print(f"Uploading {best_weights} to {target_path}...")
    shutil.copy(best_weights, target_path)

def run_training_scratch(percentage, run_name_suffix):
    """Train the baseline model on a slice of real data using the fixed splits.

    Loads the weights at ``BASE_MODEL_PATH`` (i.e. not the synthetic
    checkpoint), trains with ``SCRATCH_CONFIG`` on ``percentage`` of the
    internal training pool and copies the resulting ``best.pt`` into
    ``DRIVE_UPLOAD_FOLDER`` as ``yolo_obb_scratch_<suffix>.pt``.

    If the base model cannot be loaded, ``yolo11l-obb.pt`` is used instead and
    a warning is printed, because that fallback is a different model and the
    resulting baseline is then not directly comparable.

    Args:
        percentage: Fraction of the internal training pool to train on.
        run_name_suffix: Label appended to ``yolo_obb_scratch_`` to form the
            run name.
    """
    run_name = f"yolo_obb_scratch_{run_name_suffix}"

    dataset_yaml = create_subset_yaml(percentage, run_name)

    try:
        model = YOLO(BASE_MODEL_PATH)
    except Exception as e:
        # Best-effort fallback is kept, but it must never happen silently:
        # the run would otherwise be logged as if it used BASE_MODEL_PATH.
        fallback_model = "yolo11l-obb.pt"
        print(
            f"Warning: could not load base model '{BASE_MODEL_PATH}' ({e}). "
            f"Falling back to '{fallback_model}' - this baseline uses a different model."
        )
        model = YOLO(fallback_model)

    model.train(
        data=str(dataset_yaml),
        project=WANDB_PROJECT,
        name=run_name,
        **SCRATCH_CONFIG
    )

    best_weights = Path(model.trainer.save_dir) / "weights" / "best.pt"
    target_name = f"{run_name}.pt"
    target_path = DRIVE_UPLOAD_FOLDER / target_name

    if best_weights.exists():
        print(f"Uploading {best_weights} to {target_path}...")
        shutil.copy(best_weights, target_path)
    else:
        # Same warning as run_finetuning so a missing checkpoint is visible
        print("Warning: Best weights file not found.")

## 4. Training Experiments

### Experiment: 5% Data

In [None]:
# Make sure the fixed train/val lists exist (returns immediately if both are already on disk)
prepare_fixed_splits(REAL_DATASET_PATH)
# Cell 1: 5% Fine-tuning
run_finetuning(percentage=0.05, run_name_suffix="5pct")

### Experiment: 10% Data

In [None]:
# Make sure the fixed train/val lists exist (returns immediately if both are already on disk)
prepare_fixed_splits(REAL_DATASET_PATH)
# Cell 2: 10% Fine-tuning
run_finetuning(percentage=0.10, run_name_suffix="10pct")

### Experiment: 20% Data

In [None]:
# Make sure the fixed train/val lists exist (returns immediately if both are already on disk)
prepare_fixed_splits(REAL_DATASET_PATH)
# Cell 3: 20% Fine-tuning
run_finetuning(percentage=0.20, run_name_suffix="20pct")

### Experiment: 30% Data

In [None]:
# Make sure the fixed train/val lists exist (returns immediately if both are already on disk)
prepare_fixed_splits(REAL_DATASET_PATH)
# Cell 4: 30% Fine-tuning
run_finetuning(percentage=0.30, run_name_suffix="30pct")

### Experiment: 40% Data

In [None]:
# Make sure the fixed train/val lists exist (returns immediately if both are already on disk)
prepare_fixed_splits(REAL_DATASET_PATH)
# Cell 5: 40% Fine-tuning
run_finetuning(percentage=0.40, run_name_suffix="40pct")

### Experiment: 50% Data

In [None]:
# Make sure the fixed train/val lists exist (returns immediately if both are already on disk)
prepare_fixed_splits(REAL_DATASET_PATH)
# Cell 6: 50% Fine-tuning
run_finetuning(percentage=0.50, run_name_suffix="50pct")

### Baseline: Scratch Training (40%)

In [None]:
# Baseline on 40% of the internal training pool, trained from the BASE model.
# Ensure the fixed split lists exist so this cell also works when it is run
# before any fine-tuning cell (the call returns immediately if they exist).
prepare_fixed_splits(REAL_DATASET_PATH)

alpha_pct = 0.40 # [Editable] Change this variable to your desired percentage (0.0 to 1.0)

# NOTE: the suffix is hard-coded to "..._40"; update it too if alpha_pct is changed.
run_training_scratch(percentage=alpha_pct, run_name_suffix="alpha_baseline_40")

### Baseline: Scratch Training (Alpha)

In [None]:
# Baseline cell: Alpha% Scratch Training
# This trains from the BASE model (not synthetic) to establish a baseline for comparison.
# Ensure the fixed split lists exist so this cell also works when it is run
# before any fine-tuning cell (the call returns immediately if they exist).
prepare_fixed_splits(REAL_DATASET_PATH)

alpha_pct = 0.50 # [Editable] Change this variable to your desired percentage (0.0 to 1.0)

run_training_scratch(percentage=alpha_pct, run_name_suffix="alpha_baseline")

# 5. Models Testing on Real Data

This section evaluates the fine-tuned models on the held-out test set.

In [None]:
# Setup Test Data
TEST_DATASET_PATH = Path("last-test")

def create_test_yaml(test_path, run_name):
    """Write an image list and a dataset YAML for the held-out test set.

    Every file under ``test_path / "images"`` (searched recursively) whose
    suffix is in ``IMG_EXTS`` is listed, by absolute path, in
    ``{run_name}_test.txt``. The generated ``{run_name}_test_dataset.yaml``
    points its train, val and test entries at that same list.

    Args:
        test_path: Test dataset root containing an ``images`` sub-folder.
        run_name: Prefix used for the generated ``.txt`` and ``.yaml`` files.

    Returns:
        Path of the generated YAML file.
    """
    images_dir = test_path / "images"

    # Collect matches for each extension into a set (dedupes), then sort
    found = set()
    for ext in IMG_EXTS:
        found.update(images_dir.rglob(f"*{ext}"))
    test_images = sorted(found)
    print(f"[{run_name}] Total Test Images: {len(test_images)}")

    # Image list: one absolute path per line
    test_txt_path = Path(f"{run_name}_test.txt")
    listing = '\n'.join(str(image.resolve()) for image in test_images)
    with open(test_txt_path, 'w') as list_file:
        list_file.write(listing)

    # All three splits reference the same list file
    list_location = test_txt_path.resolve()
    yaml_content = f"""
path: {Path.cwd()}
train: {list_location} # Not used for validation but required
val: {list_location}  # Used for validation
test: {list_location} # Used for prediction/testing

nc: 1
names: ['object']
"""
    yaml_path = Path(f"{run_name}_test_dataset.yaml")
    with open(yaml_path, 'w') as yaml_file:
        yaml_file.write(yaml_content)

    return yaml_path

### Run Evaluation

In [None]:
# Run Evaluation on All Models
# Each entry is a weights file; relative paths are resolved against the
# current working directory. Missing files are reported and skipped.
models_to_test = [
    "YOLO_OBB_Mixed_Training_Results/yolo26l_mixed_5pct.pt",
    "YOLO_OBB_Mixed_Training_Results/yolo26l_mixed_10pct.pt",
    "YOLO_OBB_Mixed_Training_Results/yolo26l_mixed_20pct.pt",
    "YOLO_OBB_Mixed_Training_Results/yolo26l_mixed_30pct.pt",
    "YOLO_OBB_Mixed_Training_Results/yolo26l_mixed_40pct.pt",
    "YOLO_OBB_FineTuning_Results/yolo_obb_scratch_alpha_baseline_40.pt",
    "YOLO_OBB_Mixed_Training_Results/YOLO26l_OBB_Real_Data_only.pt",
    "YOLO_OBB_Mixed_Training_Results/YOLO26l_OBB_E1_Synthesized_only.pt",
]
results_summary = {}
test_yaml = create_test_yaml(TEST_DATASET_PATH, "test_v3_finetune")

for model_name in models_to_test:
    model_path = Path(model_name)

    if not model_path.exists():
        print(f"Model {model_name} not found at {model_path.resolve()}. Skipping.")
        continue

    print(f"\nEvaluating {model_name}...")
    try:
        model = YOLO(model_path)
        # Derive a distinct run name per model.
        # - ".../<run>/weights/best.pt" layout -> use the run folder name
        # - flat "<folder>/<name>.pt" layout   -> use the file stem
        # (parent.parent.name alone is "" for the flat layout, which made
        #  every evaluation run share the name "test_".)
        if model_path.parent.name == "weights":
            run_name = model_path.parent.parent.name
        else:
            run_name = model_path.stem

        metrics = model.val(data=str(test_yaml), split='test', project=WANDB_PROJECT, name=f"test_{run_name}")

        # Store plain Python floats so the summary is always JSON-serialisable
        results_summary[model_name] = {
            "map50": float(metrics.box.map50),
            "map50-95": float(metrics.box.map),
            "precision": float(metrics.box.mp),  # mean precision
            "recall": float(metrics.box.mr),     # mean recall
        }
        res = results_summary[model_name]
        print(f"Result for {model_name}:")
        print(f"  mAP50={res['map50']:.4f}, mAP50-95={res['map50-95']:.4f}")
        print(f"  Precision={res['precision']:.4f}, Recall={res['recall']:.4f}")

    except Exception as e:
        # Keep going: one failing model must not abort the whole comparison
        print(f"Error evaluating {model_name}: {e}")

print("\n=== FINAL RESULTS ===")
for m, res in results_summary.items():
    print(f"{m}:")
    print(f"  mAP50={res['map50']:.4f}, mAP50-95={res['map50-95']:.4f}")
    print(f"  Precision={res['precision']:.4f}, Recall={res['recall']:.4f}")

# Save results to file
import json
from datetime import datetime

# Create results directory if it doesn't exist
results_dir = Path("YOLO_OBB_Mixed_Training_Results/test_results_last")
results_dir.mkdir(parents=True, exist_ok=True)

# Take the time once so the file names and the "Test Date" line agree
evaluated_at = datetime.now()

# Save as JSON
timestamp = evaluated_at.strftime("%Y%m%d_%H%M%S")
json_file = results_dir / f"test_results_{timestamp}.json"
with open(json_file, 'w') as f:
    json.dump(results_summary, f, indent=4)
print(f"\nResults saved to: {json_file}")

# Save as formatted text file
txt_file = results_dir / f"test_results_{timestamp}.txt"
with open(txt_file, 'w') as f:
    f.write("=== MODEL EVALUATION RESULTS ===\n")
    f.write(f"Test Date: {evaluated_at.strftime('%Y-%m-%d %H:%M:%S')}\n\n")

    for m, res in results_summary.items():
        f.write(f"Model: {m}\n")
        f.write(f"  mAP50:     {res['map50']:.4f}\n")
        f.write(f"  mAP50-95:  {res['map50-95']:.4f}\n")
        f.write(f"  Precision: {res['precision']:.4f}\n")
        f.write(f"  Recall:    {res['recall']:.4f}\n")
        f.write("\n")
print(f"Results saved to: {txt_file}")