In [None]:
!pip install ultralytics

In [None]:
from ultralytics import YOLO
import yaml
import os
from pathlib import Path
import matplotlib.pyplot as plt
import cv2
import numpy as np
import pandas as pd
import seaborn as sns
from collections import defaultdict
import torch

# Kaggle locations: read-only dataset input and the writable output directory.
kaggle_input_dir = "/kaggle/input/document-dataset-for-yolo/document-dataset-for-yolo"
# Alternative "mini" dataset path, kept as a manual toggle:
# kaggle_input_dir = "/kaggle/input/mini-document-dataset-for-yolo/mini-document-dataset-for-yolo"

kaggle_working_dir = "/kaggle/working"
Path(kaggle_working_dir).mkdir(parents=True, exist_ok=True)

# Report how many CUDA devices are visible and describe device 0, which is the
# device the training and validation calls below are pinned to.
gpu_count = torch.cuda.device_count()
print(f"Available GPUs: {gpu_count}")
if gpu_count > 0:
    gpu_name = torch.cuda.get_device_name(0)
    gpu_total_bytes = torch.cuda.get_device_properties(0).total_memory
    print(f"Using GPU: {gpu_name}")
    print(f"GPU memory: {gpu_total_bytes / 1e9:.2f} GB")

# Path to the dataset definition that ships with the Kaggle dataset
yaml_path = os.path.join(kaggle_input_dir, "dataset.yaml")

# Fail fast when the dataset definition is missing: every later step (training,
# validation, class-name lookup via `config`) depends on it, so continuing
# would only postpone the failure to a less obvious place.
if not os.path.isfile(yaml_path):
    raise FileNotFoundError(f"Error: YAML file not found at {yaml_path}")

# Load the existing YAML to display info. An empty document parses to None,
# so normalise it to an empty mapping to keep the .get() calls below valid.
with open(yaml_path, 'r', encoding='utf-8') as f:
    config = yaml.safe_load(f) or {}
if not isinstance(config, dict):
    raise ValueError(
        f"Expected a mapping at the top level of {yaml_path}, got {type(config).__name__}"
    )

print(f"Using existing YAML config at {yaml_path}")
print(f"Dataset classes: {config.get('names', {})}")
print(f"Train path: {config.get('train', '')}")
print(f"Val path: {config.get('val', '')}")

# Initialize the model from the 'yolo11s.pt' checkpoint (the "s" variant).
# NOTE(review): an earlier comment here said YOLOv11m; if the medium model was
# intended, the checkpoint name must be changed to 'yolo11m.pt' — confirm.
model = YOLO('yolo11s.pt')

# Single-GPU training configuration, assembled from themed groups.
# The groups are merged in the order listed so that the resulting dict (and
# therefore the printed summary further down) keeps one stable key order.

# Run / trainer settings
_run_settings = {
    "epochs": 30,
    "imgsz": 1024,
    "batch": 8,      # kept small for a single GPU
    "patience": 15,
    "save": True,
    "device": 0,     # first (only) GPU
    "workers": 4,    # kept small for a single GPU
    "optimizer": "SGD",
    "verbose": True,
    "seed": 42,
    "deterministic": True,
    "single_cls": False,  # the document dataset has several classes
    "rect": False,
    "cos_lr": True,
    "close_mosaic": 10,
    "resume": False,
    "amp": True,     # automatic mixed precision
    "fraction": 1.0,
    "exist_ok": True,
    "pretrained": True,
    "plots": True,
    "name": "document_detector_single_gpu",
}

# Optimizer schedule and loss gains
_optimizer_hyperparams = {
    "lr0": 0.01,
    "lrf": 0.01,
    "momentum": 0.937,
    "weight_decay": 0.0005,
    "warmup_epochs": 3.0,
    "warmup_momentum": 0.8,
    "warmup_bias_lr": 0.1,
    "box": 7.5,   # box loss gain
    "cls": 0.5,   # class loss gain
    "dfl": 1.5,
}

# Colour-space augmentation (author's rationale: documents are mostly B&W,
# so hue barely moves while saturation/brightness vary moderately)
_color_augmentation = {
    "hsv_h": 0.015,
    "hsv_s": 0.3,
    "hsv_v": 0.3,
}

# Geometric augmentation, kept conservative so text stays readable
_geometric_augmentation = {
    "degrees": 5.0,         # slight page tilt
    "translate": 0.05,      # small shift of the document position
    "scale": 0.15,          # moderate zoom variation
    "shear": 2.0,           # minimal shear
    "perspective": 0.0001,  # documents are treated as almost flat
}

# Flips are switched off: mirrored or upside-down text is not realistic
_flip_augmentation = {
    "flipud": 0.0,
    "fliplr": 0.0,
}

# Composite augmentations are switched off to keep page layouts intact
_composite_augmentation = {
    "mosaic": 0.0,
    "mixup": 0.0,
    "copy_paste": 0.0,
}

# Remaining settings
_extra_settings = {
    "dropout": 0.0,
    "auto_augment": None,
}

training_params = {
    **_run_settings,
    **_optimizer_hyperparams,
    **_color_augmentation,
    **_geometric_augmentation,
    **_flip_augmentation,
    **_composite_augmentation,
    **_extra_settings,
}

# Announce the run and echo the full configuration into the notebook log.
print(
    "Starting training on single GPU...",
    f"Training parameters: {training_params}",
    sep="\n",
)

# Train against the dataset's original YAML; the returned object is kept in
# `results` for the history export further down.
results = model.train(data=yaml_path, **training_params)

# Write the trained model into the Kaggle working directory.
model_save_path = os.path.join(kaggle_working_dir, "document_detection_model.pt")
model.save(model_save_path)
print(f"Model saved to Kaggle working directory: {model_save_path}")

# Also export to ONNX format.
# `model.export` chooses its own output location and returns it, so the file
# is copied to the advertised path in the working directory afterwards;
# otherwise the success message would name a file that does not exist.
# Export is best-effort: a failure is reported but does not stop the notebook.
try:
    import shutil  # only needed for this copy step

    onnx_save_path = os.path.join(kaggle_working_dir, "document_detection_model.onnx")
    exported_path = model.export(format="onnx", imgsz=1024)

    if exported_path and os.path.isfile(str(exported_path)):
        # Skip the copy when the exporter already wrote to the target path.
        if os.path.abspath(str(exported_path)) != os.path.abspath(onnx_save_path):
            shutil.copy2(str(exported_path), onnx_save_path)
        print(f"Model exported to ONNX format at: {onnx_save_path}")
    else:
        print(f"ONNX export finished but no output file was found (export returned: {exported_path!r})")
except Exception as e:
    print(f"ONNX export failed: {e}")

# Safely save training history if available.
#
# The per-epoch history is the `results.csv` that the trainer writes into its
# run directory, so that file is the preferred source. `results.results_dict`
# only holds the final metrics and is used as a fallback when no CSV exists.
# The whole step is best-effort: any failure is reported and the notebook
# continues with validation.
history_path = os.path.join(kaggle_working_dir, "training_history.csv")
try:
    # Hard-coded location used originally; kept as the last candidate.
    runs_dir = os.path.join(kaggle_working_dir, "runs", "detect", "document_detector_single_gpu")

    # Prefer the directory the run actually reported, when it is exposed.
    # NOTE(review): assumes `results` / `model.trainer` expose `save_dir` —
    # getattr keeps this safe if they do not.
    candidate_dirs = []
    for owner in (results, getattr(model, "trainer", None)):
        reported_dir = getattr(owner, "save_dir", None)
        if reported_dir:
            candidate_dirs.append(str(reported_dir))
    candidate_dirs.append(runs_dir)

    results_csv_path = next(
        (
            candidate
            for candidate in (os.path.join(d, "results.csv") for d in candidate_dirs)
            if os.path.exists(candidate)
        ),
        None,
    )

    if results_csv_path is not None:
        print(f"Found training history at: {results_csv_path}")
        history_df = pd.read_csv(results_csv_path)
        history_df.to_csv(history_path, index=False)
        print(f"Training history copied to: {history_path}")
    else:
        final_metrics = getattr(results, 'results_dict', None)
        if isinstance(final_metrics, dict) and final_metrics:
            # One row per metric name (same layout the original fallback wrote).
            history_df = pd.DataFrame.from_dict(final_metrics, orient='index')
            history_df.to_csv(history_path)
            print(f"No per-epoch results.csv found; final metrics saved to: {history_path}")
        elif final_metrics:
            # Non-dict payload (list or similar): keep the original handling.
            history_df = pd.DataFrame(final_metrics, index=range(len(final_metrics)))
            history_df.to_csv(history_path, index=False)
            print(f"No per-epoch results.csv found; final metrics saved to: {history_path}")
        else:
            print("No results.csv or results dictionary found; training history was not saved.")
except Exception as e:
    print(f"Error saving training history: {str(e)}")
    print("Continuing with validation...")

# Run validation with the same image size, batch size and device as training.
val_results = model.val(data=yaml_path, imgsz=1024, batch=8, device=0, plots=True)

# Collect the headline numbers once; they feed both the printout and the CSV.
# "maps" is stored as-is (an array-like of per-class values in one cell).
metrics = {
    "mAP50": val_results.box.map50,
    "mAP50-95": val_results.box.map,
    "precision": val_results.box.mp,
    "recall": val_results.box.mr,
    "maps": val_results.box.maps,
}

print("Validation results:")
for _label, _key in (
    ("mAP@0.5", "mAP50"),
    ("mAP@0.5:0.95", "mAP50-95"),
    ("Precision", "precision"),
    ("Recall", "recall"),
):
    print(f"  {_label} = {metrics[_key]:.4f}")

# Write the summary as a single-row CSV into the working directory.
metrics_path = os.path.join(kaggle_working_dir, "validation_metrics.csv")
metrics_df = pd.DataFrame([metrics])
metrics_df.to_csv(metrics_path, index=False)
print(f"Validation metrics saved to: {metrics_path}")

# Get class-wise metrics.
# `names` in the dataset YAML may be a mapping (id -> name) or a plain list;
# a missing or null entry is treated as "no class names".
names_cfg = config.get('names')
class_names = list(names_cfg.values()) if isinstance(names_cfg, dict) else (names_cfg or [])

if len(class_names) > 0:
    print("\nClass-wise metrics:")
    # `box.maps` holds one mAP@0.5:0.95 value per class id (not mAP@0.5),
    # so it is indexed by class id and labelled accordingly.
    per_class_map = getattr(val_results.box, 'maps', None)
    if per_class_map is not None:
        for i, class_name in enumerate(class_names):
            if i < len(per_class_map):
                print(f"  {class_name}: mAP50-95 = {per_class_map[i]:.4f}")


# ---- Detailed validation report: overall and per-class metrics ----

# Report header for the comprehensive metrics section
_banner = "=" * 60
print(f"\n{_banner}")
print("COMPREHENSIVE VALIDATION METRICS")
print(_banner)

# Overall precision / recall come straight from the validation result; F1 is
# their harmonic mean, with a small epsilon guarding against division by zero.
overall_precision = val_results.box.mp
overall_recall = val_results.box.mr
overall_f1 = 2 * (overall_precision * overall_recall) / (overall_precision + overall_recall + 1e-6)

print("\n--- OVERALL METRICS ---")
for _title, _value in (
    ("Precision", overall_precision),
    ("Recall", overall_recall),
    ("F1-Score", overall_f1),
    ("mAP@0.5", val_results.box.map50),
    ("mAP@0.5:0.95", val_results.box.map),
):
    print(f"{_title}: {_value:.4f}")

# Class-wise metrics
print("\n--- CLASS-WISE METRICS ---")

class_metrics_list = []


def _value_at(values, position):
    """Return values[position] as a float, or 0.0 when position is out of range."""
    return float(values[position]) if position < len(values) else 0.0


# Check if per-class metrics are available
if hasattr(val_results.box, 'ap_class_index'):
    # Class ids that actually received metrics during validation. All
    # per-class arrays below are aligned with this list by position.
    ap_class_index = val_results.box.ap_class_index
    num_evaluated = len(ap_class_index)

    # Per-class precision and recall (fall back to the overall values when the
    # result object does not expose them).
    if hasattr(val_results.box, 'p') and hasattr(val_results.box, 'r'):
        precision_per_class = np.atleast_1d(np.asarray(val_results.box.p, dtype=float))
        recall_per_class = np.atleast_1d(np.asarray(val_results.box.r, dtype=float))
    else:
        precision_per_class = np.full(num_evaluated, float(overall_precision))
        recall_per_class = np.full(num_evaluated, float(overall_recall))

    # Per-class AP@0.5. This must come from `ap50`: `box.maps` holds
    # mAP@0.5:0.95 and is indexed by class id rather than by position in
    # `ap_class_index`, so using it here would report the wrong metric for
    # the wrong class.
    ap50 = getattr(val_results.box, 'ap50', None)
    map50_per_class = np.atleast_1d(np.asarray(ap50 if ap50 is not None else [], dtype=float))

    # Per-class AP@0.5:0.95. A 2-D array is averaged over its second axis
    # (one column per IoU threshold); an empty result becomes an empty array
    # instead of raising on `.shape`.
    ap = getattr(val_results.box, 'ap', None)
    ap = np.asarray(ap if ap is not None else [], dtype=float)
    map50_95_per_class = ap.mean(axis=1) if ap.ndim > 1 else np.atleast_1d(ap)

    # Print and collect metrics for each evaluated class
    for i, cls_idx in enumerate(ap_class_index):
        cls_idx = int(cls_idx)
        cls_name = class_names[cls_idx] if 0 <= cls_idx < len(class_names) else f"Class {cls_idx}"

        prec = _value_at(precision_per_class, i)
        rec = _value_at(recall_per_class, i)
        f1 = 2 * (prec * rec) / (prec + rec) if (prec + rec) > 0 else 0.0
        map50 = _value_at(map50_per_class, i)
        map50_95 = _value_at(map50_95_per_class, i)

        print(f"\n{cls_name}:")
        print(f"  Precision:    {prec:.4f}")
        print(f"  Recall:       {rec:.4f}")
        print(f"  F1-Score:     {f1:.4f}")
        print(f"  mAP@0.5:      {map50:.4f}")
        print(f"  mAP@0.5:0.95: {map50_95:.4f}")

        class_metrics_list.append({
            'class_id': cls_idx,
            'class_name': cls_name,
            'precision': prec,
            'recall': rec,
            'f1_score': f1,
            'mAP50': map50,
            'mAP50-95': map50_95
        })
else:
    print("Per-class metrics not available in validation results")

# Write the overall row followed by one row per class to a single CSV.
# Nothing is written when no per-class metrics were collected.
if class_metrics_list:
    _metric_columns = (
        'class_id', 'class_name', 'precision', 'recall',
        'f1_score', 'mAP50', 'mAP50-95',
    )
    _overall_values = (
        -1, 'OVERALL', overall_precision, overall_recall,
        overall_f1, val_results.box.map50, val_results.box.map,
    )
    # class_id -1 marks the aggregate row so it cannot collide with a real id.
    overall_row = dict(zip(_metric_columns, _overall_values))

    all_metrics = [overall_row, *class_metrics_list]
    metrics_df = pd.DataFrame(all_metrics)

    detailed_metrics_path = os.path.join(kaggle_working_dir, "detailed_class_metrics.csv")
    metrics_df.to_csv(detailed_metrics_path, index=False)
    print(f"\nDetailed metrics saved to: {detailed_metrics_path}")
    
    
