In [4]:
# Install all required libraries quietly
!pip install -q ultralytics pandas matplotlib

# Import a library to check the setup and display a success message
import os
import pkg_resources

# Verification: compare the required distributions against what is actually
# installed and report the outcome (previously `missing` was computed but
# never shown, so a failed install went unnoticed).
from importlib import metadata

required = {'ultralytics', 'pandas', 'matplotlib'}
# Normalise installed names (lower-case, '-' instead of '_') so the set
# difference is not defeated by spelling variants of the same distribution.
installed = {
    name.lower().replace('_', '-')
    for name in (dist.metadata.get('Name') for dist in metadata.distributions())
    if name
}
missing = required - installed

if missing:
    print(f"❌ Missing packages: {', '.join(sorted(missing))}")
else:
    print("✅ All required libraries are installed.")



[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/1.1 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.4/1.1 MB[0m [31m11.1 MB/s[0m eta [36m0:00:01[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m1.1/1.1 MB[0m [31m18.5 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m15.5 MB/s[0m eta [36m0:00:00[0m
[?25h

  import pkg_resources


In [None]:
# =====================================================================================
#         EXPERT'S YOLO TRAINING SCRIPT (KNOWLEDGE DISTILLATION FOR SPEED)
# =====================================================================================

# Standard library imports
import os
import sys
import yaml
import json
from pathlib import Path

# Third-party imports
import cv2
import torch
import torch.nn as nn
from ultralytics import YOLO
from ultralytics.utils.loss import v8DetectionLoss
from ultralytics.models.yolo.detect.train import DetectionTrainer
from tqdm import tqdm
import matplotlib.pyplot as plt
import pandas as pd
from PIL import Image

# =====================================================================================
# 🔧 1. MASTER CONFIGURATION
# =====================================================================================

# --- Dataset locations (Google Drive, mounted under /content/drive) ---
TRAIN_DATA_ROOT = Path("/content/drive/MyDrive").joinpath("MICROSOFT HACK", "hackathon2_train_1")
TEST_DATA_ROOT = Path("/content/drive/MyDrive").joinpath("MICROSOFT HACK", "Hackathon2_test1")

# --- Execution mode ---
# The main block of this cell handles 'train' and 'analyze'; any other value
# (including 'predict') is reported there as invalid.
EXECUTION_CONFIG = dict(MODE='train')

# --- Output root on Google Drive (dataset YAML and training runs go here) ---
GOOGLE_DRIVE_ROOT = Path("/content/drive/MyDrive") / "YOLO_Safety_Project_Nano_KD"

# --- The 7 safety-object classes, in label-index order ---
CLASS_NAMES = (
    'OxygenTank NitrogenTank FirstAidBox FireAlarm '
    'SafetySwitchPanel EmergencyPhone FireExtinguisher'
).split()

# --- Hyperparameters forwarded verbatim to the Ultralytics trainer ---
TRAINING_CONFIG = dict(
    epochs=60,            # kept short for the nano model
    mosaic=1.0,
    optimizer='AdamW',
    momentum=0.937,
    lr0=0.01,
    lrf=0.01,
    batch=16,
    imgsz=640,
    patience=25,
    warmup_epochs=3,
    weight_decay=0.0005,
    single_cls=False,
    amp=True,
    device=0,
    cos_lr=True,
    mixup=0.05,           # mild augmentation, chosen for the smaller model
    copy_paste=0.05,      # mild augmentation, chosen for the smaller model
)

# --- Knowledge-distillation settings ---
KD_CONFIG = dict(
    enable=True,
    teacher_model='yolov8l.pt',   # large checkpoint acting as teacher
    student_model='yolov8n.pt',   # nano checkpoint that is actually trained
    temperature=4.0,              # softening temperature for the KL term
    hard_weight=0.7,              # weight of the native detection loss
    soft_weight=0.3,              # weight of the distillation (KL) loss
)

# =====================================================================================
# 🧠 2. KNOWLEDGE DISTILLATION IMPLEMENTATION (Unchanged)
# =====================================================================================
class YoloKDLoss(nn.Module):
    """Blend the native YOLOv8 detection loss with a temperature-scaled KL term.

    total = hard_weight * native_loss + soft_weight * T^2 * KL(student || teacher)

    Args:
        student_model: model handed to ``v8DetectionLoss`` to build the native
            (hard-label) criterion.
        hard_weight: multiplier of the native detection loss.
        soft_weight: multiplier of the distillation loss.
        temperature: softmax temperature ``T`` applied to both logit sets.
    """

    def __init__(self, student_model, hard_weight, soft_weight, temperature):
        super().__init__()
        self.hard_weight, self.soft_weight, self.temperature = hard_weight, soft_weight, temperature
        self.native_loss = v8DetectionLoss(student_model)
        self.kl_div = nn.KLDivLoss(reduction='batchmean')

    @staticmethod
    def _as_scalar(loss):
        """Reduce whatever the native criterion returns to one scalar loss."""
        # Ultralytics' v8DetectionLoss returns a (loss, loss_items) pair;
        # multiplying that tuple by a float weight raises TypeError.
        if isinstance(loss, (tuple, list)):
            loss = loss[0]
        # The first element can hold one entry per loss component.
        if isinstance(loss, torch.Tensor) and loss.ndim > 0:
            loss = loss.sum()
        return loss

    @staticmethod
    def _last_tensor(preds):
        """Return ``preds[-1]`` when it is a tensor, otherwise ``None``."""
        if preds is None or len(preds) == 0:
            return None
        last = preds[-1]
        return last if isinstance(last, torch.Tensor) else None

    def forward(self, student_preds, teacher_preds, batch):
        """Return the weighted sum of the hard loss and the distillation loss.

        The distillation term is only added when the last element of both
        prediction sequences is a tensor and the two shapes agree; otherwise
        the result is the weighted hard loss alone and a warning is emitted
        once, instead of crashing inside ``KLDivLoss``.
        """
        hard_loss = self._as_scalar(self.native_loss(student_preds, batch))
        soft_loss_val = 0.0

        student_logits = self._last_tensor(student_preds)
        teacher_logits = self._last_tensor(teacher_preds)
        if (
            student_logits is not None
            and teacher_logits is not None
            and student_logits.shape == teacher_logits.shape
        ):
            teacher_logits = teacher_logits.detach()
            # NOTE(review): softmax runs over the last axis, as before; confirm
            # that this axis really holds the class scores for these outputs.
            student_soft = torch.log_softmax(student_logits / self.temperature, dim=-1)
            teacher_soft = torch.softmax(teacher_logits / self.temperature, dim=-1)
            # T^2 keeps the soft-loss gradient scale comparable across temperatures.
            soft_loss_val = self.kl_div(student_soft, teacher_soft) * (self.temperature ** 2)
        elif not getattr(self, '_kd_skip_warned', False):
            import warnings
            warnings.warn(
                "YoloKDLoss: student/teacher outputs are missing, not tensors, or "
                "differ in shape; the distillation term is skipped.",
                RuntimeWarning,
                stacklevel=2,
            )
            self._kd_skip_warned = True
        return self.hard_weight * hard_loss + self.soft_weight * soft_loss_val

class KnowledgeDistillationTrainer(DetectionTrainer):
    """DetectionTrainer that also carries a frozen teacher network.

    The teacher is loaded from ``KD_CONFIG['teacher_model']``, put in eval
    mode and excluded from gradient computation.

    NOTE(review): nothing in this file calls ``get_loss``. Confirm that the
    installed Ultralytics trainer actually invokes it; otherwise the teacher
    never influences training and this behaves like a plain DetectionTrainer.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        teacher_net = YOLO(KD_CONFIG['teacher_model']).model
        self.teacher = teacher_net.eval()
        for weight in self.teacher.parameters():
            weight.requires_grad = False

    def get_loss(self, preds, batch):
        """Return the distillation loss for student ``preds`` on ``batch``."""
        # The loss module is created on first use, once self.model exists.
        if not hasattr(self, 'kd_loss'):
            self.kd_loss = YoloKDLoss(
                self.model,
                KD_CONFIG['hard_weight'],
                KD_CONFIG['soft_weight'],
                KD_CONFIG['temperature'],
            )

        # Keep the teacher on the same device as the trainer.
        teacher_device = next(self.teacher.parameters()).device
        if teacher_device != self.device:
            self.teacher = self.teacher.to(self.device)

        with torch.no_grad():
            teacher_preds = self.teacher(batch['img'])
        return self.kd_loss(preds, teacher_preds, batch)

# =====================================================================================
# 📝 3. DATASET & UTILITY FUNCTIONS (Unchanged)
# =====================================================================================
def create_optimized_yaml(data_path="yolo_params.yaml"):
    """Write the dataset description YAML and return its path as a string.

    Args:
        data_path: file name (relative to GOOGLE_DRIVE_ROOT) of the YAML file.

    Returns:
        The path of the written YAML file, as ``str``.

    Raises:
        FileNotFoundError: if the train, val or test image directory is missing
            (the first missing one is reported).
    """
    train_dir = TRAIN_DATA_ROOT / "train_1/train1/images"
    val_dir = TRAIN_DATA_ROOT / "train_1/val1/images"
    test_dir = TEST_DATA_ROOT / "test1/images"

    for split_dir in (train_dir, val_dir, test_dir):
        if not split_dir.exists():
            raise FileNotFoundError(f"Path '{split_dir}' does not exist.")

    dataset_spec = {
        'train': str(train_dir.resolve()),
        'val': str(val_dir.resolve()),
        'test': str(test_dir.resolve()),
        'nc': len(CLASS_NAMES),
        'names': CLASS_NAMES,
    }

    GOOGLE_DRIVE_ROOT.mkdir(exist_ok=True, parents=True)
    target = GOOGLE_DRIVE_ROOT / data_path
    with open(target, 'w') as handle:
        # sort_keys=False keeps the key order used in dataset_spec.
        yaml.dump(dataset_spec, handle, default_flow_style=False, sort_keys=False)

    print(f"✅ Dataset YAML created: {target}")
    return str(target)

# =====================================================================================
# 🚀 4. CORE TRAINING FUNCTION (Unchanged)
# =====================================================================================
def train_enhanced_model():
    """Create the dataset YAML and train the student model.

    With ``KD_CONFIG['enable']`` set, training goes through
    KnowledgeDistillationTrainer; otherwise the student is trained with the
    plain ``YOLO.train`` API. Results are written below
    ``GOOGLE_DRIVE_ROOT/runs/detect/KD_l_to_n_v1``.
    """
    data_yaml = create_optimized_yaml()
    print("🚀 STARTING ENHANCED YOLO TRAINING")

    train_args = dict(
        model=KD_CONFIG['student_model'],
        data=data_yaml,
        project=str(GOOGLE_DRIVE_ROOT / 'runs' / 'detect'),
        name='KD_l_to_n_v1',   # run name of this experiment
        exist_ok=True,
        verbose=True,
        plots=True,
    )
    # Hyperparameters come last so they win over the defaults above.
    train_args.update(TRAINING_CONFIG)

    if not KD_CONFIG['enable']:
        student = YOLO(KD_CONFIG['student_model'])
        student.train(**train_args)
    else:
        print(f"🧠 Training with Knowledge Distillation (Teacher: {KD_CONFIG['teacher_model']}, Student: {KD_CONFIG['student_model']})...")
        kd_trainer = KnowledgeDistillationTrainer(overrides=train_args)
        kd_trainer.train()

    print("✅ Training complete!")

# =====================================================================================
# 📊 5. PERFORMANCE & VISUALIZATION DASHBOARD (Unchanged)
# =====================================================================================
def generate_performance_report():
    """Print final metrics, plot loss curves and show key plots of the newest run.

    The newest run is the most recently modified directory below
    ``GOOGLE_DRIVE_ROOT/runs/detect``. If it contains a ``results.csv`` with at
    least one row, the last row is printed as the final metrics and the
    train/val loss curves are saved to ``custom_loss_curves.png`` in the run
    directory. Finally the confusion matrix, PR curve and a validation batch
    image are displayed when present.

    ``display`` is not imported in this file; it is expected to be provided by
    the notebook runtime.
    """
    print("\n" + "="*60)
    print("📊 GENERATING PERFORMANCE ANALYSIS DASHBOARD")
    print("="*60)

    runs_dir = GOOGLE_DRIVE_ROOT / "runs" / "detect"
    # Only directories are runs; a stray file in runs/detect must not be
    # mistaken for the latest run.
    run_dirs = [entry for entry in runs_dir.glob('*') if entry.is_dir()]
    if not run_dirs:
        print("❌ No training runs found to analyze.")
        return
    latest_run = max(run_dirs, key=os.path.getmtime)
    print(f"🔍 Analyzing latest run: {latest_run.name}")

    results_csv = latest_run / 'results.csv'
    if results_csv.exists():
        df = pd.read_csv(results_csv)
        # Strip column names, which may carry surrounding whitespace.
        df.columns = df.columns.str.strip()

        if df.empty:
            # Header-only file (no finished epoch yet): nothing to report.
            print(f"\n⚠️ {results_csv} contains no epochs yet; skipping metrics and loss curves.")
        else:
            final_metrics = df.iloc[-1]

            print("\n--- FINAL METRICS ---")
            print(f"   mAP@50:       {final_metrics['metrics/mAP50(B)']:.4f}")
            print(f"   mAP@50-95:    {final_metrics['metrics/mAP50-95(B)']:.4f}")
            print(f"   Precision:    {final_metrics['metrics/precision(B)']:.4f}")
            print(f"   Recall:       {final_metrics['metrics/recall(B)']:.4f}")
            print("-" * 23)

            # One subplot per loss component: (csv column suffix, title, legend word).
            loss_panels = [
                ('box_loss', 'Box Loss', 'Box'),
                ('cls_loss', 'Class Loss', 'Class'),
                ('dfl_loss', 'DFL Loss', 'DFL'),
            ]
            fig, axes = plt.subplots(1, 3, figsize=(18, 5))
            for ax, (column, title, word) in zip(axes, loss_panels):
                ax.plot(df['epoch'], df[f'train/{column}'], label=f'Train {word} Loss')
                ax.plot(df['epoch'], df[f'val/{column}'], label=f'Val {word} Loss')
                ax.set_title(title)
                ax.legend()
                ax.grid(True)

            fig.suptitle('Training and Validation Loss Curves', fontsize=16)
            fig.tight_layout(rect=[0, 0.03, 1, 0.95])
            loss_curve_path = latest_run / 'custom_loss_curves.png'
            fig.savefig(loss_curve_path)
            plt.show()
            # Release the figure so repeated calls do not accumulate open figures.
            plt.close(fig)
            print(f"\n✅ Custom loss curves saved to: {loss_curve_path}")

    key_images = {
        "Confusion Matrix": latest_run / 'confusion_matrix_normalized.png',
        "Precision-Recall Curve": latest_run / 'BoxPR_curve.png',
        "Results Grid": latest_run / 'val_batch0_pred.jpg',
    }
    for title, path in key_images.items():
        if path.exists():
            print(f"\n--- {title.upper()} ---")
            # The context manager closes the image file once it has been rendered.
            with Image.open(path) as img:
                display(img)
        else:
            print(f"\n⚠️ {title} image not found at {path}")

# =====================================================================================
# 🚀 6. MAIN EXECUTION BLOCK (Unchanged)
# =====================================================================================
if __name__ == '__main__':
    # Entry point: mount Google Drive, then run the mode selected in
    # EXECUTION_CONFIG. Any exception is printed with its traceback instead of
    # propagating, and the closing message is printed in every case.
    try:
        print("📁 Mounting Google Drive...")
        from google.colab import drive
        drive.mount('/content/drive', force_remount=True)

        mode = EXECUTION_CONFIG.get('MODE', 'train').lower()
        banner = "=" * 60
        print(banner)
        print(f"🚀 RUNNING IN MODE: {mode.upper()}")
        print(banner)

        if mode not in ('train', 'analyze'):
            print(f"❌ Invalid mode '{mode}'. Choose 'train' or 'analyze'.")
        else:
            # 'train' trains first; both valid modes end with the report.
            if mode == 'train':
                train_enhanced_model()
            generate_performance_report()

    except Exception as e:
        import traceback
        print(f"\n❌ An error occurred: {e}")
        traceback.print_exc()

    finally:
        print("\n🎉 Process finished.")

Creating new Ultralytics Settings v0.0.6 file ✅ 
View Ultralytics Settings with 'yolo settings' or at '/root/.config/Ultralytics/settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.
📁 Mounting Google Drive...
Mounted at /content/drive
🚀 RUNNING IN MODE: TRAIN
✅ Dataset YAML created: /content/drive/MyDrive/YOLO_Safety_Project_Nano_KD/yolo_params.yaml
🚀 STARTING ENHANCED YOLO TRAINING
🧠 Training with Knowledge Distillation (Teacher: yolov8l.pt, Student: yolov8n.pt)...
Ultralytics 8.3.202 🚀 Python-3.12.11 torch-2.8.0+cu126 CUDA:0 (Tesla T4, 15095MiB)
[34m[1mengine/trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=16, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, compile=False, conf=None, copy_paste=0.05, copy_paste_mode=flip, cos_lr=True, cutmix=0.0, data=/content/drive/MyDrive/YOLO_Saf

In [1]:
# =====================================================================================
# 🚀 4. CORE TRAINING FUNCTION (MODIFIED TO RESUME)
# =====================================================================================
def train_enhanced_model():
    """Resume the 'KD_l_to_n_v1' run from its last saved checkpoint.

    The checkpoint path is derived from GOOGLE_DRIVE_ROOT so it always matches
    the ``project``/``name`` pair passed to the trainer.

    Raises:
        FileNotFoundError: if ``weights/last.pt`` of the run does not exist
            (or a dataset directory is missing, via create_optimized_yaml).
    """
    data_yaml = create_optimized_yaml()
    print("🚀 RESUMING ENHANCED YOLO TRAINING") # Message updated

    run_name = 'KD_l_to_n_v1'
    project_dir = GOOGLE_DRIVE_ROOT / 'runs' / 'detect'
    checkpoint = project_dir / run_name / 'weights' / 'last.pt'
    if not checkpoint.is_file():
        raise FileNotFoundError(f"Checkpoint '{checkpoint}' does not exist; nothing to resume.")
    resume_weights_path = str(checkpoint)

    train_args = {
        'model': resume_weights_path,
        # Loading last.pt as 'model' alone only reuses its weights and starts
        # again at epoch 0. 'resume' is what restores the optimizer state and
        # the epoch counter; giving it the checkpoint path makes the resume
        # target explicit.
        # NOTE(review): on resume Ultralytics presumably reloads the arguments
        # stored in the checkpoint, so most TRAINING_CONFIG changes may be
        # ignored — confirm against the installed version.
        'resume': resume_weights_path,
        'data': data_yaml,
        'project': str(project_dir),
        'name': run_name,
        'exist_ok': True,
        'verbose': True,
        'plots': True,
        **TRAINING_CONFIG
    }

    if KD_CONFIG['enable']:
        print(f"🧠 Resuming Training with Knowledge Distillation...")
        trainer = KnowledgeDistillationTrainer(overrides=train_args)
        trainer.train()
    else:
        # Standard (non-KD) training path.
        model = YOLO(resume_weights_path)
        model.train(**train_args)

    print("✅ Training complete!")

In [5]:
# =====================================================================================
#         EXPERT'S YOLO TRAINING SCRIPT (RESUMING FROM CHECKPOINT)
# =====================================================================================

# Standard library imports
import os
import sys
import yaml
import json
from pathlib import Path

# Third-party imports
import cv2
import torch
import torch.nn as nn
from ultralytics import YOLO
from ultralytics.utils.loss import v8DetectionLoss
from ultralytics.models.yolo.detect.train import DetectionTrainer
from tqdm import tqdm
import matplotlib.pyplot as plt
import pandas as pd
from PIL import Image

# =====================================================================================
# 🔧 1. MASTER CONFIGURATION
# =====================================================================================

# --- Dataset locations (Google Drive, mounted under /content/drive) ---
TRAIN_DATA_ROOT = Path("/content/drive/MyDrive").joinpath("MICROSOFT HACK", "hackathon2_train_1")
TEST_DATA_ROOT = Path("/content/drive/MyDrive").joinpath("MICROSOFT HACK", "Hackathon2_test1")

# --- Output root on Google Drive (dataset YAML and training runs go here) ---
GOOGLE_DRIVE_ROOT = Path("/content/drive/MyDrive") / "YOLO_Safety_Project_Nano_KD"

# --- The 7 safety-object classes, in label-index order ---
CLASS_NAMES = (
    'OxygenTank NitrogenTank FirstAidBox FireAlarm '
    'SafetySwitchPanel EmergencyPhone FireExtinguisher'
).split()

# --- Hyperparameters forwarded verbatim to the Ultralytics trainer ---
# This cell is configured for a CPU-only session: small batch, AMP off.
TRAINING_CONFIG = dict(
    epochs=60,
    mosaic=1.0,
    optimizer='AdamW',
    momentum=0.937,
    lr0=0.01,
    lrf=0.01,
    batch=4,
    imgsz=640,
    patience=25,
    warmup_epochs=3,
    weight_decay=0.0005,
    single_cls=False,
    amp=False,
    device='cpu',
    cos_lr=True,
    mixup=0.05,
    copy_paste=0.05,
)

# --- Knowledge-distillation settings ---
KD_CONFIG = dict(
    enable=True,
    teacher_model='yolov8l.pt',   # large checkpoint acting as teacher
    student_model='yolov8n.pt',   # nano checkpoint that is actually trained
    temperature=4.0,              # softening temperature for the KL term
    hard_weight=0.7,              # weight of the native detection loss
    soft_weight=0.3,              # weight of the distillation (KL) loss
)

# =====================================================================================
# 🧠 2. KNOWLEDGE DISTILLATION IMPLEMENTATION (Unchanged)
# =====================================================================================
class YoloKDLoss(nn.Module):
    """Blend the native YOLOv8 detection loss with a temperature-scaled KL term.

    total = hard_weight * native_loss + soft_weight * T^2 * KL(student || teacher)

    Args:
        student_model: model handed to ``v8DetectionLoss`` to build the native
            (hard-label) criterion.
        hard_weight: multiplier of the native detection loss.
        soft_weight: multiplier of the distillation loss.
        temperature: softmax temperature ``T`` applied to both logit sets.
    """

    def __init__(self, student_model, hard_weight, soft_weight, temperature):
        super().__init__()
        self.hard_weight, self.soft_weight, self.temperature = hard_weight, soft_weight, temperature
        self.native_loss = v8DetectionLoss(student_model)
        self.kl_div = nn.KLDivLoss(reduction='batchmean')

    @staticmethod
    def _as_scalar(loss):
        """Reduce whatever the native criterion returns to one scalar loss."""
        # Ultralytics' v8DetectionLoss returns a (loss, loss_items) pair;
        # multiplying that tuple by a float weight raises TypeError.
        if isinstance(loss, (tuple, list)):
            loss = loss[0]
        # The first element can hold one entry per loss component.
        if isinstance(loss, torch.Tensor) and loss.ndim > 0:
            loss = loss.sum()
        return loss

    @staticmethod
    def _last_tensor(preds):
        """Return ``preds[-1]`` when it is a tensor, otherwise ``None``."""
        if preds is None or len(preds) == 0:
            return None
        last = preds[-1]
        return last if isinstance(last, torch.Tensor) else None

    def forward(self, student_preds, teacher_preds, batch):
        """Return the weighted sum of the hard loss and the distillation loss.

        The distillation term is only added when the last element of both
        prediction sequences is a tensor and the two shapes agree; otherwise
        the result is the weighted hard loss alone and a warning is emitted
        once, instead of crashing inside ``KLDivLoss``.
        """
        hard_loss = self._as_scalar(self.native_loss(student_preds, batch))
        soft_loss_val = 0.0

        student_logits = self._last_tensor(student_preds)
        teacher_logits = self._last_tensor(teacher_preds)
        if (
            student_logits is not None
            and teacher_logits is not None
            and student_logits.shape == teacher_logits.shape
        ):
            teacher_logits = teacher_logits.detach()
            # NOTE(review): softmax runs over the last axis, as before; confirm
            # that this axis really holds the class scores for these outputs.
            student_soft = torch.log_softmax(student_logits / self.temperature, dim=-1)
            teacher_soft = torch.softmax(teacher_logits / self.temperature, dim=-1)
            # T^2 keeps the soft-loss gradient scale comparable across temperatures.
            soft_loss_val = self.kl_div(student_soft, teacher_soft) * (self.temperature ** 2)
        elif not getattr(self, '_kd_skip_warned', False):
            import warnings
            warnings.warn(
                "YoloKDLoss: student/teacher outputs are missing, not tensors, or "
                "differ in shape; the distillation term is skipped.",
                RuntimeWarning,
                stacklevel=2,
            )
            self._kd_skip_warned = True
        return self.hard_weight * hard_loss + self.soft_weight * soft_loss_val

class KnowledgeDistillationTrainer(DetectionTrainer):
    """DetectionTrainer that also carries a frozen teacher network.

    The teacher is loaded from ``KD_CONFIG['teacher_model']``, put in eval
    mode and excluded from gradient computation.

    NOTE(review): nothing in this file calls ``get_loss``. Confirm that the
    installed Ultralytics trainer actually invokes it; otherwise the teacher
    never influences training and this behaves like a plain DetectionTrainer.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        teacher_net = YOLO(KD_CONFIG['teacher_model']).model
        self.teacher = teacher_net.eval()
        for weight in self.teacher.parameters():
            weight.requires_grad = False

    def get_loss(self, preds, batch):
        """Return the distillation loss for student ``preds`` on ``batch``."""
        # The loss module is created on first use, once self.model exists.
        if not hasattr(self, 'kd_loss'):
            self.kd_loss = YoloKDLoss(
                self.model,
                KD_CONFIG['hard_weight'],
                KD_CONFIG['soft_weight'],
                KD_CONFIG['temperature'],
            )

        # Keep the teacher on the same device as the trainer.
        teacher_device = next(self.teacher.parameters()).device
        if teacher_device != self.device:
            self.teacher = self.teacher.to(self.device)

        with torch.no_grad():
            teacher_preds = self.teacher(batch['img'])
        return self.kd_loss(preds, teacher_preds, batch)

# =====================================================================================
# 📝 3. DATASET & UTILITY FUNCTIONS (Unchanged)
# =====================================================================================
def create_optimized_yaml(data_path="yolo_params.yaml"):
    """Write the dataset description YAML and return its path as a string.

    Args:
        data_path: file name (relative to GOOGLE_DRIVE_ROOT) of the YAML file.

    Returns:
        The path of the written YAML file, as ``str``.

    Raises:
        FileNotFoundError: if the train, val or test image directory is missing
            (the first missing one is reported).
    """
    train_dir = TRAIN_DATA_ROOT / "train_1/train1/images"
    val_dir = TRAIN_DATA_ROOT / "train_1/val1/images"
    test_dir = TEST_DATA_ROOT / "test1/images"

    for split_dir in (train_dir, val_dir, test_dir):
        if not split_dir.exists():
            raise FileNotFoundError(f"Path '{split_dir}' does not exist.")

    dataset_spec = {
        'train': str(train_dir.resolve()),
        'val': str(val_dir.resolve()),
        'test': str(test_dir.resolve()),
        'nc': len(CLASS_NAMES),
        'names': CLASS_NAMES,
    }

    GOOGLE_DRIVE_ROOT.mkdir(exist_ok=True, parents=True)
    target = GOOGLE_DRIVE_ROOT / data_path
    with open(target, 'w') as handle:
        # sort_keys=False keeps the key order used in dataset_spec.
        yaml.dump(dataset_spec, handle, default_flow_style=False, sort_keys=False)

    print(f"✅ Dataset YAML created: {target}")
    return str(target)

# =====================================================================================
# 🚀 4. CORE TRAINING FUNCTION (CORRECTED TO RESUME)
# =====================================================================================
def train_enhanced_model():
    """Resume the 'KD_l_to_n_v1' run from its last saved checkpoint.

    The checkpoint path is derived from GOOGLE_DRIVE_ROOT so it always matches
    the ``project``/``name`` pair passed to the trainer.

    Raises:
        FileNotFoundError: if ``weights/last.pt`` of the run does not exist
            (or a dataset directory is missing, via create_optimized_yaml).
    """
    data_yaml = create_optimized_yaml()
    print("🚀 RESUMING ENHANCED YOLO TRAINING")

    run_name = 'KD_l_to_n_v1'
    project_dir = GOOGLE_DRIVE_ROOT / 'runs' / 'detect'
    checkpoint = project_dir / run_name / 'weights' / 'last.pt'
    # Fail early with a clear message instead of deep inside the trainer.
    if not checkpoint.is_file():
        raise FileNotFoundError(f"Checkpoint '{checkpoint}' does not exist; nothing to resume.")
    resume_weights_path = str(checkpoint)

    train_args = {
        'model': resume_weights_path,   # start from the last checkpoint
        # The checkpoint path (rather than a bare True) names the resume target
        # explicitly, so a directly constructed trainer does not have to
        # locate "the latest run" on its own.
        # NOTE(review): on resume Ultralytics presumably reloads the arguments
        # stored in the checkpoint, so most TRAINING_CONFIG changes may be
        # ignored — confirm against the installed version.
        'resume': resume_weights_path,
        'data': data_yaml,
        'project': str(project_dir),
        'name': run_name,
        'exist_ok': True,
        'verbose': True,
        'plots': True,
        **TRAINING_CONFIG
    }

    if KD_CONFIG['enable']:
        print(f"🧠 Resuming Training with Knowledge Distillation...")
        trainer = KnowledgeDistillationTrainer(overrides=train_args)
        trainer.train()
    else:
        # Standard (non-KD) training path.
        model = YOLO(resume_weights_path)
        model.train(**train_args)

    print("✅ Training complete!")

# =====================================================================================
# 📊 5. PERFORMANCE & VISUALIZATION DASHBOARD (Unchanged)
# =====================================================================================
def generate_performance_report():
    """Print final metrics, plot loss curves and show key plots of the newest run.

    The newest run is the most recently modified directory below
    ``GOOGLE_DRIVE_ROOT/runs/detect``. If it contains a ``results.csv`` with at
    least one row, the last row is printed as the final metrics and the
    train/val loss curves are saved to ``custom_loss_curves.png`` in the run
    directory. Finally the confusion matrix, PR curve and a validation batch
    image are displayed when present.

    ``display`` is not imported in this file; it is expected to be provided by
    the notebook runtime.
    """
    print("\n" + "="*60)
    print("📊 GENERATING PERFORMANCE ANALYSIS DASHBOARD")
    print("="*60)

    runs_dir = GOOGLE_DRIVE_ROOT / "runs" / "detect"
    # Only directories are runs; a stray file in runs/detect must not be
    # mistaken for the latest run.
    run_dirs = [entry for entry in runs_dir.glob('*') if entry.is_dir()]
    if not run_dirs:
        print("❌ No training runs found to analyze.")
        return
    latest_run = max(run_dirs, key=os.path.getmtime)
    print(f"🔍 Analyzing latest run: {latest_run.name}")

    results_csv = latest_run / 'results.csv'
    if results_csv.exists():
        df = pd.read_csv(results_csv)
        # Strip column names, which may carry surrounding whitespace.
        df.columns = df.columns.str.strip()

        if df.empty:
            # Header-only file (no finished epoch yet): nothing to report.
            print(f"\n⚠️ {results_csv} contains no epochs yet; skipping metrics and loss curves.")
        else:
            final_metrics = df.iloc[-1]

            print("\n--- FINAL METRICS ---")
            print(f"   mAP@50:       {final_metrics['metrics/mAP50(B)']:.4f}")
            print(f"   mAP@50-95:    {final_metrics['metrics/mAP50-95(B)']:.4f}")
            print(f"   Precision:    {final_metrics['metrics/precision(B)']:.4f}")
            print(f"   Recall:       {final_metrics['metrics/recall(B)']:.4f}")
            print("-" * 23)

            # One subplot per loss component: (csv column suffix, title, legend word).
            loss_panels = [
                ('box_loss', 'Box Loss', 'Box'),
                ('cls_loss', 'Class Loss', 'Class'),
                ('dfl_loss', 'DFL Loss', 'DFL'),
            ]
            fig, axes = plt.subplots(1, 3, figsize=(18, 5))
            for ax, (column, title, word) in zip(axes, loss_panels):
                ax.plot(df['epoch'], df[f'train/{column}'], label=f'Train {word} Loss')
                ax.plot(df['epoch'], df[f'val/{column}'], label=f'Val {word} Loss')
                ax.set_title(title)
                ax.legend()
                ax.grid(True)

            fig.suptitle('Training and Validation Loss Curves', fontsize=16)
            fig.tight_layout(rect=[0, 0.03, 1, 0.95])
            loss_curve_path = latest_run / 'custom_loss_curves.png'
            fig.savefig(loss_curve_path)
            plt.show()
            # Release the figure so repeated calls do not accumulate open figures.
            plt.close(fig)
            print(f"\n✅ Custom loss curves saved to: {loss_curve_path}")

    key_images = {
        "Confusion Matrix": latest_run / 'confusion_matrix_normalized.png',
        "Precision-Recall Curve": latest_run / 'BoxPR_curve.png',
        "Results Grid": latest_run / 'val_batch0_pred.jpg',
    }
    for title, path in key_images.items():
        if path.exists():
            print(f"\n--- {title.upper()} ---")
            # The context manager closes the image file once it has been rendered.
            with Image.open(path) as img:
                display(img)
        else:
            print(f"\n⚠️ {title} image not found at {path}")

# =====================================================================================
# 🚀 6. MAIN EXECUTION BLOCK
# =====================================================================================
if __name__ == '__main__':
    # Entry point of the resume cell: mount Google Drive, resume training, then
    # build the report. Any exception is printed with its traceback instead of
    # propagating, and the closing message is printed in every case.
    try:
        # Dependencies are expected to be installed once by the setup cell:
        # !pip install -q ultralytics pandas matplotlib

        print("📁 Mounting Google Drive...")
        from google.colab import drive
        drive.mount('/content/drive', force_remount=True)

        # This cell always trains/resumes; the mode is fixed.
        mode = 'train'
        banner = "=" * 60
        print(banner)
        print(f"🚀 RUNNING IN MODE: {mode.upper()}")
        print(banner)

        train_enhanced_model()
        generate_performance_report()

    except Exception as e:
        import traceback
        print(f"\n❌ An error occurred: {e}")
        traceback.print_exc()

    finally:
        print("\n🎉 Process finished.")

Output hidden; open in https://colab.research.google.com to view.