# 🏠 DeepRoof-2026: Master Training Lab

This is the professional-grade training environment for the **DeepRoof-2026 AI Roof Layout Engine**. This notebook is configured for **Training From Scratch** on the OmniCity dataset using Swin-L backbones on A100 hardware.

### 🛠 Step 1: Terminal-Enforced Environment Initialization
This cell activates the project's virtual environment and forcefully fixes the **mmsegmentation** version assertions on disk.

In [None]:
import os
import sys
import subprocess
import torch
from pathlib import Path

# --- 1. PROJECT PATHS ---
# Use the fixed container location when it exists; otherwise fall back to the
# parent of the current working directory.
project_root = Path("/workspace/roof")
if not project_root.exists():
    project_root = Path.cwd().parent

# The virtual environment directory may be called "venv" or ".venv".
venv_path = project_root / "venv"
if not venv_path.exists():
    venv_path = project_root / ".venv"

# Pre-emptively expose the venv's site-packages to this interpreter and point
# sys.executable at the venv's Python so later `-m pip` calls target the venv.
if venv_path.exists():
    lib_dir = [*venv_path.glob("lib/python*/site-packages")]
    if lib_dir:
        site_dir = str(lib_dir[0])
        if site_dir not in sys.path:
            sys.path.insert(0, site_dir)
            print(f"‚úÖ Activated Venv Site: {site_dir}")
        sys.executable = str(venv_path.joinpath("bin", "python"))

# Make project-local packages importable from the notebook.
root_entry = str(project_root)
if root_entry not in sys.path:
    sys.path.insert(0, root_entry)
    print(f"üìÇ Project Root: {root_entry}")

# --- 2. TERMINAL-LEVEL PATCHING (FORCE DISK WRITE) ---
# Replacement text written over mmseg/__init__.py: it keeps the version
# bookkeeping names, raises both upper bounds to 9.9.9 and contains no
# assert statements.
_UNLOCKED_MMSEG_INIT = """# Copyright (c) OpenMMLab. All rights reserved.
import mmcv
import mmengine
from mmengine.utils import digit_version
from .version import __version__, version_info
MMCV_MIN = '2.0.0rc4'
MMCV_MAX = '2.2.0'
MMENGINE_MIN = '0.7.1'
MMENGINE_MAX = '1.0.0'
mmcv_min_version = digit_version(MMCV_MIN)
mmcv_max_version = digit_version('9.9.9') # OVERRIDE by DeepRoof
mmcv_version = digit_version(mmcv.__version__)
mmengine_min_version = digit_version(MMENGINE_MIN)
mmengine_max_version = digit_version('9.9.9') # OVERRIDE by DeepRoof
mmengine_version = digit_version(mmengine.__version__)
__all__ = ['__version__', 'version_info', 'digit_version']
"""


def _candidate_mmseg_inits():
    """Return every mmseg/__init__.py under the venv or system package dirs.

    The interpreter directory is globbed (``python*``) instead of being pinned
    to ``python3.11`` so the patch also reaches other Python versions.
    """
    search_roots = [
        (project_root / "venv" / "lib", "python*/site-packages/mmseg/__init__.py"),
        (project_root / ".venv" / "lib", "python*/site-packages/mmseg/__init__.py"),
        (Path("/usr/local/lib"), "python*/dist-packages/mmseg/__init__.py"),
    ]
    found = []
    for base, pattern in search_roots:
        if base.is_dir():
            found.extend(sorted(base.glob(pattern)))
    return found


def _strip_mmseg_assertions():
    """Overwrite asserting mmseg/__init__.py files; return True if any changed."""
    modified = False
    for target in _candidate_mmseg_inits():
        content = target.read_text(encoding="utf-8")
        if "assert (mmcv_min_version" not in content:
            continue
        print(f"üî• Removing assertions from: {target}")
        # Keep one copy of the pristine file so the overwrite can be undone.
        backup = target.with_name(target.name + ".deeproof.bak")
        if not backup.exists():
            backup.write_text(content, encoding="utf-8")
        target.write_text(_UNLOCKED_MMSEG_INIT, encoding="utf-8")
        modified = True
    return modified


def _mmcv_ops_ready():
    """Return True if ``mmcv.ops`` imports; otherwise report why and return False."""
    try:
        from mmcv.ops import point_sample  # noqa: F401  (import probe only)
    except ImportError as e:
        if "libcudart.so" in str(e):
            print("üì¶ Installing OS-level CUDA runtime into venv...")
            subprocess.check_call([sys.executable, "-m", "pip", "install", "nvidia-cuda-runtime-cu11"])
        else:
            # Surface the failure instead of reporting a healthy environment.
            print(f"WARNING: mmcv.ops could not be imported: {e}")
        return False
    return True


def apply_hammer_patch():
    """Patch mmsegmentation's version asserts on disk and probe the CUDA ops.

    Returns:
        bool: True when nothing needed patching and, if CUDA is available,
        ``mmcv.ops`` imports cleanly. False when a file was rewritten (the
        kernel must be restarted), when the CUDA runtime package was just
        installed, or when ``mmcv.ops`` cannot be imported.

    Raises:
        subprocess.CalledProcessError: if the pip install of the CUDA runtime
            fails.
    """
    print("ü©π Checking for mmsegmentation assertions...")
    if _strip_mmseg_assertions():
        print("‚ö†Ô∏è DISK STATE UPDATED. PLEASE RESTART KERNEL.")
        return False

    # --- 3. HARDWARE-AWARE LIBCUDART LINKER ---
    cuda_available = torch.cuda.is_available()
    print(f"üöÄ CUDA Access: {cuda_available}")

    # The compiled ops are only probed when a GPU is visible.
    if cuda_available and not _mmcv_ops_ready():
        return False

    print("‚úÖ Environment is Live and Synchronized.")
    return True

# Announce the active device only when the environment check succeeded.
if apply_hammer_patch():
    if torch.cuda.is_available():
        print(f"‚úÖ Kernel Ready | {torch.cuda.get_device_name(0)}")
    else:
        print("‚úÖ Kernel Ready | CPU Mode")

## 📂 1. Dataset Preview

Visualize the **OmniCity** satellite imagery and ground truth **Masks** + **Surface Normals**.

In [None]:
def preview_dataset(num_samples=3):
    """Plot image / plane-mask / surface-normal panels for the first samples.

    Sample ids are read from ``data/OmniCity/train.txt`` under the project
    root. One row of three panels is drawn per id that was actually read.

    Args:
        num_samples: Maximum number of ids to take from the start of
            ``train.txt``; blank lines are skipped.

    Returns:
        None. Prints a message and returns early when ``train.txt`` is
        missing or lists no ids.
    """
    from itertools import islice

    import matplotlib.pyplot as plt
    import numpy as np
    import cv2

    data_path = project_root / "data/OmniCity"
    train_file = data_path / 'train.txt'

    if not train_file.exists():
        print(f"‚ùå Multi-task training data not found at {data_path}. Ensure extraction is complete.")
        return

    with open(train_file, 'r', encoding='utf-8') as f:
        stripped = (line.strip() for line in f)
        sample_ids = list(islice((sid for sid in stripped if sid), max(num_samples, 0)))

    if not sample_ids:
        print(f"No sample ids available in {train_file}.")
        return

    # squeeze=False keeps `axes` two-dimensional even when only one row is
    # drawn; sizing by len(sample_ids) avoids empty rows for short lists.
    _, axes = plt.subplots(
        len(sample_ids), 3, figsize=(15, 5 * len(sample_ids)), squeeze=False
    )
    for (img_ax, mask_ax, normal_ax), sid in zip(axes, sample_ids):
        # cv2.imread returns None (no exception) for a missing/unreadable file.
        img_bgr = cv2.imread(str(data_path / 'images' / (sid + '.jpg')))
        if img_bgr is None:
            print(f"Image not readable for sample {sid}.")
        else:
            img_ax.imshow(cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB))
        img_ax.set_title(f"Original: {sid}")
        img_ax.axis('off')

        mask = cv2.imread(str(data_path / 'masks' / (sid + '.png')), cv2.IMREAD_UNCHANGED)
        if mask is None:
            print(f"Mask not readable for sample {sid}.")
        else:
            # Fold ids into 20 buckets and spread them over the colormap range.
            mask_vis = cv2.applyColorMap(((mask % 20) * 12).astype(np.uint8), cv2.COLORMAP_JET)
            # applyColorMap yields BGR; matplotlib expects RGB.
            mask_ax.imshow(cv2.cvtColor(mask_vis, cv2.COLOR_BGR2RGB))
        mask_ax.set_title("Plane Segmentation")
        mask_ax.axis('off')

        norm_path = data_path / 'normals' / (sid + '.npy')
        if norm_path.exists():
            normals = np.load(str(norm_path))
            # Assumes components lie roughly in [-1, 1] (TODO confirm); the
            # clip prevents uint8 wrap-around for values slightly outside.
            normal_ax.imshow(np.clip((normals + 1) * 127.5, 0, 255).astype(np.uint8))
        normal_ax.set_title("Surface Normals")
        normal_ax.axis('off')

    plt.tight_layout()
    plt.show()

# Preview the first two samples listed in train.txt.
preview_dataset(num_samples=2)

## ⚙️ 2. Scratch Training Configuration (Epoch-Based)

We are using the **MASTER EPOCH-BASED SCRATCH PROFILE**:
- **Duration**: 150 Epochs (~160k steps).
- **Val Interval**: Every 1 Epoch (Reports results per-epoch).
- **No Pre-Training**: `load_from = None`.
- **Checkpoints**: Interval snapshots every 5 epochs + `best_mIoU.pth`.

In [None]:
from mmengine.config import Config

# Output directory and config file, both resolved against the project root.
WORK_DIR = str(project_root / "work_dirs/swin_l_scratch_v1")
CONFIG_PATH = str(project_root / "configs/deeproof_scratch_swin_L.py")

cfg = Config.fromfile(CONFIG_PATH)
cfg.work_dir = WORK_DIR
cfg.data_root = str(project_root / "data/OmniCity/")
# Point the train and val datasets at the same root as the top-level entry.
cfg.train_dataloader.dataset.data_root = cfg.val_dataloader.dataset.data_root = cfg.data_root

# Summary of the loaded configuration.
print("üèÜ MASTER SCRATCH CONFIG LOADED")
print(f"üìç Work Directory: {WORK_DIR}")
print(f"üî• Max Epochs: {cfg.train_cfg.max_epochs}")
print(f"üìâ Initial LR: {cfg.optimizer.lr}")
print("üìä Reporting Interval: Every Epoch")

## 🚀 3. Kickoff Training

This will invoke the `mmengine.Runner` and begin the full model convergence process. **Detailed stats will print to this output at the end of every epoch.**

In [None]:
import torch
from mmengine.runner import Runner

# Report which device is used before the runner is built.
if torch.cuda.is_available():
    print(f"üöÄ Starting Epoch-Based Master Trainer on: {torch.cuda.get_device_name(0)}")
else:
    print("üöÄ Starting Epoch-Based Master Trainer on: CPU")

# Build the runner from the prepared config, then start training.
runner = Runner.from_cfg(cfg)
runner.train()

## 📊 4. Monitoring & Metrics

Run this cell during or after training to visualize performance trends from the `scalars.json` logs.

In [None]:
import json
import matplotlib.pyplot as plt

def _read_scalar_log(log_path):
    """Parse a JSON-lines scalar log into separate loss and mIoU series.

    Blank lines, lines that are not valid JSON (for example a line still
    being written while training runs) and non-object records are skipped.

    Returns:
        tuple: ``(loss_steps, losses, miou_steps, miou)`` as four lists.
    """
    loss_steps, losses, miou_steps, miou = [], [], [], []
    with open(log_path, 'r', encoding='utf-8') as f:
        for index, line in enumerate(f):
            line = line.strip()
            if not line:
                continue
            try:
                data = json.loads(line)
            except json.JSONDecodeError:
                continue
            if not isinstance(data, dict):
                continue
            # Prefer the logged step; fall back to the line index so the
            # x-series never contains None.
            step = data.get('step', data.get('iter'))
            if step is None:
                step = index
            if 'loss' in data:
                loss_steps.append(step)
                losses.append(data['loss'])
            if 'mIoU' in data:
                miou_steps.append(step)
                miou.append(data['mIoU'])
    return loss_steps, losses, miou_steps, miou


def plot_training_logs(log_path):
    """Plot total loss and, when present, mIoU from a scalar log file.

    Args:
        log_path: Path to a JSON-lines log whose records may contain
            ``loss``, ``mIoU`` and ``step`` (or ``iter``) keys.

    Returns:
        None. Prints a notice and returns without plotting when the file is
        missing or holds no loss/mIoU records yet.
    """
    if not os.path.exists(log_path):
        print("üïí No logs found yet. Start training first!")
        return

    loss_steps, losses, miou_steps, miou = _read_scalar_log(log_path)
    if not losses and not miou:
        print("üïí No logs found yet. Start training first!")
        return

    fig, ax1 = plt.subplots(figsize=(10, 5))
    ax1.plot(loss_steps, losses, color='red', label='Total Loss')
    ax1.set_xlabel('Progress')
    ax1.set_ylabel('Loss', color='red')

    if miou:
        # Each mIoU value is plotted at the step recorded with it, so the x
        # and y series always have the same length.
        ax2 = ax1.twinx()
        ax2.plot(miou_steps, miou, color='blue', label='mIoU')
        ax2.set_ylabel('mIoU Accuracy', color='blue')

    plt.title("DeepRoof Scratch Training Progress Tracker")
    plt.show()

# Training writes logs into timestamped folders; uncomment to plot the most recent run.
# import glob
# log_json = sorted(glob.glob(os.path.join(WORK_DIR, "*/vis_data/scalars.json")))
# if log_json: plot_training_logs(log_json[-1])