In [None]:
# ============================================================================
# CELL 1: Setup and Installation
# ============================================================================

# Notebook banner
for banner_line in (
    "🏠 Roof Damage Object Detection Training",
    "🔧 MacBook Pro M4 Optimized",
    "=" * 60,
):
    print(banner_line)

# Install required packages
print("📦 Installing YOLOv8...")
# Uncomment the line below on first run
# !pip install ultralytics

# Import check: report library versions on success, or a hint when the
# packages cannot be imported.
try:
    from ultralytics import YOLO
    import torch
except ImportError as e:
    print(f"❌ Installation failed: {e}")
    print("💡 Run: pip install ultralytics")
else:
    print("✅ YOLOv8 installed successfully")
    print(f"✅ PyTorch version: {torch.__version__}")
    print(f"✅ MPS available: {torch.backends.mps.is_available()}")

# Additional imports
import os
import matplotlib.pyplot as plt
import cv2
import numpy as np
from pathlib import Path
import json

In [None]:
# ============================================================================
# CELL 2: Dataset Verification
# ============================================================================

print("\n🔍 Verifying Dataset Structure...")

# Locate the dataset folder
dataset_path = "roof_damage_final"  # Change this to your dataset name
if os.path.exists(dataset_path):
    print(f"✅ Dataset found: {dataset_path}")
else:
    print(f"❌ Dataset not found at: {dataset_path}")
    print("💡 Make sure you've run the data pipeline notebook first")
    print("💡 Available datasets:")
    # Suggest sibling folders whose name hints at a dataset
    name_hints = ('roof', 'detection', 'dataset')
    for entry in os.listdir('.'):
        if not os.path.isdir(entry):
            continue
        lowered = entry.lower()
        if any(hint in lowered for hint in name_hints):
            print(f"   - {entry}")

# Paths that must exist inside the dataset folder (four folders + data.yaml)
required_dirs = [
    f"{dataset_path}/{relative}"
    for relative in (
        "images/train",
        "images/val",
        "labels/train",
        "labels/val",
        "data.yaml",
    )
]

print("\n📁 Checking dataset structure:")
for required in required_dirs:
    if not os.path.exists(required):
        print(f"❌ Missing: {required}")
    elif required.endswith('.yaml'):
        print(f"✅ {required}")
    else:
        # Count entries, ignoring dot-files
        visible = sum(1 for entry_name in os.listdir(required) if not entry_name.startswith('.'))
        print(f"✅ {required} ({visible} files)")

# Optional summary file: print its statistics when present
info_path = f"{dataset_path}/dataset_info.json"
if os.path.exists(info_path):
    with open(info_path, 'r') as info_file:
        dataset_info = json.load(info_file)

    print("\n📊 Dataset Statistics:")
    print(f"   Classes: {dataset_info['classes']}")
    stats = dataset_info['statistics']
    print(f"   Training images: {stats['train_images']}")
    print(f"   Validation images: {stats['val_images']}")
    print(f"   Total bounding boxes: {stats['total_boxes']}")

In [None]:
# Cell 3: Evaluate results
# `model` is created by the training cell further down the notebook, so on a
# fresh kernel (Restart & Run All) it does not exist yet when this cell runs.
# Guard the call instead of raising NameError and halting the whole run.
if 'model' in globals():
    val_metrics = model.val()
else:
    val_metrics = None
    print("⚠️  No model in memory yet - run the training cell first, then re-run this cell")
# Last expression: shows the metrics as cell output when they exist
val_metrics


In [None]:
# Cell 4: Test inference
# Both prerequisites are checked up front: `model` only exists after the
# training cell has run, and the sample image is a hard-coded relative path
# that may not be present in the working directory.
test_image_path = 'test_image.jpg'
if 'model' not in globals():
    results = None
    print("⚠️  No model in memory yet - run the training cell first, then re-run this cell")
elif not os.path.exists(test_image_path):
    results = None
    print(f"⚠️  Test image not found: {test_image_path}")
else:
    results = model.predict(test_image_path)

In [None]:
# Complete YOLOv8 Object Detection Training for Roof Damage Detection
# MacBook Pro M4 Optimized

# ============================================================================
# CELL 1: Setup and Installation
# ============================================================================

print("🏠 Roof Damage Object Detection Training")
print("🔧 MacBook Pro M4 Optimized")
print("=" * 60)

# Install required packages
print("📦 Installing YOLOv8...")
# Uncomment the line below on first run
!pip install ultralytics

# Check if installation was successful
try:
    from ultralytics import YOLO
    import torch
    print("✅ YOLOv8 installed successfully")
    print(f"✅ PyTorch version: {torch.__version__}")
    print(f"✅ MPS available: {torch.backends.mps.is_available()}")
except ImportError as e:
    print(f"❌ Installation failed: {e}")
    print("💡 Run: pip install ultralytics")

# Additional imports
import os
import matplotlib.pyplot as plt
import cv2
import numpy as np
from pathlib import Path
import json

In [None]:
# CELL 2: Dataset Verification
# ============================================================================

print("\n🔍 Verifying Dataset Structure...")

# Resolve the dataset location inside the project folder
project_path = "projects/roof_shingle_inspection" # Change this to the project folder
dataset_path = f"{project_path}/pipeline"  # Change this to your dataset name
if os.path.exists(dataset_path):
    print(f"✅ Dataset found: {dataset_path}")
else:
    print(f"❌ Dataset not found at: {dataset_path}")
    print("💡 Make sure you've run the data pipeline notebook first")
    print("💡 Available datasets:")
    # List folders in the working directory whose name hints at a dataset
    candidate_keywords = ('roof', 'detection', 'dataset')
    for folder in os.listdir('.'):
        if not os.path.isdir(folder):
            continue
        folder_lower = folder.lower()
        if any(keyword in folder_lower for keyword in candidate_keywords):
            print(f"   - {folder}")

# Paths that must exist inside the dataset folder (four folders + data.yaml)
required_dirs = [
    f"{dataset_path}/{relative}"
    for relative in (
        "images/train",
        "images/val",
        "labels/train",
        "labels/val",
        "data.yaml",
    )
]

print("\n📁 Checking dataset structure:")
for expected in required_dirs:
    if not os.path.exists(expected):
        print(f"❌ Missing: {expected}")
    elif expected.endswith('.yaml'):
        print(f"✅ {expected}")
    else:
        # Count entries, ignoring dot-files
        n_visible = sum(1 for name in os.listdir(expected) if not name.startswith('.'))
        print(f"✅ {expected} ({n_visible} files)")

# Optional summary file: print its statistics when present
dataset_info_file = f"{dataset_path}/dataset_info.json"
if os.path.exists(dataset_info_file):
    with open(dataset_info_file, 'r') as handle:
        dataset_info = json.load(handle)

    print("\n📊 Dataset Statistics:")
    print(f"   Classes: {dataset_info['classes']}")
    statistics = dataset_info['statistics']
    print(f"   Training images: {statistics['train_images']}")
    print(f"   Validation images: {statistics['val_images']}")
    print(f"   Total bounding boxes: {statistics['total_boxes']}")

In [None]:
# CELL 3: Model Configuration and Training Setup
# ============================================================================

print("\n🎯 Setting up YOLOv8 Training...")

# Hyper-parameters read by the training cell.
# NOTE(review): patience (20) is larger than epochs (10), so early stopping
# presumably cannot trigger with these values - confirm this is intended.
training_config = dict(
    model_size='yolov8n',     # Nano variant
    epochs=10,                # Number of training epochs
    batch_size=8,             # Images per batch
    image_size=640,           # Input image size
    device='mps',             # Apple Silicon GPU backend
    learning_rate=0.01,       # Initial learning rate
    patience=20,              # Early stopping patience
    save_period=10,           # Save checkpoint every 10 epochs
)

print("⚙️  Training Configuration:")
print("\n".join(f"   {name}: {setting}" for name, setting in training_config.items()))

# Output location: <project>/training/<experiment>
training_dir = f"{project_path}/training"
experiment_name = "v1"
os.makedirs(training_dir, exist_ok=True)

print(f"\n📂 Training output: {training_dir}/{experiment_name}")

In [None]:
# CELL 4: Data Augmentation Preview (Optional)
# ============================================================================

print("\n🖼️  Previewing Data Augmentation...")

def preview_augmentation(dataset_path, num_samples=3):
    """Display up to ``num_samples`` images from the training split.

    No augmentation is applied here: the raw training images are shown side
    by side, followed by a printed reminder of the augmentations that are
    applied during training.

    Args:
        dataset_path (str): Dataset root; images are read from
            ``<dataset_path>/images/train``.
        num_samples (int): Maximum number of images to display.

    Returns:
        None. The figure is rendered with ``plt.show()``.
    """

    train_images_dir = f"{dataset_path}/images/train"
    # Sorted so the same images are previewed on every run (os.listdir order
    # is arbitrary).
    image_files = sorted(
        f for f in os.listdir(train_images_dir)
        if f.lower().endswith(('.png', '.jpg', '.jpeg'))
    )

    if len(image_files) == 0:
        print("❌ No images found in training directory")
        return

    num_shown = min(num_samples, len(image_files))
    if num_shown < 1:
        print("❌ num_samples must be at least 1")
        return

    # Show a few sample images
    fig, axes = plt.subplots(1, num_shown, figsize=(15, 5))
    # plt.subplots returns a bare Axes (not an array) for a single column.
    # That depends on how many images are actually shown, not on the requested
    # num_samples - e.g. one image on disk with the default num_samples=3.
    if num_shown == 1:
        axes = [axes]

    for i, image_file in enumerate(image_files[:num_shown]):
        image_path = os.path.join(train_images_dir, image_file)
        image = cv2.imread(image_path)

        if image is None:
            # cv2.imread signals an unreadable file by returning None
            axes[i].text(0.5, 0.5, f'{image_file}\nCould not be read',
                         ha='center', va='center', transform=axes[i].transAxes)
            axes[i].axis('off')
            continue

        # OpenCV loads BGR; matplotlib expects RGB
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        axes[i].imshow(image)
        axes[i].set_title(f"Training Image {i+1}")
        axes[i].axis('off')

    plt.tight_layout()
    plt.show()

    print(f"📸 Showing {num_shown} sample training images")
    print("💡 YOLO will automatically apply augmentations during training:")
    print("   - Random rotations, flips, and crops")
    print("   - Color jittering and contrast changes") 
    print("   - Mosaic and mixup augmentations")

# Run preview only when the training image folder exists, so that a missing
# dataset does not raise inside the os.listdir() call made by
# preview_augmentation.
if os.path.exists(f"{dataset_path}/images/train"):
    preview_augmentation(dataset_path)

In [None]:
# CELL 5: Training Execution
# ============================================================================

print("\n🚀 Starting YOLOv8 Training...")
print("⏱️  Expected training time on M4 Pro: 15-25 minutes")
print("💾 Progress will be saved automatically")

# Initialize model
print(f"🔄 Loading pre-trained model: {training_config['model_size']}")
model = YOLO(training_config['model_size'])

# Stays None unless training finishes, so later code can tell whether a run
# actually completed.
results = None

# Check data.yaml path. Training is only attempted when the dataset config
# exists; previously the missing file was reported and training was started
# anyway, only to fail inside model.train().
data_yaml_path = f"{dataset_path}/data.yaml"
if not os.path.exists(data_yaml_path):
    print(f"❌ data.yaml not found at: {data_yaml_path}")
    print("💡 Make sure you've run the data pipeline conversion")
    print("⏭️  Skipping training")
else:
    print(f"✅ Using dataset config: {data_yaml_path}")

    # Start training
    print("\n🏋️  Training started...")
    print("📊 Monitor progress below:")

    try:
        results = model.train(
            data=data_yaml_path,
            epochs=training_config['epochs'],
            imgsz=training_config['image_size'],
            batch=training_config['batch_size'],
            device=training_config['device'],
            # Previously configured but never passed on.
            # NOTE(review): Ultralytics may override lr0 when the optimizer is
            # left on its automatic setting - confirm against the train docs.
            lr0=training_config['learning_rate'],
            project=training_dir,
            name=experiment_name,
            save_period=training_config['save_period'],
            patience=training_config['patience'],
            verbose=True,
            # Augmentation settings for small dataset
            hsv_h=0.015,        # Hue augmentation
            hsv_s=0.7,          # Saturation augmentation
            hsv_v=0.4,          # Value augmentation
            degrees=10,         # Rotation degrees
            translate=0.1,      # Translation
            scale=0.9,          # Scale
            shear=0.0,          # Shear
            perspective=0.0,    # Perspective
            flipud=0.0,         # Flip up-down
            fliplr=0.5,         # Flip left-right
            mosaic=1.0,         # Mosaic augmentation
            mixup=0.1,          # Mixup augmentation
            copy_paste=0.1      # Copy-paste augmentation
        )

        print("\n🎉 Training completed successfully!")

    except Exception as e:
        # Deliberate best-effort: report the failure and let the remaining
        # cells run (they check for best.pt before using it).
        print(f"\n❌ Training failed: {e}")
        print("💡 Try reducing batch size or checking dataset format")

In [None]:
# CELL 6: Training Results Analysis
# ============================================================================

print("\n📈 Analyzing Training Results...")

# Folder the training cell was told to write to (project/name)
results_path = f"{training_dir}/{experiment_name}"

if not os.path.exists(results_path):
    print(f"❌ Training results not found at: {results_path}")
else:
    print("📊 Training Curves:")

    # Image files looked up inside the results folder
    results_images = [
        'results.png',
        'confusion_matrix.png',
        'val_batch0_pred.png',
        'train_batch0.png',
    ]

    # One panel per expected image, laid out 2 x 2
    fig, axes = plt.subplots(2, 2, figsize=(15, 12))
    axes = axes.flatten()

    for panel, img_name in zip(axes, results_images):
        img_path = os.path.join(results_path, img_name)
        if not os.path.exists(img_path):
            # Placeholder text for a missing plot
            panel.text(0.5, 0.5, f'{img_name}\nNot Found',
                       ha='center', va='center', transform=panel.transAxes)
        else:
            plot_rgb = cv2.cvtColor(cv2.imread(img_path), cv2.COLOR_BGR2RGB)
            panel.imshow(plot_rgb)
            panel.set_title(img_name.replace('.png', '').replace('_', ' ').title())
        panel.axis('off')

    plt.tight_layout()
    plt.show()

    # Report metrics for the best checkpoint, when one was written
    weights_path = os.path.join(results_path, 'weights', 'best.pt')
    if os.path.exists(weights_path):
        print(f"✅ Best model saved: {weights_path}")

        best_model = YOLO(weights_path)

        # val() is called without arguments, so the data split used is
        # whatever the library defaults to
        print("\n🧪 Validating best model...")
        val_results = best_model.val()

        box_metrics = val_results.box
        print("📊 Validation Metrics:")
        print(f"   mAP50: {box_metrics.map50:.3f}")
        print(f"   mAP50-95: {box_metrics.map:.3f}")
        print(f"   Precision: {box_metrics.mp:.3f}")
        print(f"   Recall: {box_metrics.mr:.3f}")

In [None]:
# CELL 7: Inference Testing
# ============================================================================

print("\n🔮 Testing Model Inference...")

# Load the best trained model
weights_path = f"{training_dir}/{experiment_name}/weights/best.pt"

if os.path.exists(weights_path):
    print(f"🔄 Loading trained model: {weights_path}")
    trained_model = YOLO(weights_path)

    # Class names come from dataset_info.json when the verification cell
    # loaded it; otherwise detections are labelled by numeric id.
    known_classes = dataset_info['classes'] if 'dataset_info' in globals() else None

    # Test on validation images
    val_images_dir = f"{dataset_path}/images/val"
    if os.path.exists(val_images_dir):
        # Sorted so "the first 3 images" is the same selection on every run
        # (os.listdir order is arbitrary).
        val_images = sorted(
            f for f in os.listdir(val_images_dir)
            if f.lower().endswith(('.png', '.jpg', '.jpeg'))
        )

        print(f"🧪 Testing on {len(val_images)} validation images...")

        # Run inference on a few validation images
        test_images = val_images[:3]  # Test first 3 images

        for i, img_name in enumerate(test_images):
            img_path = os.path.join(val_images_dir, img_name)

            print(f"\n🖼️  Testing: {img_name}")

            # Run inference; annotated output goes to its own test_<n> folder
            results = trained_model.predict(
                img_path,
                save=True,
                project=f"{training_dir}/inference",
                name=f"test_{i+1}",
                conf=0.25,  # Confidence threshold
                verbose=False
            )

            # Display results
            if len(results) > 0 and len(results[0].boxes) > 0:
                boxes = results[0].boxes
                print(f"   Detected {len(boxes)} objects:")

                for j, box in enumerate(boxes):
                    class_id = int(box.cls[0])
                    confidence = float(box.conf[0])
                    # Fall back to a generic label when the id is missing from
                    # dataset_info (or dataset_info was never loaded) instead
                    # of aborting the loop with IndexError/KeyError.
                    try:
                        class_name = known_classes[class_id]
                    except (IndexError, KeyError, TypeError):
                        class_name = f"class_{class_id}"
                    print(f"     {j+1}. {class_name}: {confidence:.2f}")
            else:
                print("   No objects detected")

        print(f"\n💾 Inference results saved to: {training_dir}/inference/")

else:
    print(f"❌ Trained model not found: {weights_path}")
    print("💡 Make sure training completed successfully")

In [None]:
# CELL 8: Client Demo Setup
# ============================================================================

print("\n🎬 Setting up Client Demo...")

def create_demo_inference_function(model_path, class_names):
    """Build a single-image detection function around a trained model.

    Args:
        model_path (str): Path to the trained weights file.
        class_names: Class labels indexed by the integer class id predicted
            by the model.

    Returns:
        callable or None: ``detect_roof_damage(image_path,
        confidence_threshold=0.25)``, or None (after printing a message) when
        ``model_path`` does not exist.
    """

    if not os.path.exists(model_path):
        print(f"❌ Model not found: {model_path}")
        return None

    demo_model = YOLO(model_path)

    def _class_label(class_id):
        """Return the label for class_id, or 'class_<id>' when it is unknown."""
        # A mismatch between the model's classes and class_names must not
        # crash the demo; this is the same fallback label the inference
        # testing cell uses.
        try:
            return class_names[class_id]
        except (IndexError, KeyError, TypeError):
            return f"class_{class_id}"

    def detect_roof_damage(image_path, confidence_threshold=0.25):
        """
        Detect roof damage in an image

        Args:
            image_path (str): Path to image file
            confidence_threshold (float): Minimum confidence for detections

        Returns:
            dict: 'image_path', 'detections' (list of dicts with 'class',
            'confidence', 'bbox' as [x1, y1, x2, y2] and 'severity'),
            'damage_found' (bool) and 'total_issues' (int)
        """

        # Run inference
        results = demo_model.predict(
            image_path,
            conf=confidence_threshold,
            verbose=False
        )

        # Parse results
        detections = []
        # NOTE(review): boxes is presumably None for non-detection models -
        # the explicit check avoids len(None) in that case.
        if len(results) > 0 and results[0].boxes is not None and len(results[0].boxes) > 0:
            for box in results[0].boxes:
                class_id = int(box.cls[0])
                label = _class_label(class_id)

                detections.append({
                    'class': label,
                    'confidence': float(box.conf[0]),
                    'bbox': box.xyxy[0].tolist(),  # [x1, y1, x2, y2]
                    # The class label is reused as the severity
                    'severity': label
                })

        return {
            'image_path': image_path,
            'detections': detections,
            'damage_found': len(detections) > 0,
            'total_issues': len(detections)
        }

    return detect_roof_damage

# Build the demo function once both the trained weights and the class list
# (from dataset_info.json) are available
if os.path.exists(weights_path) and 'dataset_info' in locals():
    print("🔧 Creating demo inference function...")

    demo_classes = dataset_info['classes']
    demo_function = create_demo_inference_function(weights_path, demo_classes)

    if demo_function:
        print("✅ Demo function created successfully!")
        print("\n💡 Usage example:")
        print("   result = demo_function('path/to/roof/image.jpg')")
        print("   print(f'Found {result[\"total_issues\"]} damage areas')")

        # Smoke-test the demo function on the first validation image
        if os.path.exists(val_images_dir) and len(val_images) > 0:
            test_image = os.path.join(val_images_dir, val_images[0])
            demo_result = demo_function(test_image)

            print("\n🧪 Demo test result:")
            print(f"   Image: {os.path.basename(demo_result['image_path'])}")
            print(f"   Damage found: {demo_result['damage_found']}")
            print(f"   Total issues: {demo_result['total_issues']}")

            for position, detection in enumerate(demo_result['detections'], start=1):
                label = detection['class']
                score = detection['confidence']
                print(f"   Detection {position}: {label} ({score:.2f})")

In [None]:
# CELL 9: Export Model for Deployment
# ============================================================================

print("\n📦 Exporting Model for Deployment...")

if os.path.exists(weights_path):
    export_model = YOLO(weights_path)

    # Formats listed for the reader; only ONNX and CoreML are exported below
    export_formats = dict(
        onnx='Cross-platform deployment',
        coreml='iOS/macOS deployment',
        torchscript='PyTorch deployment',
    )

    print("🔄 Available export formats:")
    print("\n".join(f"   {fmt}: {description}" for fmt, description in export_formats.items()))

    # ONNX export; a failure is reported and does not stop the cell
    try:
        print("\n📤 Exporting to ONNX format...")
        onnx_path = export_model.export(format='onnx')
    except Exception as e:
        print(f"⚠️  ONNX export failed: {e}")
    else:
        print(f"✅ ONNX model exported: {onnx_path}")

    # CoreML export for the Apple ecosystem, handled the same way
    try:
        print("\n📤 Exporting to CoreML format...")
        coreml_path = export_model.export(format='coreml')
    except Exception as e:
        print(f"⚠️  CoreML export failed: {e}")
    else:
        print(f"✅ CoreML model exported: {coreml_path}")
        print("💡 This model can be used in iOS/macOS apps")

In [None]:
# CELL 10: Project Summary and Next Steps
# ============================================================================

rule = "=" * 60
print("\n" + rule)
print("🎉 ROOF DAMAGE DETECTION PROJECT COMPLETE!")
print(rule)

# Project summary; the dataset entry needs the statistics loaded from
# dataset_info.json, and the status depends on best.pt being on disk
if 'dataset_info' in locals():
    dataset_summary = f"{dataset_path} ({dataset_info['statistics']['total_boxes']} annotations)"
else:
    dataset_summary = "Unknown"

model_ready = os.path.exists(weights_path)

summary = {
    'Dataset': dataset_summary,
    'Model': 'YOLOv8 Nano',
    'Training Time': '~20 minutes on M4 Pro',
    'Status': 'Ready for client demo' if model_ready else 'Training incomplete',
}

print("\n📊 Project Summary:")
print("\n".join(f"   {label}: {text}" for label, text in summary.items()))

print("\n📁 Key Files:")
if model_ready:
    print(f"   Trained Model: {weights_path}")
print(f"   Training Results: {training_dir}/{experiment_name}/")
print(f"   Dataset: {dataset_path}/")

# Fixed checklists, printed as heading + lines
closing_sections = (
    ("\n🎯 Client Demo Ready:", (
        "   ✅ Object detection model trained",
        "   ✅ Bounding box detection working",
        "   ✅ Multiple damage severity levels",
        "   ✅ Inference function created",
        "   ✅ Visualization tools available",
    )),
    ("\n🚀 Next Steps:", (
        "   1. Test model on new roof images",
        "   2. Collect feedback from client",
        "   3. Gather more training data (100-500 images)",
        "   4. Retrain for production deployment",
        "   5. Deploy to client's preferred platform",
    )),
    ("\n💡 For Production:", (
        "   • Collect 100-500 more annotated images",
        "   • Train YOLOv8s or YOLOv8m for better accuracy",
        "   • Implement automated damage assessment",
        "   • Deploy via API or mobile app",
    )),
)
for heading, lines in closing_sections:
    print(heading)
    for line in lines:
        print(line)

print("\n🏠 Ready to demonstrate professional roof damage detection!")

In [None]:
# Single Image Demo
client_photo = "projects/roof_shingle_inspection/inference/roof_damage_demo.png"

# demo_function is only defined when the demo setup cell found trained weights
# and dataset_info, so look it up defensively instead of raising NameError.
detect = globals().get('demo_function')

if detect is None:
    print("❌ demo_function is not available - run the Client Demo Setup cell after training completes")
elif not os.path.exists(client_photo):
    print(f"❌ Demo image not found: {client_photo}")
else:
    result = detect(client_photo)

    if result['damage_found']:
        print(f"🚨 DAMAGE DETECTED: {result['total_issues']} areas of concern")
        for detection in result['detections']:
            # The class label doubles as the severity description
            severity = detection['class']
            confidence = detection['confidence']
            print(f"  • {severity} (confidence: {confidence:.1%})")
    else:
        print("✅ No damage detected in this roof section")