# 🚀 HVAC AI — Production-Ready YOLO11 Inference Server
**Optimized Turn-Key Backend/Inference Notebook**

---

## 📋 Overview
Production-ready YOLO11 inference deployment with:
- ✅ Comprehensive GPU & dependency validation
- ✅ Optimized configuration management
- ✅ Error handling & monitoring
- ✅ Testing & benchmarking
- ✅ Security best practices
- ✅ Turn-key deployment

## 🎯 Prerequisites
1. **GPU Runtime**: T4 or better (Runtime → Change runtime type → GPU)
2. **Trained Model**: YOLO11 `.pt` file in Google Drive
3. **Ngrok Token**: Free token from [ngrok.com](https://ngrok.com/)
4. **Test Image**: Sample HVAC blueprint


In [None]:
# Attach Google Drive so later cells can read the trained weights stored on it.
from google.colab import drive

_drive_mount_point = '/content/drive'
drive.mount(_drive_mount_point)
print("‚úÖ Drive mounted at: /content/drive/MyDrive")

In [None]:
import sys
import os

print("="*70)
print("üîß Environment Setup & Validation")
print("="*70)

# Clone repository
print("\nüì¶ Cloning repository...")
!git clone https://github.com/elliotttmiller/hvac-ai.git 2>/dev/null || echo "Repository exists"
%cd hvac-ai

# Install dependencies
print("\nüìö Installing dependencies (2-3 minutes)...")
!pip install -q ultralytics>=8.0.0 fastapi>=0.115.0 uvicorn[standard]>=0.34.0
!pip install -q python-multipart>=0.0.9 pyngrok>=7.0.0 python-dotenv>=1.0.0
!pip install -q Pillow>=10.0.0 numpy>=1.24.0 tqdm>=4.65.0

# Validate environment
print("\nüîç System Validation")
print("="*70)

import torch
print(f"üêç Python: {sys.version.split()[0]}")
print(f"üî• PyTorch: {torch.__version__}")

if torch.cuda.is_available():
    print(f"\n‚úÖ GPU: {torch.cuda.get_device_name(0)}")
    print(f"   Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB")
    print(f"   CUDA: {torch.version.cuda}")
    # Test GPU
    test_tensor = torch.rand(1000, 1000).cuda()
    _ = torch.matmul(test_tensor, test_tensor)
    print(f"   Test: ‚úÖ PASSED")
else:
    print("\n‚ö†Ô∏è  WARNING: No GPU! Set Runtime > GPU. Inference will be SLOW.")

print("\n‚úÖ Environment Ready!")
print('=' * 70)

In [None]:
import os
from pathlib import Path

print("‚öôÔ∏è  Configuration")
print("="*70)

# --- UPDATE THESE VALUES ---
MODEL_PATH = "/content/drive/Shareddrives/HVAC/DECEMBER 24 OUTPUT WEIGHTS {dataset2}/hvac_obb_l_20251224_214011/weights/best.pt"


def _read_ngrok_token():
    """Return the ngrok authtoken without it ever being stored in the notebook.

    Lookup order: the NGROK_AUTHTOKEN environment variable, then a Colab
    Secret of the same name. Returns "" when neither is available, which the
    rest of the notebook treats as "no public tunnel".
    """
    token = os.environ.get("NGROK_AUTHTOKEN", "").strip()
    if token:
        return token
    try:
        from google.colab import userdata
    except ImportError:
        return ""  # not running inside Colab
    try:
        return (userdata.get("NGROK_AUTHTOKEN") or "").strip()
    except Exception as exc:  # secret missing, or notebook access not granted
        print(f"WARNING: could not read NGROK_AUTHTOKEN from Colab Secrets: {exc}")
        return ""


# SECURITY: a real authtoken used to be hard-coded on this line. Anything that
# was committed or shared with it must be considered leaked; revoke that token
# in the ngrok dashboard and supply the new one via the environment or Colab
# Secrets (key icon in the left sidebar, name it NGROK_AUTHTOKEN).
NGROK_AUTHTOKEN = _read_ngrok_token()

# Server settings
PORT = 8000
DEFAULT_CONF_THRESHOLD = 0.75  # Increased from 0.50 for higher precision
DEFAULT_IOU_THRESHOLD = 0.65   # Increased from 0.45 for better box filtering
MAX_IMAGE_SIZE = 1024

# Validation
errors = []
if not MODEL_PATH or not os.path.exists(MODEL_PATH):
    errors.append("‚ùå MODEL_PATH invalid or not found")
else:
    print(f"‚úÖ Model: {MODEL_PATH}")
    print(f"   Size: {os.path.getsize(MODEL_PATH) / 1e6:.1f} MB")

if not NGROK_AUTHTOKEN or NGROK_AUTHTOKEN == "YOUR_NGROK_TOKEN_HERE":
    print("‚ö†Ô∏è  Ngrok token not set (optional, for public URL)")
else:
    # Show only the last 4 characters; notebook output is routinely saved and shared.
    print(f"‚úÖ Ngrok: {'*' * 20}{NGROK_AUTHTOKEN[-4:]}")

print(f"\nüéØ Inference: conf={DEFAULT_CONF_THRESHOLD}, iou={DEFAULT_IOU_THRESHOLD}, size={MAX_IMAGE_SIZE}")

# Write .env
# The inference settings are persisted as well: later cells read them back
# with os.getenv(), and without these lines they silently fall back to their
# own hard-coded defaults instead of the values chosen above.
env_lines = [
    f"MODEL_PATH={MODEL_PATH}",
    f"NGROK_AUTHTOKEN={NGROK_AUTHTOKEN}",
    f"PORT={PORT}",
    f"DEFAULT_CONF_THRESHOLD={DEFAULT_CONF_THRESHOLD}",
    f"DEFAULT_IOU_THRESHOLD={DEFAULT_IOU_THRESHOLD}",
    f"MAX_IMAGE_SIZE={MAX_IMAGE_SIZE}",
]
with open('.env', 'w', encoding='utf-8') as f:
    f.write("\n".join(env_lines) + "\n")

if errors:
    print("\n‚ùå Errors:", "\n".join(errors))
else:
    print("\n‚úÖ Configuration valid")
print("="*70)

In [None]:
import torch
import numpy as np
import time
from ultralytics import YOLO

print("ü§ñ Model Loading & Validation")
print("="*70)

print(f"\nüì• Loading model (10-30s)...")
start = time.time()
model = YOLO(MODEL_PATH)
print(f"‚úÖ Loaded in {time.time() - start:.2f}s")

print(f"\nüìä Model Info:")
print(f"   Device: {model.device}")
print(f"   Classes: {len(model.names)}")
for idx, name in model.names.items():
    print(f"   [{idx}] {name}")

if torch.cuda.is_available():
    model.to('cuda')
    print(f"\nüöÄ Model on GPU")

# Warm-up
print(f"\nüî• Warm-up inference...")
dummy = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8)
start = time.time()
_ = model.predict(dummy, verbose=False, conf=0.25)
first_time = time.time() - start
start = time.time()
_ = model.predict(dummy, verbose=False, conf=0.25)
second_time = time.time() - start

print(f"   First: {first_time*1000:.1f}ms")
print(f"   Subsequent: {second_time*1000:.1f}ms (~{1.0/second_time:.0f} FPS)")

if torch.cuda.is_available():
    print(f"\nüíæ GPU Memory: {torch.cuda.memory_allocated(0) / 1e9:.2f} GB")

print("\n‚úÖ Model ready!")
print("="*70)

In [None]:
from google.colab import files
from PIL import Image
import matplotlib.pyplot as plt
import time
import numpy as np
import cv2
from ultralytics import YOLO
import os
from matplotlib.patches import Patch

print("üöÄ HVAC AI ‚Äî Fully Synchronized Inference Pipeline")
print("="*75)
print("‚úÖ Features: Guaranteed sync | No text labels | Professional legend | OBB compatible")
print("="*75)

# =====================================================================
# CONFIGURATION
# =====================================================================
print("\nüîß Configuration & Setup")
print("="*50)

# Load environment variables
try:
    from dotenv import load_dotenv
    load_dotenv()
    print("‚úÖ Environment variables loaded")
except ImportError:
    print("‚ö†Ô∏è python-dotenv not installed, using defaults")

# Configuration with fallbacks
MODEL_PATH = os.getenv('MODEL_PATH', "/content/drive/Shareddrives/HVAC/yolo11m_run_v10/weights/best.pt")
DEFAULT_CONF_THRESHOLD = float(os.getenv('DEFAULT_CONF_THRESHOLD', '0.35'))
DEFAULT_IOU_THRESHOLD = float(os.getenv('DEFAULT_IOU_THRESHOLD', '0.45'))
MAX_IMAGE_SIZE = int(os.getenv('MAX_IMAGE_SIZE', '1024'))

print(f"üìã Configuration:")
print(f"   Model Path: {MODEL_PATH}")
print(f"   Confidence Threshold: {DEFAULT_CONF_THRESHOLD:.2f}")
print(f"   IOU Threshold: {DEFAULT_IOU_THRESHOLD:.2f}")
print(f"   Max Image Size: {MAX_IMAGE_SIZE}")

# Validate model path
if not os.path.exists(MODEL_PATH):
    print(f"\n‚ùå ERROR: Model not found at {MODEL_PATH}")
    # Try to mount drive
    try:
        from google.colab import drive
        print("\nüîÑ Attempting to mount Google Drive...")
        drive.mount('/content/drive', force_remount=True)
        if os.path.exists(MODEL_PATH):
            print("‚úÖ Drive mounted successfully")
        else:
            print(f"‚ùå Model still not found at {MODEL_PATH}")
            raise FileNotFoundError(f"Model not found: {MODEL_PATH}")
    except Exception as e:
        print(f"‚ùå Failed to mount drive: {str(e)}")
        raise
else:
    print(f"‚úÖ Model found: {os.path.getsize(MODEL_PATH) / 1e6:.1f} MB")

# =====================================================================
# MODEL LOADING
# =====================================================================
# torch is used for the CUDA check below but was never imported by this cell;
# importing it here keeps the cell runnable without relying on an earlier one.
import torch

print("\n" + "="*50)
print("üß† Model Loading & Validation")
print("="*50)

try:
    print("üì• Loading model...")
    model = YOLO(MODEL_PATH)
    print("‚úÖ Model loaded successfully")

    # Verify model type.
    # The task recorded on the loaded model is authoritative. The previous
    # `hasattr(model, 'predict_obb')` probe never matched, which left the
    # decision entirely to a substring test on the file path. The path
    # heuristic is kept only as a fallback when no task is exposed.
    model_task = getattr(model, 'task', None)
    if model_task is not None:
        is_obb = model_task == 'obb'
    else:
        is_obb = 'obb' in MODEL_PATH.lower()
    print(f"üîç Model Type: {'OBB' if is_obb else 'Standard'}")

    # Print class information
    print("\nüìã Model Class Information:")
    print(f"   Total classes: {len(model.names)}")
    for idx, name in model.names.items():
        print(f"   [{idx}] {name}")

    # GPU setup
    if torch.cuda.is_available():
        model.to('cuda')
        print("üöÄ Model moved to GPU")
    else:
        print("‚ö†Ô∏è CPU Only Mode - Inference will be slower")
except Exception as e:
    # Report in the notebook's own format, then re-raise so the cell stops.
    print(f"‚ùå Error loading model: {str(e)}")
    raise

# =====================================================================
# IMAGE UPLOAD & PROCESSING
# =====================================================================
# Ask for a test image, normalise it to RGB, shrink it so its longest side is
# at most MAX_IMAGE_SIZE, and print simple brightness/contrast statistics.
_section_rule = "="*50
print("\n" + _section_rule)
print("üñºÔ∏è Image Upload & Processing")
print(_section_rule)

print("\nüì§ Upload test image...")
uploaded = files.upload()

if not uploaded:
    print("‚ùå No image uploaded. Please upload an image to continue.")
    raise ValueError("No image uploaded")

# Only the first uploaded file is used.
img_path = next(iter(uploaded))
print(f"‚úÖ Image uploaded: {img_path}")

try:
    img = Image.open(img_path).convert('RGB')
    width, height = img.size

    print(f"\nüìä Image Analysis:")
    print(f"   Dimensions: {width}x{height} pixels")
    print(f"   Mode: {img.mode}")

    # Downscale (never upscale) while keeping the aspect ratio.
    longest_side = max(width, height)
    if longest_side > MAX_IMAGE_SIZE:
        shrink = MAX_IMAGE_SIZE / longest_side
        target_size = (int(width * shrink), int(height * shrink))
        print(f"   üîç Resizing from {img.size} to {target_size}")
        img = img.resize(target_size, Image.LANCZOS)

    # Array view of whichever image (original or resized) will be analysed.
    img_array = np.array(img)

    # Mean and standard deviation over all pixels and channels.
    brightness = img_array.mean()
    contrast = img_array.std()
    print(f"\nüîç Image Quality Check:")
    print(f"   Brightness: {brightness:.1f}")
    print(f"   Contrast: {contrast:.1f}")

    if brightness > 200 or brightness < 50:
        print("   ‚ö†Ô∏è Unusual brightness - may affect detection")
    if contrast < 30:
        print("   ‚ö†Ô∏è Low contrast - may affect detection")

except Exception as exc:
    print(f"‚ùå Error processing image: {exc}")
    raise

# =====================================================================
# INFERENCE & SYNCHRONIZED DETECTION COUNTING
# =====================================================================
# Run one prediction, pick whichever result container actually holds
# detections (standard boxes first, then OBB), and tally detections per class.
_section_rule = "="*50
print("\n" + _section_rule)
print("üîç Inference & Synchronized Detection Counting")
print(_section_rule)

try:
    print(f"\nüîÑ Running inference with conf={DEFAULT_CONF_THRESHOLD}...")
    predict_options = {
        "conf": DEFAULT_CONF_THRESHOLD,
        "iou": DEFAULT_IOU_THRESHOLD,
        "imgsz": MAX_IMAGE_SIZE,
        "verbose": False,
    }
    start = time.time()
    results = model.predict(img_array, **predict_options)
    inf_time = (time.time() - start) * 1000  # milliseconds
    result = results[0]

    # The same `boxes` object and `detection_count` feed both the counting
    # below and the visualisation in the next section.
    boxes = None
    detection_count = 0
    standard_boxes = getattr(result, 'boxes', None)
    if standard_boxes is not None and len(standard_boxes) > 0:
        boxes = standard_boxes
        detection_count = len(boxes)
        print(f"‚úÖ Detections found: {detection_count}")
    else:
        oriented_boxes = getattr(result, 'obb', None)
        if oriented_boxes is not None and len(oriented_boxes) > 0:
            boxes = oriented_boxes
            detection_count = len(boxes)
            print(f"‚úÖ Detections found: {detection_count} (OBB model)")
        else:
            print("‚ö†Ô∏è No detections found")

    print(f"‚úÖ Complete: {inf_time:.1f}ms ({1000.0/inf_time:.1f} FPS)")

    # Per-class tally keyed by class name.
    class_counts = {}
    if detection_count == 0:
        print("\nüìä No components detected")
    else:
        for detection in boxes:
            label = model.names[int(detection.cls[0])]
            class_counts[label] = class_counts.get(label, 0) + 1

        print(f"\nüìä Detection Breakdown by Class:")
        for label in sorted(class_counts):
            print(f"   {label}: {class_counts[label]}")

except Exception as exc:
    print(f"‚ùå Error during inference: {exc}")
    raise

# =====================================================================
# VISUALIZATION & LEGEND (FULLY SYNCHRONIZED)
# =====================================================================
# Draws every detection on a copy of the input image (no text labels) and
# builds a legend from the same per-class counts, so image and legend agree.
print("\n" + "="*50)
print("üé® Visualization & Legend (Fully Synchronized)")
print("="*50)

try:
    # Custom high-contrast color palette (RGB). Defined unconditionally so
    # both the drawing loop and the legend use the same table.
    colors = [
        (255, 0, 0),       # Red
        (0, 255, 0),       # Green
        (0, 0, 255),       # Blue
        (255, 255, 0),     # Yellow
        (255, 0, 255),     # Magenta
        (0, 255, 255),     # Cyan
        (128, 0, 0),       # Maroon
        (0, 128, 0),       # Dark Green
        (0, 0, 128),       # Navy
        (128, 128, 0),     # Olive
        (128, 0, 128),     # Purple
        (0, 128, 128),     # Teal
    ]

    def _class_color(class_id):
        """Return the palette color for *class_id*, wrapping past the palette end."""
        return colors[class_id % len(colors)]

    # Create visualization with no text labels
    plot_img = img_array.copy()

    # Draw all detections with colors but no text
    if detection_count > 0:
        for box in boxes:
            cls_id = int(box.cls[0])
            color = _class_color(cls_id)

            # Oriented results expose their four corner points as `xyxyxyxy`;
            # draw the rotated outline for those. Standard boxes have no such
            # attribute and keep the axis-aligned rectangle.
            corners = getattr(box, 'xyxyxyxy', None)
            if corners is not None:
                polygon = (
                    np.array(corners[0].tolist(), dtype=np.float32)
                    .astype(np.int32)
                    .reshape(-1, 1, 2)
                )
                cv2.polylines(plot_img, [polygon], True, color, 2)
            else:
                x1, y1, x2, y2 = map(int, box.xyxy[0])
                cv2.rectangle(plot_img, (x1, y1), (x2, y2), color, 2)

    # Create figure with legend
    fig = plt.figure(figsize=(24, 10))
    gs = fig.add_gridspec(1, 3, width_ratios=[1, 1.2, 0.4])

    ax1 = fig.add_subplot(gs[0])
    ax2 = fig.add_subplot(gs[1])
    ax3 = fig.add_subplot(gs[2])

    # Original image
    ax1.imshow(img_array)
    ax1.set_title('Original Image', fontsize=14)
    ax1.axis('off')

    # Result with bounding boxes (NO TEXT)
    ax2.imshow(plot_img)
    ax2.set_title(f'Detections: {detection_count} ({inf_time:.0f}ms)', fontsize=14)
    ax2.axis('off')

    # Legend panel
    ax3.axis('off')
    legend_elements = []

    # One legend entry per detected class. The color comes from the same
    # wrap-around lookup used for drawing; the earlier `cls_id < len(colors)`
    # guard dropped every class beyond the palette size from the legend even
    # though its boxes were drawn. With more classes than palette entries,
    # colors repeat.
    if detection_count > 0:
        for cls_id, cls_name in model.names.items():
            count = class_counts.get(cls_name, 0)
            if count > 0:  # Only show classes that were detected
                red, green, blue = _class_color(cls_id)
                legend_elements.append(Patch(
                    facecolor=(red / 255, green / 255, blue / 255),
                    edgecolor='black',
                    label=f"{cls_name} ({count})"
                ))

    # Create legend
    if legend_elements:
        ax3.legend(
            handles=legend_elements,
            loc='center',
            fontsize=10,
            frameon=True,
            framealpha=0.95,
            title="HVAC Components",
            title_fontsize=12,
            facecolor='white'
        )
        ax3.set_title("Detection Legend", fontsize=14, pad=20)
    else:
        ax3.text(0.5, 0.5, "No components detected",
                 ha='center', va='center', fontsize=14)
        ax3.text(0.5, 0.4, "Check model compatibility",
                 ha='center', va='center', fontsize=12)
        ax3.set_title("No Detections", fontsize=14, pad=20)

    plt.tight_layout(pad=3.0)
    plt.savefig('/content/synchronized_inference_result.png', bbox_inches='tight', dpi=150)
    print("‚úÖ Synchronized visualization saved to /content/synchronized_inference_result.png")
    plt.show()

except Exception as e:
    # Report in the notebook's own format, then re-raise so the cell stops.
    print(f"‚ùå Error during visualization: {str(e)}")
    raise

# =====================================================================
# FINAL SUMMARY
# =====================================================================
# Recap the run: totals first, then either troubleshooting hints (nothing
# detected) or a success note, and finally a fixed list of tips.
_summary_rule = "="*50

_headline = [
    "\n" + _summary_rule,
    "‚úÖ Final Summary",
    _summary_rule,
    f"üéØ Total Detections: {detection_count}",
    f"‚ö° Inference Time: {inf_time:.1f}ms",
    f"üìä Class Distribution: {len(class_counts)} classes detected",
    f"üíæ Results saved to: /content/synchronized_inference_result.png",
]
print("\n".join(_headline))

if detection_count != 0:
    _outcome = [
        "\nüéâ SUCCESS: Detections properly synchronized with visualization",
        "   The legend now accurately reflects the detected components",
    ]
else:
    _outcome = [
        "\nüö® CRITICAL: No detections found",
        "   Possible causes:",
        "   1. Model incompatible with diagram style",
        "   2. Confidence threshold too high",
        "   3. Image quality issues",
        "   4. Model not trained on this component type",
    ]
print("\n".join(_outcome))

_tips = [
    "\n" + _summary_rule,
    "üí° Pro Tips for Production:",
    "   ‚Ä¢ Always count detections immediately after inference",
    "   ‚Ä¢ Use the same result object for visualization and counting",
    "   ‚Ä¢ Verify model compatibility with your diagram style",
    "   ‚Ä¢ Save this pipeline as a reusable function",
    "="*75,
]
print("\n".join(_tips))

In [None]:
from pyngrok import ngrok
from dotenv import load_dotenv
import os

print("üöÄ Deploying API Server")
print("="*70)

load_dotenv()

# Validate configuration
if not os.path.exists(MODEL_PATH):
    print("\n‚ùå ERROR: MODEL_PATH not found. Check configuration.")
    raise FileNotFoundError(f"Model not found: {MODEL_PATH}")

print(f"‚úÖ Model found: {MODEL_PATH}")

# Setup ngrok tunnel
if NGROK_AUTHTOKEN and NGROK_AUTHTOKEN != "YOUR_NGROK_TOKEN_HERE":
    print("\nüåê Setting up ngrok tunnel...")
    ngrok.set_auth_token(NGROK_AUTHTOKEN)
    public_url = ngrok.connect(PORT)
    print(f"\n‚úÖ API LIVE!")
    print(f"   Public URL: {public_url.public_url}")
    print(f"   API Docs: {public_url.public_url}/docs")
    print(f"   Health: {public_url.public_url}/health")
else:
    print("\n‚ö†Ô∏è  No ngrok token - server will be local only")
    print(f"   Local URL: http://localhost:{PORT}")

print("\n" + "="*70)
print("üé¨ Starting server (Press STOP button to shutdown)...")
print("="*70 + "\n")

# Validate python-services directory exists
if not os.path.exists('python-services'):
    print("‚ùå ERROR: python-services directory not found")
    print(f"   Current directory: {os.getcwd()}")
    print("   Please ensure you're in the hvac-ai repository root")
    raise FileNotFoundError("python-services directory not found")

%cd python-services
# Use PORT variable via Python string formatting
import subprocess
subprocess.run(["uvicorn", "hvac_analysis_service:app", "--host", "0.0.0.0", "--port", str(PORT), "--reload"])