# 🚀 HVAC AI — Production-Ready YOLO11 Inference Server
**Optimized Turn-Key Backend/Inference Notebook**

---

## 📋 Overview
Production-ready YOLO11 inference deployment with:
- ✅ Comprehensive GPU & dependency validation
- ✅ Optimized configuration management
- ✅ Error handling & monitoring
- ✅ Testing & benchmarking
- ✅ Security best practices
- ✅ Turn-key deployment

## 🎯 Prerequisites
1. **GPU Runtime**: T4 or better (Runtime → Change runtime type → GPU)
2. **Trained Model**: YOLO11 `.pt` file in Google Drive
3. **Ngrok Token**: Free token from [ngrok.com](https://ngrok.com/)
4. **Test Image**: Sample HVAC blueprint


In [None]:
# Attach Google Drive to the Colab filesystem; the configuration cell reads
# the model weights from a path under this mount point.
from google.colab import drive

_DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(_DRIVE_MOUNT_POINT)
print("‚úÖ Drive mounted at: /content/drive/MyDrive")

In [None]:
import sys
import os

print("="*70)
print("üîß Environment Setup & Validation")
print("="*70)

# Clone repository
print("\nüì¶ Cloning repository...")
!git clone https://github.com/elliotttmiller/hvac-ai.git 2>/dev/null || echo "Repository exists"
%cd hvac-ai

# Install dependencies
print("\nüìö Installing dependencies (2-3 minutes)...")
!pip install -q ultralytics>=8.0.0 fastapi>=0.115.0 uvicorn[standard]>=0.34.0
!pip install -q python-multipart>=0.0.9 pyngrok>=7.0.0 python-dotenv>=1.0.0
!pip install -q Pillow>=10.0.0 numpy>=1.24.0 tqdm>=4.65.0

# Validate environment
print("\nüîç System Validation")
print("="*70)

import torch
print(f"üêç Python: {sys.version.split()[0]}")
print(f"üî• PyTorch: {torch.__version__}")

if torch.cuda.is_available():
    print(f"\n‚úÖ GPU: {torch.cuda.get_device_name(0)}")
    print(f"   Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB")
    print(f"   CUDA: {torch.version.cuda}")
    # Test GPU
    test_tensor = torch.rand(1000, 1000).cuda()
    _ = torch.matmul(test_tensor, test_tensor)
    print(f"   Test: ‚úÖ PASSED")
else:
    print("\n‚ö†Ô∏è  WARNING: No GPU! Set Runtime > GPU. Inference will be SLOW.")

print("\n‚úÖ Environment Ready!")
print('=' * 70)

In [None]:
import os
from pathlib import Path

# Central configuration for the notebook: model location, ngrok credentials
# and inference defaults. The values are validated, summarized on stdout and
# written to a local `.env` file in the current working directory.
print("‚öôÔ∏è  Configuration")
print("="*70)

# --- UPDATE THESE VALUES ---
MODEL_PATH = "/content/drive/Shareddrives/HVAC/yolo11m_run_v10/weights/best.pt"
# SECURITY: never paste a real ngrok authtoken into the notebook source.
# The token is read from the NGROK_AUTHTOKEN environment variable (set it in
# a scratch cell, e.g. `os.environ["NGROK_AUTHTOKEN"] = getpass.getpass()`).
# When the variable is missing or blank, the placeholder below is used, which
# this cell and the deploy cell both treat as "no token configured".
# Any token that was previously committed in this file should be revoked in
# the ngrok dashboard.
NGROK_AUTHTOKEN = os.environ.get("NGROK_AUTHTOKEN", "").strip() or "YOUR_NGROK_TOKEN_HERE"

# Server settings
PORT = 8000
DEFAULT_CONF_THRESHOLD = 0.50
DEFAULT_IOU_THRESHOLD = 0.45
MAX_IMAGE_SIZE = 1024

# Validation
errors = []
if not MODEL_PATH or not os.path.exists(MODEL_PATH):
    errors.append("‚ùå MODEL_PATH invalid or not found")
else:
    print(f"‚úÖ Model: {MODEL_PATH}")
    print(f"   Size: {os.path.getsize(MODEL_PATH) / 1e6:.1f} MB")

if not NGROK_AUTHTOKEN or NGROK_AUTHTOKEN == "YOUR_NGROK_TOKEN_HERE":
    print("‚ö†Ô∏è  Ngrok token not set (optional, for public URL)")
else:
    # Only the last 8 characters are echoed; the rest is masked.
    print(f"‚úÖ Ngrok: {'*' * 20}{NGROK_AUTHTOKEN[-8:]}")

print(f"\nüéØ Inference: conf={DEFAULT_CONF_THRESHOLD}, iou={DEFAULT_IOU_THRESHOLD}, size={MAX_IMAGE_SIZE}")

# Write .env
# The file is written even when validation found errors; the deploy cell
# re-checks MODEL_PATH before starting the server.
with open('.env', 'w') as f:
    f.write(f"MODEL_PATH={MODEL_PATH}\nNGROK_AUTHTOKEN={NGROK_AUTHTOKEN}\nPORT={PORT}\n")

if errors:
    print("\n‚ùå Errors:", "\n".join(errors))
else:
    print("\n‚úÖ Configuration valid")
print("="*70)

In [None]:
import torch
import numpy as np
import time
from ultralytics import YOLO

# Load the YOLO weights from MODEL_PATH, list the model's classes, move the
# model to the GPU when CUDA is available, and time two predictions on a
# random 640x640 image (the first call is reported separately from the second).
separator = "="*70
print("ü§ñ Model Loading & Validation")
print(separator)

print("\nüì• Loading model (10-30s)...")
load_started = time.time()
model = YOLO(MODEL_PATH)
print(f"‚úÖ Loaded in {time.time() - load_started:.2f}s")

print("\nüìä Model Info:")
print(f"   Device: {model.device}")
print(f"   Classes: {len(model.names)}")
for class_id, class_name in model.names.items():
    print(f"   [{class_id}] {class_name}")

gpu_available = torch.cuda.is_available()
if gpu_available:
    model.to('cuda')
    print("\nüöÄ Model on GPU")

# Warm-up: run the same dummy frame twice and keep both wall-clock durations.
print("\nüî• Warm-up inference...")
dummy_frame = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8)
warmup_seconds = []
for _attempt in range(2):
    tick = time.time()
    model.predict(dummy_frame, verbose=False, conf=0.25)
    warmup_seconds.append(time.time() - tick)
first_time, second_time = warmup_seconds

print(f"   First: {first_time*1000:.1f}ms")
print(f"   Subsequent: {second_time*1000:.1f}ms (~{1.0/second_time:.0f} FPS)")

if gpu_available:
    print(f"\nüíæ GPU Memory: {torch.cuda.memory_allocated(0) / 1e9:.2f} GB")

print("\n‚úÖ Model ready!")
print(separator)

In [None]:
from collections import Counter
from google.colab import files
from PIL import Image
import matplotlib.pyplot as plt
import time
import numpy as np # Added import for numpy

# Interactive smoke test: upload one image, run the loaded model on it with
# the configured thresholds, print per-class detection counts, and show the
# original next to the annotated result.
print("üß™ Test Inference")
print("="*70)

print("\nüì§ Upload test image...")
uploaded = files.upload()

if uploaded:
    # Only the first uploaded file is used.
    img_path = list(uploaded.keys())[0]
    img = Image.open(img_path).convert('RGB')
    img_array = np.array(img)  # RGB array, used only for display below

    print(f"\nüìä Image: {img.size[0]}x{img.size[1]}")

    print(f"\nüîÑ Running inference...")
    start = time.time()
    # Pass the PIL image rather than the numpy array: Ultralytics interprets
    # numpy input as BGR, so an RGB array would be inferred with red and blue
    # swapped. PIL input is documented as RGB and converted internally.
    results = model.predict(img, conf=DEFAULT_CONF_THRESHOLD,
                            iou=DEFAULT_IOU_THRESHOLD, imgsz=MAX_IMAGE_SIZE, verbose=False)
    inf_time = (time.time() - start) * 1000

    result = results[0]
    boxes = result.boxes

    print(f"\n‚úÖ Complete: {inf_time:.1f}ms ({1000.0/inf_time:.1f} FPS)")
    if boxes is not None:
        print(f"   Detections: {len(boxes)}")
    else:
        print(f"   Detections: 0 (No objects detected)")

    # `boxes` may be None; treat that the same as an empty result from here on.
    num_detections = len(boxes) if boxes is not None else 0

    if num_detections > 0:
        # One class id per detection -> class name -> frequency.
        class_counts = Counter(model.names[int(cls_id)] for cls_id in boxes.cls.tolist())
        print(f"\nüìä By Class:")
        for name, count in sorted(class_counts.items()):
            print(f"   {name}: {count}")

    # Visualize
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(20, 10))
    ax1.imshow(img_array)
    ax1.set_title('Original')
    ax1.axis('off')
    # result.plot() returns a BGR array; reverse the channel axis so that
    # matplotlib (which expects RGB) shows true colors.
    ax2.imshow(result.plot()[..., ::-1])
    ax2.set_title(f'{num_detections} detections ({inf_time:.0f}ms)')
    ax2.axis('off')
    plt.tight_layout()
    plt.show()
else:
    print("‚ùå No image uploaded")

In [None]:
from pyngrok import ngrok
from dotenv import load_dotenv
import os
import subprocess

# Deployment cell: validate the configuration, optionally open an ngrok
# tunnel to PORT, then run the uvicorn server in the foreground until the
# cell is interrupted. The tunnel (if any) is closed when the server exits.
print("üöÄ Deploying API Server")
print("="*70)

load_dotenv()

# Validate configuration
if not os.path.exists(MODEL_PATH):
    print("\n‚ùå ERROR: MODEL_PATH not found. Check configuration.")
    raise FileNotFoundError(f"Model not found: {MODEL_PATH}")

print(f"‚úÖ Model found: {MODEL_PATH}")

# Validate python-services directory exists
# Done before the tunnel is opened so that a failed check cannot leave an
# orphaned public tunnel behind.
SERVICE_DIR = 'python-services'
if not os.path.exists(SERVICE_DIR):
    print("‚ùå ERROR: python-services directory not found")
    print(f"   Current directory: {os.getcwd()}")
    print("   Please ensure you're in the hvac-ai repository root")
    raise FileNotFoundError("python-services directory not found")

# Setup ngrok tunnel
# NOTE(review): the tunnel exposes the API publicly; nothing in this cell adds
# authentication — confirm the service itself enforces access control.
public_url = None
if NGROK_AUTHTOKEN and NGROK_AUTHTOKEN != "YOUR_NGROK_TOKEN_HERE":
    print("\nüåê Setting up ngrok tunnel...")
    ngrok.set_auth_token(NGROK_AUTHTOKEN)
    public_url = ngrok.connect(PORT)
    print(f"\n‚úÖ API LIVE!")
    print(f"   Public URL: {public_url.public_url}")
    print(f"   API Docs: {public_url.public_url}/docs")
    print(f"   Health: {public_url.public_url}/health")
else:
    print("\n‚ö†Ô∏è  No ngrok token - server will be local only")
    print(f"   Local URL: http://localhost:{PORT}")

print("\n" + "="*70)
print("üé¨ Starting server (Press STOP button to shutdown)...")
print("="*70 + "\n")

try:
    # Run the server with python-services as its working directory via `cwd=`
    # instead of `%cd`, so the notebook's own directory is left untouched and
    # this cell can be re-run without failing the directory check above.
    # NOTE(review): `--reload` is kept from the original command; it is
    # presumably a development convenience — confirm it is wanted here.
    subprocess.run(
        ["uvicorn", "hvac_analysis_service:app", "--host", "0.0.0.0", "--port", str(PORT), "--reload"],
        cwd=SERVICE_DIR,
    )
finally:
    # Runs on normal exit and on interrupt (STOP button): close the tunnel so
    # repeated runs do not accumulate open ngrok sessions.
    if public_url is not None:
        ngrok.disconnect(public_url.public_url)