# 🛰️ Satellite Imagery Inference - Economic Activity Extraction

Extract economic activity metrics from satellite imagery using trained YOLO models.

## Purpose:
Transform satellite imagery → Activity metrics for economic forecasting

## Models Used:
- **Retail**: Car counting (41.5% mAP)
- **Ports**: Ship/vehicle/harbor detection (72.0% mAP)  
- **City**: Vehicle density (training)

## Outputs:
- `data/features/satellite/retail_activity.csv`
- `data/features/satellite/port_activity.csv`
- `data/features/satellite/city_activity.csv`
- `data/features/satellite/industrial_activity.csv`

## Time Required:
**2-4 hours** (depends on GPU)

---

## 1. Setup & Imports

In [None]:
import os
from pathlib import Path
from typing import Dict, List, Tuple
import pandas as pd
import numpy as np
from datetime import datetime
from ultralytics import YOLO
from tqdm.notebook import tqdm
import cv2
import warnings
# NOTE(review): with no category given, this hides every warning raised after
# this point, including deprecation and runtime warnings from the libraries
# imported above. Narrow the filter if a specific warning is the real target.
warnings.filterwarnings('ignore')

# Confirm the imports worked and record when the run started.
print("‚úÖ Imports successful")
print(f"üìÖ Start time: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")

## 2. Configuration

In [None]:
# Paths
# The project root is taken to be the parent of the current working directory,
# so this cell expects to be run from a sub-folder of the project.
PROJECT_ROOT = Path.cwd().parent
DATA_DIR = PROJECT_ROOT / "data"
RAW_SATELLITE = DATA_DIR / "raw" / "satellite"
FEATURES_DIR = DATA_DIR / "features" / "satellite"
MODELS_DIR = DATA_DIR / "models" / "satellite"

# Create output directory (no error if it already exists)
FEATURES_DIR.mkdir(parents=True, exist_ok=True)

# Model paths: one trained weights file per model, keyed by model name
MODELS = {
    'retail': MODELS_DIR / "retail_yolo11_20251126_150811" / "weights" / "best.pt",
    'ports': MODELS_DIR / "ports_dota_yolo11_20251127_013205" / "weights" / "best.pt",
    'city': MODELS_DIR / "city_yolo11_20251127_184743" / "weights" / "best.pt",
}

# Class names for each model (same keys as MODELS)
CLASS_NAMES = {
    'retail': ['car', 'equipment'],
    'ports': ['ship', 'harbor', 'large-vehicle', 'small-vehicle', 'storage-tank'],
    'city': ['car', 'truck', 'warehouse'],
}

# Years to process
YEARS = [2017, 2018, 2019, 2020, 2021, 2022, 2023, 2024]

# Inference settings
INFERENCE_CONFIG = {
    'conf': 0.25,  # Confidence threshold
    'iou': 0.45,   # NMS IoU threshold
    'imgsz': 640,  # Image size for inference
    'device': 0,   # GPU device (0 for first GPU, 'cpu' for CPU)
    'verbose': False,
}


def device_label(device) -> str:
    """Return 'CPU' when *device* is the string 'cpu', otherwise 'GPU'.

    The previous inline check compared against the literal 0, so any other
    GPU selector (1, 'cuda:0', ...) was reported as CPU. Here only the
    case-insensitive string 'cpu' maps to 'CPU'; every other value is
    reported as 'GPU'.
    """
    return 'CPU' if str(device).strip().lower() == 'cpu' else 'GPU'


print(f"üìÅ Output directory: {FEATURES_DIR}")
# Only models whose weights file is actually on disk are listed here.
print(f"üìä Models available: {[k for k, v in MODELS.items() if v.exists()]}")
print(f"üìÖ Years to process: {YEARS}")
print(f"‚öôÔ∏è  Device: {device_label(INFERENCE_CONFIG['device'])}")

## 3. Location Definitions

In [None]:
# Location categories
# Sites to analyse, grouped by the kind of economic activity they represent.
# Each name is used verbatim as the filename prefix when looking up imagery.
LOCATION_CATEGORIES = {
    'retail': [
        'Mall_of_America',
        'Westfield_Century_City',
        'The_Grove_LA',
        'South_Coast_Plaza',
        'Tysons_Corner_Center',
        'King_of_Prussia',
        'Roosevelt_Field',
        'Westfield_London',
        'Bluewater',
        'Trafford_Centre',
        'Galeries_Lafayette',
        'La_Maquinista',
        'Centro_Oberhausen',
        'Pacific_Mall_Delhi',
        'Select_Citywalk',
        'Mall_of_Asia_Manila',
    ],
    'ports': [
        'Port_of_Los_Angeles',
        'Port_of_Long_Beach',
        'Port_of_New_York_New_Jersey',
        'Port_of_Savannah',
        'Port_of_Houston',
        'Port_of_Rotterdam',
        'Port_of_Antwerp',
        'Port_of_Hamburg',
        'Port_of_Valencia',
        'Port_of_Shanghai',
        'Port_of_Ningbo',
        'Port_of_Singapore',
        'Port_of_Busan',
        'Port_of_Hong_Kong',
        'Port_of_Durban',
        'Port_of_Mombasa',
        'Port_of_Lagos',
        'Port_of_Jebel_Ali',
        'Port_of_Salalah',
    ],
    'industrial': [
        'Shenzhen_Electronics',
        'Suzhou_Industrial_Park',
        'Pune_Hinjawadi',
        'Detroit_Auto',
        'Tijuana_Manufacturing',
    ],
    'city': [
        'Los_Angeles',
        'New_York_City',
        'Chicago',
        'London',
        'Paris',
        'Tokyo',
        'Beijing',
        'Mumbai',
        'Johannesburg',
        'Sao_Paulo',
    ],
}

# Print how many sites each category contains.
for category in LOCATION_CATEGORIES:
    locations = LOCATION_CATEGORIES[category]
    print(f"{category.upper()}: {len(locations)} locations")

## 4. Helper Functions

In [None]:
def load_model(model_path: Path, model_name: str) -> YOLO:
    """Load YOLO weights from disk, reporting progress on stdout.

    Args:
        model_path: Location of the weights file.
        model_name: Label used in the progress messages.

    Returns:
        The loaded model, or None (after printing a warning) when
        ``model_path`` does not exist.
    """
    if model_path.exists():
        print(f"üì• Loading {model_name} model...")
        loaded = YOLO(str(model_path))
        print(f"   ‚úÖ {model_name.upper()} model loaded")
        return loaded

    # Missing weights are reported rather than raised so the caller can skip
    # this model and carry on with the others.
    print(f"‚ö†Ô∏è  Model not found: {model_path}")
    return None


def find_images(location: str, year: int) -> List[Path]:
    """Collect the imagery files available for one location and year.

    Each source is looked up under ``RAW_SATELLITE/<source>/<year>/`` with
    the filename ``<location>_<source>_<year>.tif``. NAIP results come
    first, then Sentinel-2.

    Args:
        location: Location name used as the filename prefix.
        year: Acquisition year; selects both the folder and the filename.

    Returns:
        Matching paths; empty when neither source has a file.
    """
    found: List[Path] = []

    # Order matters: NAIP (US locations only, high-res) first, then
    # Sentinel-2 (all locations, 10m resolution).
    for source in ("naip", "sentinel-2-l2a"):
        year_dir = RAW_SATELLITE / source / str(year)
        if not year_dir.exists():
            continue
        found += year_dir.glob(f"{location}_{source}_{year}.tif")

    return found


def process_image(image_path: Path, model: YOLO, config: Dict) -> pd.DataFrame:
    """Run inference on a single image and return detections as DataFrame.

    Args:
        image_path: Image file to read with ``cv2.imread``.
        model: Loaded YOLO model, called directly on the decoded image.
        config: Inference settings; the keys ``conf``, ``iou``, ``imgsz``,
            ``device`` and ``verbose`` are forwarded to the model.

    Returns:
        One row per detected box with columns ``class_id``, ``class_name``,
        ``confidence``, ``x1``, ``y1``, ``x2``, ``y2``. An empty DataFrame is
        returned when the image cannot be read, when nothing is detected, or
        when inference raises; the caller cannot tell these cases apart from
        the return value, so the first and last are reported on stdout.
    """
    try:
        img = cv2.imread(str(image_path))
        if img is None:
            # imread returns None instead of raising for unreadable files;
            # name the file so the gap in the output can be traced.
            print(f"‚ö†Ô∏è  Could not read image: {image_path}")
            return pd.DataFrame()

        # The model returns one result per input image; only one was passed.
        results = model(
            img,
            conf=config['conf'],
            iou=config['iou'],
            imgsz=config['imgsz'],
            device=config['device'],
            verbose=config['verbose']
        )[0]

        if len(results.boxes) == 0:
            return pd.DataFrame()

        detections = []
        for box in results.boxes:
            class_id = int(box.cls.item())
            # xyxy holds a single row for a single box: the two corner points.
            x1, y1, x2, y2 = box.xyxy[0].tolist()
            detections.append({
                'class_id': class_id,
                'class_name': results.names[class_id],
                'confidence': float(box.conf.item()),
                'x1': float(x1),
                'y1': float(y1),
                'x2': float(x2),
                'y2': float(y2),
            })

        return pd.DataFrame(detections)

    except Exception as e:
        # Deliberate best-effort: one bad image must not abort a long batch
        # run, but the message has to say which image failed.
        print(f"‚ùå Error processing {image_path}: {e}")
        return pd.DataFrame()


def aggregate_detections(
    detections: pd.DataFrame,
    location: str,
    year: int,
    month: int,
    activity_type: str
) -> Dict:
    """Aggregate raw detections into activity metrics.

    Args:
        detections: Per-box detections with a ``class_name`` column; may be
            empty.
        location: Location name copied into the record.
        year: Year copied into the record.
        month: Month copied into the record; the ``date`` field is always the
            15th of that month.
        activity_type: One of ``'retail'``, ``'ports'``, ``'city'`` or
            ``'industrial'``; selects the class columns and derived indices.

    Returns:
        A flat dict with the identifying fields, ``total_objects``, one
        ``<class>_count`` entry per class of the relevant model, and the
        indices derived for ``activity_type``. Unknown activity types get
        only the identifying fields and ``total_objects``.
    """
    has_detections = not detections.empty

    metric = {
        'location': location,
        'year': year,
        'month': month,
        'date': f"{year}-{month:02d}-15",
        'activity_type': activity_type,
        'total_objects': len(detections) if has_detections else 0,
    }

    # Class-specific counts
    if has_detections:
        class_counts = detections['class_name'].value_counts().to_dict()
    else:
        class_counts = {}

    # Industrial sites have no model of their own: they are run through the
    # city model, so their class list must come from 'city'. Without this
    # fallback no truck/warehouse counts were recorded for industrial rows
    # and both industrial indices were always 0.
    borrowed_classes = {'industrial': 'city'}
    if activity_type in CLASS_NAMES:
        class_source = activity_type
    else:
        class_source = borrowed_classes.get(activity_type, activity_type)

    for class_name in CLASS_NAMES.get(class_source, []):
        metric[f'{class_name}_count'] = class_counts.get(class_name, 0)

    # Activity-specific metrics
    if activity_type == 'retail':
        metric['car_density'] = metric.get('car_count', 0)
        metric['parking_activity_index'] = metric['car_density'] + metric.get('equipment_count', 0) * 0.5

    elif activity_type == 'ports':
        ships = metric.get('ship_count', 0)
        large_vehicles = metric.get('large-vehicle_count', 0)
        small_vehicles = metric.get('small-vehicle_count', 0)
        metric['congestion_index'] = ships * 2.0 + large_vehicles * 1.0 + small_vehicles * 0.5
        metric['port_activity_index'] = ships + large_vehicles + small_vehicles + metric.get('storage-tank_count', 0)

    elif activity_type == 'city':
        cars = metric.get('car_count', 0)
        trucks = metric.get('truck_count', 0)
        metric['vehicle_density'] = cars + trucks
        metric['traffic_index'] = cars + trucks * 1.5
        metric['logistics_activity'] = trucks + metric.get('warehouse_count', 0)

    elif activity_type == 'industrial':
        trucks = metric.get('truck_count', 0)
        warehouses = metric.get('warehouse_count', 0)
        metric['industrial_activity_index'] = trucks * 2.0 + warehouses * 1.0
        metric['logistics_intensity'] = trucks

    return metric

print("‚úÖ Helper functions defined")

## 5. Main Inference Function

In [None]:
def run_inference_for_activity(
    activity_type: str,
    model: YOLO,
    locations: List[str],
    years: List[int]
) -> pd.DataFrame:
    """Run one model over every location/year pair and collect the metrics.

    Every image found for a pair yields one record; pairs without imagery
    yield none. All records are stamped with month 6.

    Args:
        activity_type: Activity label passed to the aggregation step and
            shown in the progress output.
        model: Loaded YOLO model used for every image.
        locations: Location names to process.
        years: Years to process for each location.

    Returns:
        One row per processed image; empty when no imagery was found.
    """
    print(f"\n{'='*80}")
    print(f"üöÄ Running Inference: {activity_type.upper()}")
    print(f"{'='*80}")
    print(f"Locations: {len(locations)}")
    print(f"Years: {years}")
    print(f"Total combinations: {len(locations) * len(years)}")

    mid_year_month = 6  # Mid-year
    pairs = [(place, yr) for place in locations for yr in years]
    records = []

    # The bar advances once per location/year pair, whether or not any
    # imagery exists for it.
    with tqdm(total=len(pairs), desc=f"{activity_type.upper()}", unit="img") as pbar:
        for place, yr in pairs:
            for image_path in find_images(place, yr):
                boxes = process_image(image_path, model, INFERENCE_CONFIG)
                records.append(
                    aggregate_detections(boxes, place, yr, mid_year_month, activity_type)
                )
            pbar.update(1)

    df = pd.DataFrame(records)

    if df.empty:
        return df

    print(f"\n‚úÖ {activity_type.upper()} Inference Complete!")
    print(f"   Records: {len(df)}")
    print(f"   Date range: {df['year'].min()}-{df['year'].max()}")
    print(f"   Locations: {df['location'].nunique()}")
    print(f"   Total objects detected: {df['total_objects'].sum():,}")
    return df

print("‚úÖ Main inference function defined")

## 6. Load Models

In [None]:
print("\n" + "="*80)
print("üì• LOADING MODELS")
print("="*80)

# Attempt every configured model in order; load_model yields None for weights
# that are not on disk, and those entries are left out of `models`.
candidates = ((name, load_model(path, name)) for name, path in MODELS.items())
models = {name: loaded for name, loaded in candidates if loaded is not None}

if models:
    print(f"\n‚úÖ Loaded {len(models)} models: {list(models.keys())}")
else:
    print("\n‚ùå No models found! Please train models first.")

## 7. Run Inference

### 7.1 Retail Activity

In [None]:
if 'retail' not in models:
    print("‚ö†Ô∏è  Retail model not available")
else:
    # Run the retail model over every retail location and configured year.
    retail_df = run_inference_for_activity(
        activity_type='retail',
        model=models['retail'],
        locations=LOCATION_CATEGORIES['retail'],
        years=YEARS,
    )

    # Write the metrics to disk, then show the first rows as a sanity check.
    output_path = FEATURES_DIR / "retail_activity.csv"
    retail_df.to_csv(output_path, index=False)
    print(f"\nüíæ Saved: {output_path}")

    display(retail_df.head(10))

### 7.2 Port Activity

In [None]:
if 'ports' not in models:
    print("‚ö†Ô∏è  Ports model not available")
else:
    # Run the ports model over every port location and configured year.
    ports_df = run_inference_for_activity(
        activity_type='ports',
        model=models['ports'],
        locations=LOCATION_CATEGORIES['ports'],
        years=YEARS,
    )

    # Write the metrics to disk, then show the first rows as a sanity check.
    output_path = FEATURES_DIR / "port_activity.csv"
    ports_df.to_csv(output_path, index=False)
    print(f"\nüíæ Saved: {output_path}")

    display(ports_df.head(10))

### 7.3 City Activity

In [None]:
if 'city' not in models:
    print("‚ö†Ô∏è  City model not available (may still be training)")
else:
    # Run the city model over every city location and configured year.
    city_df = run_inference_for_activity(
        activity_type='city',
        model=models['city'],
        locations=LOCATION_CATEGORIES['city'],
        years=YEARS,
    )

    # Write the metrics to disk, then show the first rows as a sanity check.
    output_path = FEATURES_DIR / "city_activity.csv"
    city_df.to_csv(output_path, index=False)
    print(f"\nüíæ Saved: {output_path}")

    display(city_df.head(10))

### 7.4 Industrial Activity

Uses city model (trucks + warehouses)

In [None]:
if 'city' not in models:
    print("‚ö†Ô∏è  City model not available for industrial inference")
else:
    # There is no dedicated industrial model: the city model is applied to
    # the industrial locations and the results are labelled 'industrial'.
    industrial_df = run_inference_for_activity(
        activity_type='industrial',
        model=models['city'],
        locations=LOCATION_CATEGORIES['industrial'],
        years=YEARS,
    )

    # Write the metrics to disk, then show the first rows as a sanity check.
    output_path = FEATURES_DIR / "industrial_activity.csv"
    industrial_df.to_csv(output_path, index=False)
    print(f"\nüíæ Saved: {output_path}")

    display(industrial_df.head(10))

## 8. Final Summary

In [None]:
print("\n" + "="*80)
print("üéâ INFERENCE COMPLETE!")
print("="*80)
finished_at = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
print(f"End time: {finished_at}")
print(f"\nResults saved to: {FEATURES_DIR}")

# Report every activity CSV currently in the output folder with its size.
saved_files = list(FEATURES_DIR.glob("*_activity.csv"))
print(f"\nSaved files:")
for csv_file in saved_files:
    print(f"  ‚úÖ {csv_file.name} ({csv_file.stat().st_size / 1024:.1f} KB)")

print("\n" + "="*80)
print("üìä NEXT STEPS:")
print("="*80)
for step in (
    "1. Review output CSV files",
    "2. Run Analyze_Activity_Metrics.ipynb",
    "3. Download economic indicators (FRED)",
    "4. Train forecasting models",
):
    print(step)
print("="*80 + "\n")