# 🚢 Demo 3: AIS Maritime Data

**Processing ship tracking data from NOAA**

## What is AIS?
- **Automatic Identification System (AIS)** - a radio transponder system that lets ships be tracked from the GPS positions they broadcast
- Ships broadcast position, speed, heading, vessel type
- NOAA collects and provides historical data

## What We're Doing:
- Load AIS data for Port of LA region
- Extract ship counts and vessel types
- Visualize maritime traffic patterns

---

In [None]:
# Setup
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import zipfile
from pathlib import Path
from datetime import datetime

# Project root is taken as two directories above the current working directory.
# NOTE(review): this assumes the notebook is launched from its own folder,
# two levels below the repository root — confirm before running elsewhere.
PROJECT_ROOT = Path.cwd().parent.parent
print("✅ Setup complete")

---
## 1️⃣ Understanding AIS Data

```
Ship broadcasts every few seconds:
┌─────────────────────────────────────────────────┐
│ MMSI: 123456789 (unique ship ID)                │
│ LAT: 33.7234, LON: -118.2654                    │
│ SOG: 12.5 knots (speed over ground)             │
│ COG: 275° (course over ground)                  │
│ VesselType: 70 (cargo ship)                     │
│ VesselName: "EVER GIVEN"                        │
└─────────────────────────────────────────────────┘
```

In [None]:
# AIS Vessel Type Codes
# Reference table of the AIS ship-type codes this demo cares about. The tug
# entry lists the same codes (31, 32, 52) that classify_vessel() maps to 'Tug',
# and code 37 is shown separately because it denotes pleasure craft, not
# sailing vessels.
print("📋 AIS VESSEL TYPE CODES")
print("="*50)

vessel_types = {
    '70-79': 'Cargo vessels',
    '80-89': 'Tankers',
    '60-69': 'Passenger vessels',
    '30': 'Fishing vessels',
    '31-32, 52': 'Tugs and towing vessels',
    '36': 'Sailing vessels',
    '37': 'Pleasure craft',
}

for code, desc in vessel_types.items():
    print(f"   {code}: {desc}")

print("\n💡 Cargo (70-79) and Tankers (80-89) are key economic indicators!")

---
## 2️⃣ Load AIS Data

In [None]:
# Inventory the NOAA daily AIS archives: one summary line per year folder
ais_dir = PROJECT_ROOT / 'data' / 'raw' / 'ais' / 'noaa_daily'

print("📂 AVAILABLE AIS DATA")
print("="*50)

if not ais_dir.exists():
    print("   ⚠️ AIS data directory not found")
    print("   Creating sample data for demo...")
else:
    for year_dir in sorted(ais_dir.iterdir()):
        # Only sub-directories are treated as year folders
        if not year_dir.is_dir():
            continue
        archives = list(year_dir.glob('*.zip'))
        if not archives:
            continue
        # Combined archive size, converted from bytes to MB
        size_mb = sum(archive.stat().st_size for archive in archives) / (1024*1024)
        print(f"   {year_dir.name}: {len(archives)} files ({size_mb:.1f} MB)")

In [None]:
# Port of LA bounding box (decimal degrees) used to filter AIS positions
PORT_LA_BOUNDS = dict(
    min_lat=33.65,
    max_lat=33.85,
    min_lon=-118.35,
    max_lon=-118.15,
)

# Echo the box so the reader can see the capture area at a glance
print("📍 PORT OF LA BOUNDING BOX")
print("=" * 50)
print(
    f"   Latitude:  {PORT_LA_BOUNDS['min_lat']}° to {PORT_LA_BOUNDS['max_lat']}°"
)
print(
    f"   Longitude: {PORT_LA_BOUNDS['min_lon']}° to {PORT_LA_BOUNDS['max_lon']}°"
)

# Visualize on a simple map
from matplotlib.patches import Rectangle

fig, ax = plt.subplots(figsize=(10, 8))

# Shade the capture area: anchor at the south-west corner, then span the box
box_origin = (PORT_LA_BOUNDS['min_lon'], PORT_LA_BOUNDS['min_lat'])
box_width = PORT_LA_BOUNDS['max_lon'] - PORT_LA_BOUNDS['min_lon']
box_height = PORT_LA_BOUNDS['max_lat'] - PORT_LA_BOUNDS['min_lat']
rect = Rectangle(
    box_origin,
    box_width,
    box_height,
    fill=True, facecolor='lightblue', edgecolor='blue', linewidth=2, alpha=0.5,
)
ax.add_patch(rect)

# Star marker for the port itself
ax.plot(-118.25, 33.75, 'r*', markersize=20, label='Port of LA')

# Frame the view wider than the box so its edges stay visible
ax.set_xlim(-118.5, -118.0)
ax.set_ylim(33.5, 34.0)

# Labels, title, legend and a light grid
ax.set_xlabel('Longitude', fontsize=12)
ax.set_ylabel('Latitude', fontsize=12)
ax.set_title('🗺️ Port of LA - AIS Capture Area', fontsize=14, fontweight='bold')
ax.legend()
ax.grid(True, alpha=0.3)

plt.show()

---
## 3️⃣ Process AIS Sample Data

In [None]:
# Try to load real AIS data, or create sample
def load_ais_sample():
    """Return AIS position records for the Port of LA demo area.

    Looks for zip archives anywhere under
    ``PROJECT_ROOT/data/raw/ais/noaa_daily``. The first archive in sorted path
    order is opened, the first 100,000 rows of its first ``.csv`` member are
    read, and the rows are filtered to ``PORT_LA_BOUNDS`` (inclusive on all
    four edges).

    A seeded synthetic sample of 5,000 records is generated instead when no
    archive is found, the archive has no ``.csv`` member, or no row falls
    inside the bounding box.

    Returns:
        pandas.DataFrame: An independent frame, so callers can add columns
        without touching (or being warned about) the frame it was sliced from.
        The synthetic sample has the columns MMSI, BaseDateTime, LAT, LON,
        SOG, COG, VesselType and VesselName.
    """
    ais_root = PROJECT_ROOT / 'data' / 'raw' / 'ais' / 'noaa_daily'

    # rglob order is filesystem-dependent; sort so every run picks the same file
    ais_files = sorted(ais_root.rglob('*.zip'))

    if ais_files:
        # Load first available file
        zip_path = ais_files[0]
        print(f"📥 Loading: {zip_path.name}")

        df = None
        with zipfile.ZipFile(zip_path, 'r') as z:
            # An archive without a CSV member falls through to the sample
            # instead of raising IndexError
            csv_name = next(
                (member for member in z.namelist() if member.endswith('.csv')),
                None,
            )
            if csv_name is not None:
                with z.open(csv_name) as f:
                    df = pd.read_csv(f, nrows=100000)  # Sample for speed

        if df is not None:
            # Filter to Port of LA area (between() is inclusive on both ends)
            in_bounds = (
                df['LAT'].between(PORT_LA_BOUNDS['min_lat'], PORT_LA_BOUNDS['max_lat'])
                & df['LON'].between(PORT_LA_BOUNDS['min_lon'], PORT_LA_BOUNDS['max_lon'])
            )
            df_filtered = df[in_bounds]

            if len(df_filtered) > 0:
                # Copy so later column assignments act on an independent frame
                # rather than on a slice of df
                return df_filtered.copy()

    # Create sample data for demo
    print("📝 Creating sample AIS data for demo...")
    # Fixed seed keeps the demo output reproducible; note this reseeds
    # NumPy's global random state
    np.random.seed(42)
    n_records = 5000

    # Positions are drawn inside the same box the real data is filtered to.
    # The order of the random draws below determines the generated values.
    sample_df = pd.DataFrame({
        'MMSI': np.random.randint(100000000, 999999999, n_records),
        'BaseDateTime': pd.date_range('2024-06-15', periods=n_records, freq='1min'),
        'LAT': np.random.uniform(
            PORT_LA_BOUNDS['min_lat'], PORT_LA_BOUNDS['max_lat'], n_records
        ),
        'LON': np.random.uniform(
            PORT_LA_BOUNDS['min_lon'], PORT_LA_BOUNDS['max_lon'], n_records
        ),
        'SOG': np.random.uniform(0, 15, n_records),
        'COG': np.random.uniform(0, 360, n_records),
        'VesselType': np.random.choice(
            [70, 71, 72, 80, 81, 60, 30, 31],
            n_records,
            p=[0.3, 0.15, 0.1, 0.15, 0.1, 0.05, 0.1, 0.05],
        ),
        'VesselName': [f'VESSEL_{i}' for i in range(n_records)],
    })

    return sample_df

# Build the working DataFrame that the later cells read and extend
ais_df = load_ais_sample()
print(f"\n✅ Loaded {len(ais_df):,} AIS records")

In [None]:
# Preview the first ten records, restricted to the core tracking columns
preview_columns = ['MMSI', 'BaseDateTime', 'LAT', 'LON', 'SOG', 'VesselType']

print("📊 AIS DATA SAMPLE")
print("="*60)
display(ais_df.head(10)[preview_columns])

---
## 4️⃣ Analyze Ship Traffic

In [None]:
# Classify vessel types
def classify_vessel(code):
    """Map an AIS vessel type code to a coarse category label.

    Args:
        code: Vessel type code. Anything ``int()`` accepts is allowed
            (ints, finite floats, numeric strings).

    Returns:
        str: 'Cargo' (70-79), 'Tanker' (80-89), 'Passenger' (60-69),
        'Fishing' (30), 'Tug' (31, 32, 52), 'Other' for any other integer,
        or 'Unknown' when the code cannot be converted to an integer
        (e.g. None, NaN, infinity, non-numeric text).
    """
    # Only the conversion can fail. Catch just its error types so that
    # KeyboardInterrupt / SystemExit are not swallowed as 'Unknown'.
    try:
        code = int(code)
    except (TypeError, ValueError, OverflowError):
        return 'Unknown'

    if 70 <= code < 80:
        return 'Cargo'
    if 80 <= code < 90:
        return 'Tanker'
    if 60 <= code < 70:
        return 'Passenger'
    if code == 30:
        return 'Fishing'
    if code in (31, 32, 52):
        return 'Tug'
    return 'Other'

# Tag every record with its coarse vessel category
ais_df['vessel_category'] = ais_df['VesselType'].apply(classify_vessel)

# Distinct MMSI values per category, largest category first
ship_counts = (
    ais_df
    .groupby('vessel_category')['MMSI']
    .nunique()
    .sort_values(ascending=False)
)

print("🚢 UNIQUE SHIPS BY CATEGORY")
print("=" * 50)
for cat, count in ship_counts.items():
    print(f"   {cat}: {count} ships")

# Overall distinct-ship total across all categories
print(f"\n   TOTAL: {ais_df['MMSI'].nunique()} unique ships")

In [None]:
# Visualize vessel distribution: pie chart on the left, bar chart on the right
fig, axes = plt.subplots(1, 2, figsize=(14, 5))
ax1, ax2 = axes[0], axes[1]

# Shared palette; colours are assigned by position in ship_counts
colors = ['#2ecc71', '#3498db', '#e74c3c', '#f39c12', '#9b59b6', '#95a5a6']

# Left panel: share of unique ships per category
ax1.pie(
    ship_counts.values,
    labels=ship_counts.index,
    autopct='%1.1f%%',
    colors=colors,
    startangle=90,
)
ax1.set_title('🚢 Vessel Type Distribution', fontsize=14, fontweight='bold')

# Right panel: absolute number of unique ships per category
bars = ax2.bar(ship_counts.index, ship_counts.values, color=colors, edgecolor='black')
ax2.set_xlabel('Vessel Type', fontsize=12)
ax2.set_ylabel('Number of Ships', fontsize=12)
ax2.set_title('🚢 Ships by Category', fontsize=14, fontweight='bold')
ax2.tick_params(axis='x', rotation=45)

# Write each count just above the top of its bar, centred horizontally
for bar, val in zip(bars, ship_counts.values):
    label_x = bar.get_x() + bar.get_width()/2
    label_y = bar.get_height() + 1
    ax2.text(label_x, label_y, str(val), ha='center', fontweight='bold')

plt.tight_layout()
plt.show()

---
## 5️⃣ Ship Positions Map

In [None]:
# Plot ship positions
fig, ax = plt.subplots(figsize=(12, 10))

# Fixed colour per vessel category; dict order sets the legend order
color_map = dict(
    Cargo='green',
    Tanker='blue',
    Passenger='red',
    Fishing='orange',
    Tug='purple',
    Other='gray',
    Unknown='lightgray',
)

# One scatter layer per category that actually has records
for cat, color in color_map.items():
    subset = ais_df[ais_df['vessel_category'] == cat]
    if len(subset) == 0:
        continue
    ax.scatter(subset['LON'], subset['LAT'], c=color, label=cat, alpha=0.5, s=10)

# Star marker for the port, drawn above the scatter points
ax.plot(-118.25, 33.75, 'r*', markersize=25, label='Port Center', zorder=10)

# Labels, title, legend and a light grid
ax.set_xlabel('Longitude', fontsize=12)
ax.set_ylabel('Latitude', fontsize=12)
ax.set_title('🗺️ Ship Positions at Port of LA', fontsize=14, fontweight='bold')
ax.legend(loc='upper right')
ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

---
## 6️⃣ Economic Metrics from AIS

In [None]:
# Derive simple trade indicators from the unique-ship counts
print("📊 ECONOMIC INDICATORS FROM AIS")
print("="*60)

# Distinct ships overall and within the two trade-relevant categories
total_ships = ais_df['MMSI'].nunique()
cargo_ships = ais_df.loc[ais_df['vessel_category'] == 'Cargo', 'MMSI'].nunique()
tanker_ships = ais_df.loc[ais_df['vessel_category'] == 'Tanker', 'MMSI'].nunique()

# Both indicators are percentages of all unique ships
cargo_ratio = cargo_ships / total_ships * 100
trade_activity_index = (cargo_ships + tanker_ships) / total_ships * 100

print(f"\n📈 Key Metrics:")
print(f"   • Total unique ships: {total_ships}")
print(f"   • Cargo ships: {cargo_ships} ({cargo_ratio:.1f}%)")
print(f"   • Tanker ships: {tanker_ships}")
print(f"   • Trade Activity Index: {trade_activity_index:.1f}%")

print(f"\n💡 INSIGHT:")
print(f"   High cargo ratio ({cargo_ratio:.0f}%) indicates strong trade activity!")

---
## 📝 Summary

### What We Learned:
1. **AIS** provides ship tracking data - broadcast by ships in real time and archived by NOAA as historical daily files
2. **Vessel types** indicate economic activity (cargo = trade)
3. **Ship counts** correlate with port throughput
4. **Daily data** enables high-frequency economic monitoring

### Key Metrics Extracted:
- Total ships in port area
- Cargo share of all ships, plus the tanker count
- Trade activity index

### Next Step:
→ **Demo 4**: Fuse satellite detections with AIS data

In [None]:
# Closing banner: the completion message framed by two rules, then a pointer
# to the next notebook
print(
    "="*60,
    "✅ Demo 3 Complete: AIS Data Processing",
    "="*60,
    sep="\n",
)
print("\n➡️  Next: Demo_4_Data_Fusion.ipynb")