# NYC Manhattan Demand Patterns Visualization (7pm-8pm)

This notebook visualizes pickup and dropoff patterns from the scenario_nyc_manhattan.json dataset.
We'll create interactive maps and charts showing:
1. **Pickup hotspots** - Where trips originate
2. **Dropoff hotspots** - Where trips end
3. **Net flow** - Which zones are net sources (more pickups) or sinks (more dropoffs)
4. **Origin-Destination flows** - Trip corridors as arcs
5. **Temporal patterns** - How demand changes within the hour

In [None]:
import pandas as pd
import geopandas as gpd
import numpy as np
import json
from keplergl import KeplerGl
from datetime import datetime, timedelta
import matplotlib.pyplot as plt
import seaborn as sns
from shapely.geometry import LineString, Point
from collections import defaultdict

## 1. Load Geographic Data

**Note:** We filter for Manhattan zones only (districtcode starting with '1').

In [None]:
# Load NYC zone geometries
zones_gdf = gpd.read_file('data/nyc_zones.geojson')
print(f"Loaded {len(zones_gdf)} total zones")

# Filter for Manhattan zones only (districtcode starting with '1').
# The index is reset so rows are numbered 0..N-1.
# NOTE(review): later cells join scenario region ids against this positional
# index - confirm the scenario's region numbering follows this row order.
is_manhattan = zones_gdf['districtcode'].astype(str).str.startswith('1')
zones_gdf = zones_gdf[is_manhattan].reset_index(drop=True)

print(f"Filtered to {len(zones_gdf)} Manhattan zones")
print(f"Columns: {zones_gdf.columns.tolist()}")

# Calculate centroids for each zone (for O-D flow lines).
# Centroids computed directly on lon/lat coordinates are geometrically skewed
# (geopandas emits a UserWarning for it), so compute them once in a projected
# CRS and transform the points back to the original CRS.
if len(zones_gdf) > 0 and zones_gdf.crs is not None and zones_gdf.crs.is_geographic:
    planar_crs = zones_gdf.estimate_utm_crs()
    zone_centroids = zones_gdf.geometry.to_crs(planar_crs).centroid.to_crs(zones_gdf.crs)
else:
    # Already projected (or CRS unknown): the planar centroid is used as-is.
    zone_centroids = zones_gdf.geometry.centroid

zones_gdf['centroid'] = zone_centroids
zones_gdf['centroid_lon'] = zone_centroids.x
zones_gdf['centroid_lat'] = zone_centroids.y

zones_gdf.head()

## 2. Load and Parse Scenario Data

In [None]:
# Read the scenario definition from disk
with open('data/scenario_nyc_manhattan.json', 'r') as scenario_file:
    scenario_data = json.load(scenario_file)

n_lat = scenario_data['nlat']
n_lon = scenario_data['nlon']
demand_records = scenario_data['demand']

print(f"Number of regions: {n_lat} x {n_lon} = {n_lat * n_lon}")
print(f"Total demand records: {len(demand_records)}")

# One row per demand record; a DataFrame is easier to filter and aggregate
demand_df = pd.DataFrame(demand_records)
all_time_stamps = demand_df['time_stamp']

print(f"\nDemand DataFrame shape: {demand_df.shape}")
print(f"Columns: {demand_df.columns.tolist()}")
print(f"\nTime stamp range: {all_time_stamps.min()} to {all_time_stamps.max()}")
demand_df.head(10)

## 3. Understanding the Time Stamps

The scenario uses time stamps in minutes from midnight. For 7pm-8pm:
- 7:00 PM = 19 * 60 = 1140 minutes
- 8:00 PM = 20 * 60 = 1200 minutes

In [None]:
# Window bounds in minutes after midnight: [7pm, 8pm)
start_time = 19 * 60  # 7pm in minutes
end_time = 20 * 60    # 8pm in minutes

# Keep rows whose time stamp falls inside the half-open window
record_times = demand_df['time_stamp']
in_window = (record_times >= start_time) & (record_times < end_time)
demand_7pm_8pm = demand_df.loc[in_window].copy()

window_times = demand_7pm_8pm['time_stamp']

print(f"Filtered demand records (7pm-8pm): {len(demand_7pm_8pm)}")
print(f"Total trips in this hour: {demand_7pm_8pm['demand'].sum()}")
print(f"\nUnique origins: {demand_7pm_8pm['origin'].nunique()}")
print(f"Unique destinations: {demand_7pm_8pm['destination'].nunique()}")
print(f"\nTime stamps present: {sorted(window_times.unique())}")

# Add human-readable time: split the minute count into hour and minute parts,
# then format as H:MM
demand_7pm_8pm['hour'] = window_times // 60
demand_7pm_8pm['minute'] = window_times % 60
hour_text = demand_7pm_8pm['hour'].astype(str)
minute_text = demand_7pm_8pm['minute'].astype(str).str.zfill(2)
demand_7pm_8pm['time_str'] = hour_text.str.cat(minute_text, sep=':')

demand_7pm_8pm.head()

## 4. Create Pickup Hotspot Data

In [None]:
# Per-origin totals and averages for the 7pm-8pm window
pickup_stats = (
    demand_7pm_8pm
    .groupby('origin')
    .agg(
        total_pickups=('demand', 'sum'),
        avg_price=('price', 'mean'),
        avg_travel_time=('travel_time', 'mean'),
    )
    .reset_index()
    .rename(columns={'origin': 'region_id'})
)

# Attach the statistics to the zone polygons; zones without pickups get zeros.
# NOTE: with left_index=True / right_on=..., pandas takes the result's index
# from pickup_stats rather than from zones_gdf, so use the 'region_id' column
# (not the index) to identify zones in pickup_gdf.
pickup_gdf = zones_gdf.merge(pickup_stats, left_index=True, right_on='region_id', how='left')
for stat_col in ('total_pickups', 'avg_price', 'avg_travel_time'):
    pickup_gdf[stat_col] = pickup_gdf[stat_col].fillna(0)

pickups_per_zone = pickup_gdf['total_pickups']
active_pickups = pickups_per_zone[pickups_per_zone > 0]

print("Pickup statistics:")
print(f"  Total pickups across all zones: {pickups_per_zone.sum():.0f}")
print(f"  Zones with pickups: {(pickups_per_zone > 0).sum()}")
print(f"  Max pickups in single zone: {pickups_per_zone.max():.0f}")
print(f"  Avg pickups per active zone: {active_pickups.mean():.1f}")

pickup_gdf.head()

## 5. Create Dropoff Hotspot Data

In [None]:
# Per-destination totals and averages for the 7pm-8pm window
dropoff_stats = (
    demand_7pm_8pm
    .groupby('destination')
    .agg(
        total_dropoffs=('demand', 'sum'),
        avg_price=('price', 'mean'),
        avg_travel_time=('travel_time', 'mean'),
    )
    .reset_index()
    .rename(columns={'destination': 'region_id'})
)

# Attach the statistics to the zone polygons; zones without dropoffs get zeros.
# NOTE: with left_index=True / right_on=..., pandas takes the result's index
# from dropoff_stats rather than from zones_gdf, so use the 'region_id' column
# (not the index) to identify zones in dropoff_gdf.
dropoff_gdf = zones_gdf.merge(dropoff_stats, left_index=True, right_on='region_id', how='left')
for stat_col in ('total_dropoffs', 'avg_price', 'avg_travel_time'):
    dropoff_gdf[stat_col] = dropoff_gdf[stat_col].fillna(0)

dropoffs_per_zone = dropoff_gdf['total_dropoffs']
active_dropoffs = dropoffs_per_zone[dropoffs_per_zone > 0]

print("Dropoff statistics:")
print(f"  Total dropoffs across all zones: {dropoffs_per_zone.sum():.0f}")
print(f"  Zones with dropoffs: {(dropoffs_per_zone > 0).sum()}")
print(f"  Max dropoffs in single zone: {dropoffs_per_zone.max():.0f}")
print(f"  Avg dropoffs per active zone: {active_dropoffs.mean():.1f}")

dropoff_gdf.head()

## 6. Create Net Flow (Pickups - Dropoffs)

In [None]:
# Calculate net flow: positive = more pickups (source), negative = more dropoffs (sink)
#
# Both totals are aligned on the zone index directly instead of chaining two
# merge(left_index=True, right_on='region_id') calls. Such a merge returns a
# frame whose index comes from the right-hand table (NaN for unmatched zones),
# so a second left_index merge would attach dropoffs to the wrong zones - or to
# none at all - whenever some zone has no pickups.
zone_ids = zones_gdf.index
pickups_by_zone = pickup_stats.set_index('region_id')['total_pickups']
dropoffs_by_zone = dropoff_stats.set_index('region_id')['total_dropoffs']

net_flow_gdf = zones_gdf.copy()
net_flow_gdf['region_id'] = zone_ids.to_numpy()
# reindex() yields NaN for zones absent from the stats table; treat those as 0 trips.
net_flow_gdf['total_pickups'] = pickups_by_zone.reindex(zone_ids).fillna(0).to_numpy()
net_flow_gdf['total_dropoffs'] = dropoffs_by_zone.reindex(zone_ids).fillna(0).to_numpy()
net_flow_gdf['net_flow'] = net_flow_gdf['total_pickups'] - net_flow_gdf['total_dropoffs']

# Label each zone by the sign of its net flow.
net_flow_gdf['flow_type'] = np.select(
    [net_flow_gdf['net_flow'] > 0, net_flow_gdf['net_flow'] < 0],
    ['Source (More Pickups)', 'Sink (More Dropoffs)'],
    default='Balanced',
)

print("Net flow statistics:")
print(f"  Source zones (more pickups): {(net_flow_gdf['net_flow'] > 0).sum()}")
print(f"  Sink zones (more dropoffs): {(net_flow_gdf['net_flow'] < 0).sum()}")
print(f"  Balanced zones: {(net_flow_gdf['net_flow'] == 0).sum()}")
print(f"  Largest source: {net_flow_gdf['net_flow'].max():.0f} trips")
print(f"  Largest sink: {net_flow_gdf['net_flow'].min():.0f} trips")

net_flow_gdf.head()

## 7. Create Origin-Destination Flow Lines

In [None]:
# Create O-D flow data with minimum threshold to avoid clutter
min_trips = 5  # Only show O-D pairs with at least 5 trips

od_flows = demand_7pm_8pm.groupby(['origin', 'destination']).agg({
    'demand': 'sum',
    'price': 'mean',
    'travel_time': 'mean'
}).reset_index()

od_flows.columns = ['origin', 'destination', 'total_trips', 'avg_price', 'avg_travel_time']
od_flows = od_flows[od_flows['total_trips'] >= min_trips]

print(f"O-D pairs with >= {min_trips} trips: {len(od_flows)}")
print(f"Total trips represented: {od_flows['total_trips'].sum():.0f}")

# Create flow lines: one straight segment between the origin and destination
# zone centroids for every retained O-D pair.
flow_columns = [
    'origin_id', 'destination_id', 'total_trips', 'avg_price', 'avg_travel_time',
    'origin_lon', 'origin_lat', 'dest_lon', 'dest_lat', 'geometry',
]
zone_centroids = list(zones_gdf['centroid'])
n_zones = len(zone_centroids)

flow_lines = []
n_unmapped = 0
# itertuples keeps each column's dtype; iterrows would upcast the integer trip
# counts to float because the row also holds float averages.
for flow in od_flows.itertuples(index=False):
    origin_id = int(flow.origin)
    dest_id = int(flow.destination)

    # Region ids are used as positions into zones_gdf. Reject anything outside
    # 0..n_zones-1, including negatives, which would otherwise wrap around.
    if not (0 <= origin_id < n_zones and 0 <= dest_id < n_zones):
        n_unmapped += 1
        continue

    origin_centroid = zone_centroids[origin_id]
    dest_centroid = zone_centroids[dest_id]

    flow_lines.append({
        'origin_id': origin_id,
        'destination_id': dest_id,
        'total_trips': flow.total_trips,
        'avg_price': flow.avg_price,
        'avg_travel_time': flow.avg_travel_time,
        'origin_lon': origin_centroid.x,
        'origin_lat': origin_centroid.y,
        'dest_lon': dest_centroid.x,
        'dest_lat': dest_centroid.y,
        'geometry': LineString([origin_centroid, dest_centroid])
    })

if n_unmapped:
    print(f"Skipped {n_unmapped} O-D pairs whose region ids have no matching zone")

# Passing the column list and geometry name explicitly keeps this valid even
# when no O-D pair survives the filters (an empty record list has no geometry
# column, and assigning a CRS would then raise).
od_flow_gdf = gpd.GeoDataFrame(
    flow_lines, columns=flow_columns, geometry='geometry', crs=zones_gdf.crs
)
print(f"Created {len(od_flow_gdf)} O-D flow lines")
od_flow_gdf.head()

## 8. Visualization 1: Pickup Hotspots

In [None]:
# Zone-level pickup map: polygons coloured by total pickups
map_pickups = KeplerGl(height=600)
map_pickups.add_data(data=pickup_gdf, name='Pickup Hotspots')

# Sequential palette used for the fill colour
pickup_color_range = {
    'name': 'Global Warming',
    'type': 'sequential',
    'category': 'Uber',
    'colors': ['#5A1846', '#900C3F', '#C70039', '#E3611C', '#F1920E', '#FFC300']
}

# Flat (2D) filled polygons with a thin white outline
pickup_vis_config = {
    'opacity': 0.8,
    'strokeOpacity': 0.8,
    'thickness': 0.5,
    'strokeColor': [255, 255, 255],
    'colorRange': pickup_color_range,
    'filled': True,
    'stroked': True,
    'enable3d': False,
    'wireframe': False
}

pickup_layer = {
    'type': 'geojson',
    'config': {
        'dataId': 'Pickup Hotspots',
        'label': 'Pickup Demand (7pm-8pm)',
        'color': [18, 147, 154],
        'columns': {'geojson': 'geometry'},
        'isVisible': True,
        'visConfig': pickup_vis_config,
        'colorField': {'name': 'total_pickups', 'type': 'real'},
        'colorScale': 'quantile'
    }
}

pickup_config = {
    'version': 'v1',
    'config': {
        'mapState': {'latitude': 40.75, 'longitude': -73.95, 'zoom': 11},
        'visState': {'layers': [pickup_layer]}
    }
}

map_pickups.config = pickup_config
map_pickups

## 9. Visualization 2: Dropoff Hotspots

In [None]:
# Zone-level dropoff map: polygons coloured by total dropoffs
map_dropoffs = KeplerGl(height=600)
map_dropoffs.add_data(data=dropoff_gdf, name='Dropoff Hotspots')

# Palette used for the fill colour
dropoff_color_range = {
    'name': 'Ice And Fire',
    'type': 'sequential',
    'category': 'Uber',
    'colors': ['#0198BD', '#49E3CE', '#E8FA5B', '#FED02C', '#FE9900', '#FF5050']
}

# Flat (2D) filled polygons with a thin white outline
dropoff_vis_config = {
    'opacity': 0.8,
    'strokeOpacity': 0.8,
    'thickness': 0.5,
    'strokeColor': [255, 255, 255],
    'colorRange': dropoff_color_range,
    'filled': True,
    'stroked': True,
    'enable3d': False,
    'wireframe': False
}

dropoff_layer = {
    'type': 'geojson',
    'config': {
        'dataId': 'Dropoff Hotspots',
        'label': 'Dropoff Demand (7pm-8pm)',
        'color': [18, 92, 154],
        'columns': {'geojson': 'geometry'},
        'isVisible': True,
        'visConfig': dropoff_vis_config,
        'colorField': {'name': 'total_dropoffs', 'type': 'real'},
        'colorScale': 'quantile'
    }
}

dropoff_config = {
    'version': 'v1',
    'config': {
        'mapState': {'latitude': 40.75, 'longitude': -73.95, 'zoom': 11},
        'visState': {'layers': [dropoff_layer]}
    }
}

map_dropoffs.config = dropoff_config
map_dropoffs

## 10. Visualization 3: Net Flow (Source/Sink Analysis)

In [None]:
# Source/sink map: polygons coloured by net flow (pickups minus dropoffs)
map_net_flow = KeplerGl(height=600)
map_net_flow.add_data(data=net_flow_gdf, name='Net Flow')

# Diverging palette declared for the signed net_flow value
net_flow_color_range = {
    'name': 'Diverging RdBu',
    'type': 'diverging',
    'category': 'ColorBrewer',
    'colors': ['#2166AC', '#4393C3', '#92C5DE', '#D1E5F0', '#F7F7F7', '#FDDBC7', '#F4A582', '#D6604D', '#B2182B']
}

# Filled polygons with a thin white outline
net_flow_vis_config = {
    'opacity': 0.8,
    'strokeOpacity': 0.8,
    'thickness': 0.5,
    'strokeColor': [255, 255, 255],
    'colorRange': net_flow_color_range,
    'filled': True,
    'stroked': True,
    'wireframe': False
}

net_flow_layer = {
    'type': 'geojson',
    'config': {
        'dataId': 'Net Flow',
        'label': 'Net Flow (Pickups - Dropoffs)',
        'color': [18, 147, 154],
        'columns': {'geojson': 'geometry'},
        'isVisible': True,
        'visConfig': net_flow_vis_config,
        'colorField': {'name': 'net_flow', 'type': 'real'},
        'colorScale': 'quantize'
    }
}

net_flow_config = {
    'version': 'v1',
    'config': {
        'mapState': {'latitude': 40.75, 'longitude': -73.95, 'zoom': 11},
        'visState': {'layers': [net_flow_layer]}
    }
}

map_net_flow.config = net_flow_config
map_net_flow

## 11. Visualization 4: Origin-Destination Flow Lines

In [None]:
# Arc map of origin-destination flows; colour and width both follow total_trips
map_od_flows = KeplerGl(height=600)
map_od_flows.add_data(data=od_flow_gdf, name='O-D Flows')

# Arc endpoints are read from the centroid coordinate columns
od_arc_columns = {
    'lat0': 'origin_lat',
    'lng0': 'origin_lon',
    'lat1': 'dest_lat',
    'lng1': 'dest_lon'
}

od_arc_vis_config = {
    'opacity': 0.6,
    'thickness': 2,
    'colorRange': {
        'name': 'Global Warming',
        'type': 'sequential',
        'category': 'Uber',
        'colors': ['#5A1846', '#900C3F', '#C70039', '#E3611C', '#F1920E', '#FFC300']
    },
    'sizeRange': [0, 10],
    'targetColor': None
}

trip_count_field = {'name': 'total_trips', 'type': 'integer'}

od_arc_layer = {
    'type': 'arc',
    'config': {
        'dataId': 'O-D Flows',
        'label': 'Trip Flows (7pm-8pm)',
        'color': [130, 154, 227],
        'columns': od_arc_columns,
        'isVisible': True,
        'visConfig': od_arc_vis_config,
        'colorField': dict(trip_count_field),
        'colorScale': 'quantile',
        'sizeField': dict(trip_count_field),
        'sizeScale': 'sqrt'
    }
}

od_flow_config = {
    'version': 'v1',
    'config': {
        'mapState': {'latitude': 40.75, 'longitude': -73.95, 'zoom': 11},
        'visState': {'layers': [od_arc_layer]}
    }
}

map_od_flows.config = od_flow_config
map_od_flows

## 12. Visualization 5: Combined View with 3D Elevation

In [None]:
# Combined map: all four datasets are loaded; two layers are configured here
map_combined = KeplerGl(height=700)
combined_datasets = [
    ('Pickups', pickup_gdf),
    ('Dropoffs', dropoff_gdf),
    ('Flows', od_flow_gdf),
    ('Net Flow', net_flow_gdf),
]
for dataset_name, dataset in combined_datasets:
    map_combined.add_data(data=dataset, name=dataset_name)

# Layer 1: pickup polygons extruded in 3D, height and colour from total_pickups
pickup_3d_layer = {
    'type': 'geojson',
    'config': {
        'dataId': 'Pickups',
        'label': '3D Pickup Demand',
        'color': [255, 203, 153],
        'columns': {'geojson': 'geometry'},
        'isVisible': True,
        'visConfig': {
            'opacity': 0.8,
            'strokeOpacity': 0.3,
            'thickness': 0.5,
            'strokeColor': [255, 255, 255],
            'colorRange': {
                'name': 'Global Warming',
                'type': 'sequential',
                'category': 'Uber',
                'colors': ['#5A1846', '#900C3F', '#C70039', '#E3611C', '#F1920E', '#FFC300']
            },
            'filled': True,
            'stroked': True,
            'enable3d': True,
            'elevationScale': 15,
            'wireframe': False
        },
        'colorField': {'name': 'total_pickups', 'type': 'real'},
        'colorScale': 'quantile',
        'heightField': {'name': 'total_pickups', 'type': 'real'},
        'heightScale': 'linear'
    }
}

# Layer 2: O-D arcs, hidden initially so the user can toggle them on
flow_arc_layer = {
    'type': 'arc',
    'config': {
        'dataId': 'Flows',
        'label': 'Trip Flows',
        'color': [130, 154, 227],
        'columns': {
            'lat0': 'origin_lat',
            'lng0': 'origin_lon',
            'lat1': 'dest_lat',
            'lng1': 'dest_lon'
        },
        'isVisible': False,
        'visConfig': {
            'opacity': 0.4,
            'thickness': 2,
            'colorRange': {
                'name': 'Ice And Fire',
                'type': 'sequential',
                'category': 'Uber',
                'colors': ['#0198BD', '#49E3CE', '#E8FA5B', '#FED02C', '#FE9900', '#FF5050']
            },
            'sizeRange': [0, 8],
            'targetColor': None
        },
        'colorField': {'name': 'total_trips', 'type': 'integer'},
        'colorScale': 'quantile',
        'sizeField': {'name': 'total_trips', 'type': 'integer'},
        'sizeScale': 'sqrt'
    }
}

# Tilted camera so the extruded pickup layer is visible
combined_map_state = {
    'latitude': 40.75,
    'longitude': -73.95,
    'zoom': 11,
    'pitch': 45,
    'bearing': 0
}

combined_config = {
    'version': 'v1',
    'config': {
        'mapState': combined_map_state,
        'visState': {'layers': [pickup_3d_layer, flow_arc_layer]}
    }
}

map_combined.config = combined_config
map_combined

## 13. Statistical Analysis

In [None]:
def _show_ranking(title, table):
    """Print a 70-character banner with `title`, then `table` without its index."""
    rule = "=" * 70
    print("\n" + rule)
    print(title)
    print(rule)
    print(table.to_string(index=False))


zone_stat_cols = ['avg_price', 'avg_travel_time']
net_flow_cols = ['region_id', 'total_pickups', 'total_dropoffs', 'net_flow']

# Busiest pickup zones
top_pickups = pickup_gdf.nlargest(10, 'total_pickups')[['region_id', 'total_pickups'] + zone_stat_cols]
_show_ranking("TOP 10 PICKUP LOCATIONS (7pm-8pm)", top_pickups)

# Busiest dropoff zones
top_dropoffs = dropoff_gdf.nlargest(10, 'total_dropoffs')[['region_id', 'total_dropoffs'] + zone_stat_cols]
_show_ranking("TOP 10 DROPOFF LOCATIONS (7pm-8pm)", top_dropoffs)

# Heaviest origin-destination pairs
top_od = od_flows.nlargest(10, 'total_trips')[['origin', 'destination', 'total_trips'] + zone_stat_cols]
_show_ranking("TOP 10 ORIGIN-DESTINATION PAIRS (7pm-8pm)", top_od)

# Largest net sources (highest net_flow) and net sinks (lowest net_flow)
top_sources = net_flow_gdf.nlargest(5, 'net_flow')[net_flow_cols]
_show_ranking("TOP 5 SOURCE ZONES (More Pickups than Dropoffs)", top_sources)

top_sinks = net_flow_gdf.nsmallest(5, 'net_flow')[net_flow_cols]
_show_ranking("TOP 5 SINK ZONES (More Dropoffs than Pickups)", top_sinks)

## 14. Time-Series Analysis (Within the Hour)

In [None]:
# Total trips and mean record price for each time stamp in the 7pm-8pm window
temporal_demand = (
    demand_7pm_8pm
    .groupby('time_stamp')
    .agg({'demand': 'sum', 'price': 'mean'})
    .reset_index()
)

# H:MM label for the x axis
stamp_minutes = temporal_demand['time_stamp']
hour_label = (stamp_minutes // 60).astype(str)
minute_label = (stamp_minutes % 60).astype(str).str.zfill(2)
temporal_demand['time_str'] = hour_label.str.cat(minute_label, sep=':')

# Two stacked panels: trip volume (bars) on top, average price (line) below
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 8))
time_labels = temporal_demand['time_str']

ax1.bar(time_labels, temporal_demand['demand'], color='steelblue', alpha=0.7)
ax2.plot(time_labels, temporal_demand['price'], marker='o', color='coral', linewidth=2)

panel_text = [
    (ax1, 'Total Trips', 'Demand Evolution (7pm-8pm)'),
    (ax2, 'Average Price ($)', 'Average Price Over Time'),
]
for panel, y_label, panel_title in panel_text:
    panel.set_xlabel('Time', fontsize=12)
    panel.set_ylabel(y_label, fontsize=12)
    panel.set_title(panel_title, fontsize=14, fontweight='bold')
    panel.tick_params(axis='x', rotation=45)
    panel.grid(axis='y', alpha=0.3)

plt.tight_layout()
plt.show()

print("\nTemporal Statistics:")
print(temporal_demand)

## 15. Export Data and Maps

In [None]:
# Save visualizations to HTML files and processed layers to GeoJSON.
# Nothing is written unless SAVE_OUTPUTS is set to True, so a plain
# "Run All" never touches the disk.
SAVE_OUTPUTS = False

if SAVE_OUTPUTS:
    import os

    # The target folders may not exist yet.
    os.makedirs('visualizations', exist_ok=True)
    os.makedirs('processed_data', exist_ok=True)

    maps_to_save = {
        'nyc_pickups_7pm_8pm': map_pickups,
        'nyc_dropoffs_7pm_8pm': map_dropoffs,
        'nyc_net_flow_7pm_8pm': map_net_flow,
        'nyc_od_flows_7pm_8pm': map_od_flows,
        'nyc_combined_7pm_8pm': map_combined,
    }
    for file_stem, kepler_map in maps_to_save.items():
        kepler_map.save_to_html(file_name=f'visualizations/{file_stem}.html')

    layers_to_save = {
        'pickups_7pm_8pm': pickup_gdf,
        'dropoffs_7pm_8pm': dropoff_gdf,
        'od_flows_7pm_8pm': od_flow_gdf,
        'net_flow_7pm_8pm': net_flow_gdf,
    }
    for file_stem, layer_gdf in layers_to_save.items():
        # A GeoJSON feature carries a single geometry, so the helper 'centroid'
        # geometry column is dropped (centroid_lon / centroid_lat remain).
        # The index is reset so a non-integer index left over from the merges
        # is not written out as an extra column.
        exportable = layer_gdf.drop(columns=['centroid'], errors='ignore').reset_index(drop=True)
        exportable.to_file(f'processed_data/{file_stem}.geojson', driver='GeoJSON')

    print("Saved maps to 'visualizations/' and processed data to 'processed_data/'")
else:
    print("To save visualizations and data, set SAVE_OUTPUTS = True and re-run this cell")

## Summary

This notebook provides comprehensive visualizations of NYC Manhattan demand patterns for the 7pm-8pm hour:

### **Visualizations Created:**
1. **Pickup Hotspots** - Choropleth map of zones colored by where trips originate
2. **Dropoff Hotspots** - Choropleth map of zones colored by where trips end
3. **Net Flow Analysis** - Shows source zones (more pickups) vs sink zones (more dropoffs)
4. **O-D Flow Lines** - Arc diagram showing trip corridors with volume-based thickness
5. **3D Combined View** - Multi-layer visualization with elevation representing demand
6. **Temporal Analysis** - Charts showing demand evolution within the hour

### **Key Insights:**
- Identifies busy pickup and dropoff zones
- Reveals imbalances (sources vs sinks) that require rebalancing
- Shows major travel corridors
- Demonstrates temporal patterns within the hour

### **Interactive Features:**
- Hover tooltips with detailed information
- Layer toggles to show/hide different datasets
- 3D rotation and zoom
- Color-coded by demand intensity
- Arc thickness represents trip volume

These visualizations can help understand baseline demand patterns and identify where rebalancing and dynamic pricing strategies are most needed!