# Uber Ride Analysis Visualization
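This notebook creates interactive maps showing spatial patterns in Uber ride data using Folium. It contains four visualizations:
- Combined H3 hexagon map of ride origins (all hours)
- Hourly H3 hexagon map of ride origins (one toggleable layer per hour)
- Combined origin/destination heatmap (all hours)
- Hourly origin/destination heatmap (`HeatMapWithTime`)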

Key metrics visualized:
- Trip Earnings per Minute (TED)
- Ride density clusters
- Temporal patterns

In [37]:
# Necessary libraries
import pandas as pd
import numpy as np
import folium
from folium.plugins import HeatMap
from sklearn.cluster import DBSCAN
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt
import seaborn as sns
from h3 import h3
import branca.colormap as cm

In [3]:
# Data loading and preprocessing
H3_RESOLUTION = 8  # H3 resolution used for both the origin and destination cells


def _split_lat_lng(pairs):
    """Parse a Series of '[lat, lng]' strings into (lat, lng) float Series.

    The brackets are removed as literal characters (``regex=False``) and each
    string is split only once; both results are rounded to 6 decimals.
    Missing inputs stay missing (NaN) in both outputs.
    """
    parts = (pairs.str.replace('[', '', regex=False)
                  .str.replace(']', '', regex=False)
                  .str.split(',', expand=True))
    return parts[0].astype(float).round(6), parts[1].astype(float).round(6)


df = pd.read_csv('./artifacts/df_rides.csv', index_col=0)
duration = pd.to_timedelta(df['duration_dt'])
df['duration_min_float'] = duration.dt.total_seconds() / 60
df['datetime'] = pd.to_datetime(df['datetime'])
df['hour'] = df['datetime'].dt.hour
df['origin_lat'], df['origin_lng'] = _split_lat_lng(df['origin_lat_lng'])
df['dest_lat'], df['dest_lng'] = _split_lat_lng(df['destination_lat_lng'])

# Drop incomplete rows BEFORE H3 indexing so the indexer is never handed NaN
# coordinates; the surviving rows are the same ones the original kept.
df = df.dropna().reset_index(drop=True)

# Plain iteration over the two coordinate columns avoids building a full row
# object per ride, which is what df.apply(axis=1) does.
df['h3_origin'] = [h3.geo_to_h3(lat, lng, H3_RESOLUTION)
                   for lat, lng in zip(df['origin_lat'], df['origin_lng'])]
df['h3_dest'] = [h3.geo_to_h3(lat, lng, H3_RESOLUTION)
                 for lat, lng in zip(df['dest_lat'], df['dest_lng'])]

## Interactive Map Generation
### Layer Structure
- **Base Layer**  
  CartoDB Positron

- **Overlay Layers**  
  Combined map (all hours)  
    └─ H3 hexagons (resolution 8) of ride origins, coloured by ride count  
  Hourly map  
    └─ One toggleable layer per hour (0-23h) of origin hexagons

Each hexagon's tooltip shows its ride count and average TED.

In [8]:
# Per origin hexagon: mean TED and number of rides (counted via the 'type' column)
origin_h3_data = (
    df.groupby('h3_origin')
      .agg(ted=('ted', 'mean'), origin_ride_count=('type', 'count'))
      .reset_index()
)

# Look up each hexagon's centre once and split it into latitude / longitude
origin_centers = [h3.h3_to_geo(cell) for cell in origin_h3_data['h3_origin']]
origin_h3_data['center_lat'] = [center[0] for center in origin_centers]
origin_h3_data['center_lng'] = [center[1] for center in origin_centers]

In [17]:
def h3_to_geojson(h3_index):
    """Build a GeoJSON ``Feature`` whose geometry is the polygon of ``h3_index``.

    Returns the feature dict with an empty ``properties`` mapping for the
    caller to fill in (e.g. with a ride count), or ``None`` when the boundary
    lookup raises ``ValueError``.
    """
    try:
        boundary = h3.h3_to_geo_boundary(h3_index, geo_json=True)
    except ValueError:
        # Invalid H3 indexes are reported to the caller as None
        return None

    geometry = {"type": "Polygon", "coordinates": [boundary]}
    return {"type": "Feature", "geometry": geometry, "properties": {}}

# Collect one GeoJSON feature per origin hexagon; cells for which
# h3_to_geojson returns nothing are skipped.
origin_geojson_features = []
for cell in origin_h3_data.itertuples(index=False):
    feature = h3_to_geojson(cell.h3_origin)
    if not feature:
        continue
    # These properties feed the map's colouring and tooltip
    feature["properties"] = {
        "ride_count": cell.origin_ride_count,
        "avg_ted": cell.ted,
    }
    origin_geojson_features.append(feature)

# Wrap the features in a GeoJSON FeatureCollection
origin_geojson = {
    "type": "FeatureCollection",
    "features": origin_geojson_features,
}

In [None]:
import folium

# Combined (all hours) H3 map of ride origins, centred on the mean hexagon centre
h3_map = folium.Map(location=[origin_h3_data['center_lat'].mean(),
                              origin_h3_data['center_lng'].mean()],
                    tiles='Cartodb Positron',
                    zoom_start=12)

# Viridis colour scale spanning the observed ride counts per hexagon
color_scale = cm.linear.viridis.scale(
    origin_h3_data['origin_ride_count'].min(),
    origin_h3_data['origin_ride_count'].max()
)
color_scale.caption = 'Origin ride count per H3 cell'


def _combined_style(feature, scale=color_scale):
    """Style one hexagon: fill colour from its ride count, gray if it has none.

    ``scale`` is bound as a default argument on purpose. A free variable would
    be looked up whenever the callback runs, and the global ``color_scale`` is
    reassigned by the hourly-map cell, so re-rendering this map later would
    silently pick up the wrong scale.
    """
    ride_count = feature['properties'].get('ride_count')
    return {
        'fillColor': scale(ride_count) if ride_count is not None else 'gray',
        'color': 'black',
        'weight': 0,  # no hexagon outline
        'fillOpacity': 0.3
    }


folium.GeoJson(
    origin_geojson,
    style_function=_combined_style,
    tooltip=folium.GeoJsonTooltip(fields=['ride_count', 'avg_ted'],
                                  aliases=['Ride Count', 'Average TED'])
).add_to(h3_map)

# Add the colour scale legend to the map
color_scale.add_to(h3_map)

# Combined map save and display
h3_map.save("./artifacts/h3_heatmap.html") # Save the map
print("H3 Heatmap saved to h3_heatmap.html")
h3_map  # Display the map

H3 Heatmap saved to h3_heatmap.html


## Temporal Analysis

In [38]:
# Hourly H3 Maps with Layers

# Map creation
h3_hourly_map = folium.Map(location=[origin_h3_data['center_lat'].mean(),
                                     origin_h3_data['center_lng'].mean()],
                           tiles='Cartodb Positron',
                           zoom_start=12)

# Create a layer control upfront
layer_control = folium.LayerControl(position='topright', collapsed=False)

# Aggregate once per (hour, origin hexagon) instead of filtering and grouping
# df separately for each of the 24 hours.
hourly_origin_stats = df.groupby(['hour', 'h3_origin']).agg({
    'ted': 'mean',
    'type': 'count',
}).reset_index()
hourly_origin_stats.rename(columns={'type': 'origin_ride_count'}, inplace=True)

# One colour scale shared by every hourly layer. The map carries a single
# legend, so it can only be truthful if all layers use the same scale; sharing
# it also makes colours comparable between hours.
hourly_color_scale = cm.linear.viridis.scale(
    hourly_origin_stats['origin_ride_count'].min(),
    hourly_origin_stats['origin_ride_count'].max()
)
hourly_color_scale.caption = 'Origin ride count per H3 cell and hour'


def _hourly_style(feature, scale=hourly_color_scale):
    """Style one hexagon: fill colour from its ride count, gray if it has none.

    ``scale`` is bound as a default argument so the callback does not depend
    on whatever a global name points to at the time the callback runs.
    (Previously a lambda read a ``color_scale`` variable reassigned on every
    loop iteration, so every layer resolved to the last hour's scale.)
    """
    ride_count = feature['properties'].get('ride_count')
    return {
        'fillColor': scale(ride_count) if ride_count is not None else 'gray',
        'color': 'black',
        'weight': 0,  # no hexagon outline
        'fillOpacity': 0.3
    }


# Create one toggleable layer per hour
for hour in range(24):
    hour_rows = hourly_origin_stats[hourly_origin_stats['hour'] == hour]

    if hour_rows.empty: # Skip hours with no data
        print(f"No data for hour {hour}, skipping.")
        continue

    # Build this hour's features under their own names so the combined map's
    # origin_geojson / origin_geojson_features are not overwritten.
    hourly_features = []
    for cell in hour_rows.itertuples(index=False):
        feature = h3_to_geojson(cell.h3_origin)
        if feature:
            feature["properties"] = {
                "ride_count": cell.origin_ride_count,
                "avg_ted": cell.ted
            }
            hourly_features.append(feature)

    hourly_geojson = {
        "type": "FeatureCollection",
        "features": hourly_features
    }

    # Separate feature group per hour, hidden until toggled on
    hour_group = folium.FeatureGroup(name=f'Hour {hour}', show=False)

    folium.GeoJson(
        hourly_geojson,
        style_function=_hourly_style,
        tooltip=folium.GeoJsonTooltip(fields=['ride_count', 'avg_ted'],
                                      aliases=['Ride Count', 'Average TED'])
    ).add_to(hour_group)

    # Add the hour group to the map
    hour_group.add_to(h3_hourly_map)

# Layer control added to hourly map
layer_control.add_to(h3_hourly_map)

# Add the legend to the hourly map (it was previously added to h3_map by mistake)
hourly_color_scale.add_to(h3_hourly_map)

# Hourly map save and display
h3_hourly_map.save("./artifacts/h3_hourly_cluster_map.html")
print("Hourly Cluster Map saved to hourly_cluster_map.html")
h3_hourly_map

Hourly Cluster Map saved to hourly_cluster_map.html


## Heatmap analysis

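- **Overlay Layers**  
  Combined heatmap (all hours)  
    ├─ Origin Heatmap (shown by default)  
    └─ Destination Heatmap (hidden by default)  
  Hourly heatmap (`HeatMapWithTime`)  
    ├─ Origin points per hour  
    └─ Destination points per hour

Each point is weighted by its min-max scaled TED value (`ted_scaled`).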

In [8]:
# Normalize ted data
# NOTE(review): no cell in this notebook creates `clean_df`, so on a fresh
# kernel this cell raised NameError. It is assumed here to be the preprocessed
# `df`; if an additional filter (e.g. outlier removal) was intended, apply it
# on this line. The copy keeps `df` free of the extra column.
clean_df = df.copy()

scaler = MinMaxScaler()
# fit_transform expects a 2-D (n_samples, 1) array and returns the same shape
ted_scaled = scaler.fit_transform(clean_df['ted'].values.reshape(-1, 1))
# Flatten to 1-D so the column assignment does not rely on (n, 1) handling
clean_df['ted_scaled'] = ted_scaled.ravel()

In [9]:
# Combined Origin/Destination Heatmap

# Heatmap params
radius = 15

# Centre the map on the mean origin coordinate
heat_location_lat = clean_df['origin_lat'].mean()
heat_location_lng = clean_df['origin_lng'].mean()
heat_map = folium.Map(
    location=[heat_location_lat, heat_location_lng],
    zoom_start=12,
    tiles='Cartodb Positron',
)

# Layer control is created now and attached after the overlays
layer_control = folium.LayerControl(position='topright', collapsed=False)

# Origins: [lat, lng, weight] triples, weighted by the scaled TED
origin_data = clean_df[['origin_lat', 'origin_lng', 'ted_scaled']].values.tolist()
origin_heat = folium.FeatureGroup(name='Origin Heatmap', show=True)
hm_origin = HeatMap(origin_data, radius=radius, name='Origin Heatmap')
hm_origin.add_to(origin_heat)

# Destinations: same weighting, layer hidden until toggled on
dest_data = clean_df[['dest_lat', 'dest_lng', 'ted_scaled']].values.tolist()
dest_heat = folium.FeatureGroup(name='Destination Heatmap', show=False)
hm_dest = HeatMap(dest_data, radius=radius, name='Destination Heatmap')
hm_dest.add_to(dest_heat)

# Attach overlays first, then the layer control
for heat_layer in (origin_heat, dest_heat):
    heat_layer.add_to(heat_map)
layer_control.add_to(heat_map)

# Heat map save and display
heat_map.save("./artifacts/heat_map.html")
print("Heat Map saved to heat_map.html")
heat_map

Heat Map saved to heat_map.html


In [10]:
# Origin / Destination Hourly Heatmap

# Define radius for the heatmap
radius = 15  # You can adjust this value as needed


def _heat_points_by_hour(frame, lat_col, lng_col, hours):
    """Return one list of [lat, lng, ted_scaled] triples per entry of ``hours``.

    ``frame`` must provide the columns ``hour``, ``ted_scaled``, ``lat_col``
    and ``lng_col``; the outer list follows the order of ``hours``.
    """
    return [
        frame.loc[frame['hour'] == hour, [lat_col, lng_col, 'ted_scaled']]
             .to_numpy()
             .tolist()
        for hour in hours
    ]


# Centre on the mean origin coordinate. The names previously used here
# (combined_map_center_lat / combined_map_center_lng) are not defined by any
# cell in this notebook, so the cell failed on a fresh kernel.
hourly_heat_map = folium.Map(location=[clean_df['origin_lat'].mean(),
                                        clean_df['origin_lng'].mean()],
                              zoom_start=12,
                              tiles='Cartodb Positron')

# Hours present in the data, as plain Python ints so the time labels do not
# depend on how numpy scalars are rendered or serialized.
unique_hours = [int(hour) for hour in sorted(clean_df['hour'].unique())]

# Group origin data by hour
origin_data_by_hour = _heat_points_by_hour(
    clean_df, 'origin_lat', 'origin_lng', unique_hours)

# Visualize Heatmap for Origins
hm_h_origin = folium.plugins.HeatMapWithTime(
    data=origin_data_by_hour,
    radius=radius,
    index=unique_hours,  # List of hours as the index
    name='Origin Heatmap'
)
hm_h_origin.add_to(hourly_heat_map)

# Group destination data by hour
dest_data_by_hour = _heat_points_by_hour(
    clean_df, 'dest_lat', 'dest_lng', unique_hours)

# Visualize Heatmap for Destinations
hm_h_dest = folium.plugins.HeatMapWithTime(
    data=dest_data_by_hour,
    radius=radius,
    index=unique_hours,  # List of hours as the index
    name='Destination Heatmap'
)
hm_h_dest.add_to(hourly_heat_map)

# Add Layer Control
folium.LayerControl(position='topright', collapsed=False).add_to(hourly_heat_map)

# Save and display the map
hourly_heat_map.save("./artifacts/hourly_heat_map.html")
print("Heat Map saved to hourly_heat_map.html")
hourly_heat_map

Heat Map saved to hourly_heat_map.html


## Output Files
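- `artifacts/h3_heatmap.html`: Combined H3 hexagon map (persistent demand patterns)  
- `artifacts/h3_hourly_cluster_map.html`: Hourly H3 hexagon layers (time-based fluctuations)
- `artifacts/heat_map.html`: Origin and destination heatmap (all hours)
- `artifacts/hourly_heat_map.html`: Origin and destination heatmap by hour
- `clusters.csv`: Exported cluster metrics (not written by any cell in this notebook)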