In [None]:
import geopandas as gpd
import pandas as pd
import os

# Load taxi trip data
df = pd.read_csv("testing_no_null.csv", low_memory=False)


def tract_to_geoid(codes):
    """Normalise census tract codes to 11-character, zero-padded strings.

    Going through a nullable integer first matters when pandas parsed the
    column as float: ``astype(str)`` on a float yields ``"17031081500.0"``,
    which ``zfill(11)`` cannot repair and which never equals a shapefile GEOID.

    Parameters
    ----------
    codes : pandas.Series
        Tract codes as ints, floats, or numeric strings.

    Returns
    -------
    pandas.Series
        ``string``-dtype series; missing or non-numeric codes become ``<NA>``
        instead of a bogus padded string such as ``"00000000nan"``.
    """
    numeric = pd.to_numeric(codes, errors='coerce')
    return numeric.astype('Int64').astype('string').str.zfill(11)


# Convert census tract codes to 11-digit zero-padded strings
df['Pickup Census Tract'] = tract_to_geoid(df['Pickup Census Tract'])
df['Dropoff Census Tract'] = tract_to_geoid(df['Dropoff Census Tract'])

# Create set of all used tract codes (missing codes are not tracts, so drop them)
used_tracts = set(df['Pickup Census Tract'].dropna()) | set(df['Dropoff Census Tract'].dropna())

# Summarise instead of dumping the whole set into the cell output
print(f"{len(used_tracts)} distinct tracts in use, e.g. {sorted(used_tracts)[:5]}")

# Directory holding the unpacked tract shapefile
shapefile_dir = "tl_2024_17_tract"

# Take the first directory entry ending in ".shp", or None when there is none
shp_names = [name for name in os.listdir(shapefile_dir) if name.endswith(".shp")]
shapefile_path = os.path.join(shapefile_dir, shp_names[0]) if shp_names else None

if not shapefile_path:
    print("No .shp file found in the tl_2024_17_tract directory.")
else:
    # Read the tract geometries
    gdf = gpd.read_file(shapefile_path)

    # GEOID is compared against string tract codes, so force it to str
    gdf['GEOID'] = gdf['GEOID'].astype(str)

    # Keep only the tracts that appear in used_tracts
    tract_in_use = gdf['GEOID'].isin(used_tracts)
    gdf_filtered = gdf[tract_in_use]

    # Write the filtered tracts out as GeoJSON
    geojson_path = "tl_2024_17_tract.geojson"
    gdf_filtered.to_file(geojson_path, driver='GeoJSON')

    print(f"Filtered GeoJSON saved with {len(gdf_filtered)} tracts to {geojson_path}")



In [None]:
# --- 1. Import Required Libraries ---
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import geopandas as gpd
from sklearn.mixture import GaussianMixture
import h3
import folium
from folium.plugins import HeatMap
import warnings
warnings.filterwarnings("ignore")


# Load the trip table
df = pd.read_csv("testing_no_null.csv", low_memory=False)

# Parse both trip timestamps into datetime64 columns
for ts_col in ('Trip Start Timestamp', 'Trip End Timestamp'):
    df[ts_col] = pd.to_datetime(df[ts_col])


def lat_lng_to_h3(row, res=8):
    """Return the H3 cell containing a row's pickup centroid.

    Parameters
    ----------
    row : mapping or pandas.Series
        Must provide 'Pickup Centroid Latitude' and 'Pickup Centroid Longitude'.
    res : int, default 8
        H3 resolution passed through to ``h3.latlng_to_cell``.

    Returns
    -------
    The value returned by ``h3.latlng_to_cell``, or ``None`` when either
    coordinate is missing.
    """
    lat = row['Pickup Centroid Latitude']
    lng = row['Pickup Centroid Longitude']
    # A missing coordinate cannot be indexed; return None so a single bad row
    # does not abort a whole DataFrame.apply pass.
    if pd.isna(lat) or pd.isna(lng):
        return None
    return h3.latlng_to_cell(lat, lng, res)

# Assign an H3 cell to every trip when both pickup centroid columns exist
h3_coord_cols = ['Pickup Centroid Latitude', 'Pickup Centroid Longitude']
if all(col in df.columns for col in h3_coord_cols):
    # Only the two coordinate columns are handed to the row-wise apply;
    # lat_lng_to_h3 reads nothing else from the row.
    df['H3 Index'] = df[h3_coord_cols].apply(lat_lng_to_h3, axis=1)
else:
    print("Pickup Centroid Latitude or Longitude not found — skipping H3 indexing.")

# Temporal units derived from the trip start time
df['Hour'] = df['Trip Start Timestamp'].dt.hour
df['Day'] = df['Trip Start Timestamp'].dt.date

# Group by spatial + temporal units.
# When indexing was skipped there is no 'H3 Index' column, so build an empty
# table with the same columns instead of failing with a KeyError right after
# announcing the skip.
if 'H3 Index' in df.columns:
    agg = df.groupby(['H3 Index', 'Hour']).size().reset_index(name='Trip Count')
else:
    agg = pd.DataFrame(columns=['H3 Index', 'Hour', 'Trip Count'])

# --- 6. Gaussian Mixture Model Clustering ---
# Keep only rows with both pickup coordinates. The explicit copy makes it
# unambiguous that the 'Cluster' column added below belongs to `coords`
# and never touches `df`.
coords = df[['Pickup Centroid Latitude', 'Pickup Centroid Longitude']].dropna().copy()

# Fit GMM.
# random_state pins the initialisation: without it the fitted components, and
# therefore the cluster numbers and plot colours, differ on every run.
gmm = GaussianMixture(n_components=5, covariance_type='full', random_state=42).fit(coords)
coords['Cluster'] = gmm.predict(coords[['Pickup Centroid Latitude', 'Pickup Centroid Longitude']])

# Plot clusters
fig, ax = plt.subplots(figsize=(10, 6))
sns.scatterplot(
    x='Pickup Centroid Longitude',
    y='Pickup Centroid Latitude',
    hue='Cluster',
    data=coords,
    palette='viridis',
    alpha=0.6,
    ax=ax
)
ax.set_title("GMM Clusters of Taxi Pickup Locations")
ax.set_xlabel("Longitude")
ax.set_ylabel("Latitude")
plt.show()

# --- 7. Folium Heatmap (Optional) ---
# Centre the map on the mean pickup latitude / longitude
map_center = [
    coords[axis_col].mean()
    for axis_col in ('Pickup Centroid Latitude', 'Pickup Centroid Longitude')
]
fmap = folium.Map(location=map_center, zoom_start=11)

# One [lat, lng] pair per row feeds the heat layer
pickup_latlng = coords[['Pickup Centroid Latitude', 'Pickup Centroid Longitude']]
heat_data = pickup_latlng.values.tolist()
heat_layer = HeatMap(heat_data)
heat_layer.add_to(fmap)

# Persist the interactive map as a standalone HTML file
fmap.save("pickup_heatmap.html")


In [None]:
from folium import Choropleth

# 1. Load GeoJSON and Taxi Data
gdf_tracts = gpd.read_file("tl_2024_17_tract.geojson")
df = pd.read_csv("testing_no_null.csv", low_memory=False)

# 2. Prepare Data
df['Trip Start Timestamp'] = pd.to_datetime(df['Trip Start Timestamp'])

# Convert tract codes to string for joining
gdf_tracts['GEOID'] = gdf_tracts['GEOID'].astype(str)

# 3. Gaussian Mixture Model on pickup coordinates
# Fit and predict only on rows that have both coordinates, because
# GaussianMixture rejects NaN input.
coords = df[['Pickup Centroid Latitude', 'Pickup Centroid Longitude']]
coords_valid = coords.dropna()

# random_state pins the initialisation so cluster numbers (and the map colours
# derived from them) are the same on every run.
gmm = GaussianMixture(n_components=5, covariance_type='full', random_state=42).fit(coords_valid)

# Assign labels by index alignment: rows without coordinates get NaN, and when
# no row is missing the column keeps predict()'s integer dtype.
df['Cluster'] = pd.Series(gmm.predict(coords_valid), index=coords_valid.index)

# 4. Aggregate: Most common cluster per tract
def dominant_cluster(labels):
    """Return the most frequent label in `labels`, or NaN if it has none.

    ``value_counts`` ignores NaN, so a group with no usable label yields an
    empty result; return NaN for it rather than raising IndexError.
    """
    counts = labels.value_counts()
    return counts.index[0] if len(counts) else float('nan')


cluster_map = df.groupby('Pickup Census Tract')['Cluster'].agg(dominant_cluster).reset_index()
cluster_map.columns = ['GEOID', 'Dominant Cluster']

# Build join keys that really match the shapefile GEOIDs.
# A plain astype(str) on a float tract column yields keys like "17031081500.0",
# so the left merge below would match nothing and every tract would end up
# with a NaN cluster. Going through int64 and zero-padding to 11 characters
# avoids that. (groupby drops NaN keys by default, so the int cast is safe.)
gdf_tracts['GEOID'] = gdf_tracts['GEOID'].astype(str)
cluster_map['GEOID'] = (
    pd.to_numeric(cluster_map['GEOID'])
    .astype('int64')
    .astype(str)
    .str.zfill(11)
)

# 5. Merge with GeoDataFrame
gdf_tracts = gdf_tracts.merge(cluster_map, on='GEOID', how='left')

# 6. Create Folium Map
chicago_center = [41.8781, -87.6298]
fmap = folium.Map(location=chicago_center, zoom_start=10)

# 7. Add Colored Census Tracts Based on Cluster
Choropleth(
    geo_data=gdf_tracts,
    data=gdf_tracts,
    columns=['GEOID', 'Dominant Cluster'],
    key_on='feature.properties.GEOID',
    fill_color='Set1',
    fill_opacity=0.6,
    line_opacity=0.2,
    legend_name='Dominant GMM Cluster'
).add_to(fmap)

# Optional: Add tooltip
# This layer exists only to carry the hover tooltip. Without a style_function
# it is drawn with the default blue fill and thick outline on top of the
# choropleth, tinting the cluster colours. Render it with an invisible fill
# and a hairline border so the choropleth stays readable.
folium.GeoJson(
    gdf_tracts,
    name="Census Tracts",
    style_function=lambda feature: {
        'fillColor': 'transparent',
        'color': 'gray',
        'weight': 0.3,
        'fillOpacity': 0
    },
    tooltip=folium.GeoJsonTooltip(fields=['GEOID', 'Dominant Cluster'])
).add_to(fmap)

fmap.save("gmm_clusters_by_tract.html")
fmap


In [None]:
from folium.plugins import HeatMap


def tract_outline_style(_feature):
    """Style for tract polygons: thin gray border, no visible fill."""
    return {
        'fillColor': 'transparent',
        'color': 'gray',
        'weight': 0.3,
        'fillOpacity': 0
    }


# Base map centred on Chicago
pickup_map = folium.Map(location=chicago_center, zoom_start=10)

# Tract outlines with a GEOID hover tooltip (borders only, no color scale)
tract_tooltip = folium.GeoJsonTooltip(fields=['GEOID'])
tract_layer = folium.GeoJson(
    gdf_tracts,
    style_function=tract_outline_style,
    tooltip=tract_tooltip
)
tract_layer.add_to(pickup_map)

# Heat layer built from every row that has both pickup coordinates
pickup_latlng = df[['Pickup Centroid Latitude', 'Pickup Centroid Longitude']].dropna()
pickup_coords = pickup_latlng.values.tolist()
HeatMap(pickup_coords, radius=8, blur=15).add_to(pickup_map)

pickup_map


In [None]:
# Dropoff heatmap.
# Uses its own names (dropoff_map / dropoff_coords) so that running this cell
# no longer overwrites the pickup map and pickup coordinates with dropoff data.
dropoff_map = folium.Map(location=chicago_center, zoom_start=10)

# Add very subtle borders only (no fill, no color scale)
folium.GeoJson(
    gdf_tracts,
    style_function=lambda x: {
        'fillColor': 'transparent',
        'color': 'gray',
        'weight': 0.3,
        'fillOpacity': 0
    },
    tooltip=folium.GeoJsonTooltip(fields=['GEOID'])
).add_to(dropoff_map)

# Add heatmap
# Drop rows with a missing dropoff coordinate, as the pickup heatmap does for
# its own columns; HeatMap cannot plot a NaN location.
dropoff_coords = (
    df[['Dropoff Centroid Latitude', 'Dropoff Centroid Longitude']]
    .dropna()
    .values
    .tolist()
)
HeatMap(dropoff_coords, radius=8, blur=15).add_to(dropoff_map)

dropoff_map