In [12]:
pip install folium

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


In [None]:
import folium
import pandas as pd
from folium.plugins import HeatMap
from folium.plugins import AntPath

def folium_user_trace(df, deviceid, output_path="user_trace_map.html"):
    # Ensure datetime exists
    if "datetime" not in df.columns:
        df["date"] = df["date"].astype(str)
        df["time"] = df["time"].astype(str)
        df["datetime"] = pd.to_datetime(df["date"] + " " + df["time"], dayfirst=True)

    df_user = df[df['deviceid'] == deviceid].sort_values("datetime")

    if df_user.empty:
        print(f"No data found for deviceid {deviceid}")
        return

    # Center of the map
    center = [df_user['lat'].mean(), df_user['lon'].mean()]
    m = folium.Map(location=center, zoom_start=13, tiles="CartoDB Positron")

    # Add path
    coords = df_user[['lat', 'lon']].values.tolist()
    AntPath(locations=coords, color='blue', weight=3, delay=1000).add_to(m)
    m.save(output_path)
    print(f"Saved user trace map: {output_path}")
    
def folium_dual_trace(df_raw, df_clean, deviceid, output_path="user_trace_compare.html"):
    # Prepare datetime
    for df in [df_raw, df_clean]:
        if "datetime" not in df.columns:
            df["date"] = df["date"].astype(str)
            df["time"] = df["time"].astype(str)
            df["datetime"] = pd.to_datetime(df["date"] + " " + df["time"], dayfirst=True)

    # Filter and sort
    df_r = df_raw[df_raw['deviceid'] == deviceid].sort_values("datetime")
    df_c = df_clean[df_clean['deviceid'] == deviceid].sort_values("datetime")

    if df_r.empty or df_c.empty:
        print("One of the traces is empty.")
        return

    # Center of the map
    center = [
        (df_r['lat'].mean() + df_c['lat'].mean()) / 2,
        (df_r['lon'].mean() + df_c['lon'].mean()) / 2
    ]
    m = folium.Map(location=center, zoom_start=13, tiles="CartoDB Positron")

    # Add raw trace (blue)
    coords_raw = df_r[['lat', 'lon']].values.tolist()
    AntPath(locations=coords_raw, color='blue', weight=4, delay=1000).add_to(m)

    # Add clean trace (red)
    coords_clean = df_c[['lat', 'lon']].values.tolist()
    AntPath(locations=coords_clean, color='red', weight=4, delay=1000).add_to(m)

    folium.LayerControl().add_to(m)
    m.save(output_path)
    print(f"Saved dual trace map to {output_path}")


def folium_heatmap(df, output_path="heatmap.html", sample_frac=0.01):
    if sample_frac < 1.0:
        df = df.sample(frac=sample_frac, random_state=42)

    center = [df['lat'].mean(), df['lon'].mean()]
    m = folium.Map(location=center, zoom_start=8, tiles="CartoDB Positron")

    heat_data = df[["lat", "lon"]].values.tolist()
    HeatMap(heat_data, radius=8, blur=6, min_opacity=0.3).add_to(m)

    m.save(output_path)
    print(f"Saved map to {output_path}")
    
def get_percentile_user_id(df, percentile=0.9):
    counts = df["deviceid"].value_counts()
    target_count = counts.quantile(percentile)
    
    # Get the deviceid with count closest to target
    closest_id = (counts - target_count).abs().idxmin()
    return closest_id

In [None]:
df_raw = pd.read_parquet("data/20230331.parquet")
df_clean = pd.read_parquet("data_denoised/20230331.parquet")
device = get_percentile_user_id(df_raw)
print(f"Selected high count device in raw data: {device}")
print(f"Selected high count device in cleaned data: {device}")

# Now visualize

folium_dual_trace(df_raw, df_clean, device, output_path="maps/trace_raw_vs_clean.html")
# folium_user_trace(df_raw, device, output_path="maps/trace_average_user_raw.html")
# folium_heatmap(df_raw, output_path="maps/heatmap_raw.html")
# folium_user_trace(df_clean, device, output_path="maps/trace_average_user_clean.html")

Selected high count device in raw data: 855a9e747bada6ffe75d37a8f7bb247c2c0f63df57adf54dfc754f1b502f0bef
Selected high count device in cleaned data: 855a9e747bada6ffe75d37a8f7bb247c2c0f63df57adf54dfc754f1b502f0bef
Saved user trace map: maps/trace_average_user_raw.html
Saved user trace map: maps/trace_average_user_clean.html


In [3]:
import folium
import geopandas as gpd
import pandas as pd

# Load the grid
gdf = gpd.read_file("maps/minimalist_coning.geojson").to_crs("EPSG:4326")

# Ensure 'zone_id' exists
if "zone_id" not in gdf.columns:
    gdf["zone_id"] = pd.Series(range(len(gdf)), dtype="int32")

# Load the transition matrix
df_matrix = pd.read_csv("data/global_transition_matrix.csv", index_col=0)

# Extract involved zone IDs
row_ids = df_matrix.index.astype(float).astype(int)
col_ids = df_matrix.columns.astype(float).astype(int)
involved_ids = set(row_ids).union(col_ids)

# Flag involved zones
gdf["involved"] = gdf["zone_id"].isin(involved_ids)

# Make the map
center = gdf.geometry.centroid.unary_union.centroid.coords[0][::-1]
m = folium.Map(location=center, zoom_start=9, tiles="CartoDB positron")

# Draw zones
for _, row in gdf.iterrows():
    color = "#ff4c4c" if row["involved"] else "#dddddd"
    folium.GeoJson(
        row["geometry"],
        style_function=lambda feature, color=color: {
            "fillColor": color,
            "color": "black",
            "weight": 0.5,
            "fillOpacity": 0.6
        },
        tooltip=f'Zone {row["zone_id"]}'
    ).add_to(m)

# Save map
m.save("highlighted_zones_map.html")


  center = gdf.geometry.centroid.unary_union.centroid.coords[0][::-1]
  center = gdf.geometry.centroid.unary_union.centroid.coords[0][::-1]
