**Computes stats for the opening ceremony**

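Reads:
* Ceremony-day input csv, with gps traces: 'sources/data_france_ceremonie_jo.csv'
* Regular-day h3 modal share csv (generated with the same script): 'sources/h3_modal_mars_tc_in.csv'
* Perimeter geojson files: 'sources/paris.geojson', 'sources/ceremony_red.geojson', 'sources/ceremony_silt.geojson'

Outputs:
* '../static/data/ceremony/modal_share.json'
* "../static/data/ceremony_h3_modal_share_tc_in_clean.geojson"
* "../static/data/ceremony_h3_modal_change_tc_in_clean.geojson"
* '../static/data/ceremony/trips_per_15_min.json'
* '../static/data/ceremony/black_zone_entry.json' and '../static/data/ceremony/black_zone_exits.json'
* '../static/data/ceremony/red_zone_entry.json' and '../static/data/ceremony/red_zone_exits.json'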



In [None]:
# Configuration

# --- Inputs ---
INPUT_CSV_FILE = "sources/data_france_ceremonie_jo.csv"
# Counterpart of OUTPUT_H3_MODAL_SHARE_FILE, computed for a previous (regular) day
INPUT_REGULAR_DAY_H3_MODAL_SHARE_FILE = "sources/h3_modal_mars_tc_in.csv"
PARIS_GEOJSON_PERIMETER_FILE = "sources/paris.geojson"
RED_ZONE_GEOJSON_PERIMETER_FILE = "sources/ceremony_red.geojson"
BLACK_ZONE_GEOJSON_PERIMETER_FILE = "sources/ceremony_silt.geojson"

# --- Outputs ---
# Folder holding the ceremony JSON exports (keep the trailing slash: the paths
# below are built by plain concatenation).
OUTPUT_FOLDER = "../static/data/ceremony/"
OUTPUT_MODAL_SHARE_FILE = OUTPUT_FOLDER + "modal_share.json"

# The two H3 layers live one level above OUTPUT_FOLDER.
OUTPUT_H3_MODAL_SHARE_FILE = "../static/data/ceremony_h3_modal_share_tc_in_clean.geojson"
OUTPUT_H3_MODAL_CHANGE_FILE = "../static/data/ceremony_h3_modal_change_tc_in_clean.geojson"

# Chart data: trip counts per 15-minute bucket
OUTPUT_PARIS_COUNT_PER_15_MIN_FILE = OUTPUT_FOLDER + "trips_per_15_min.json"
OUTPUT_BLACK_ZONE_ENTRY_COUNT_PER_15_MIN_FILE = OUTPUT_FOLDER + "black_zone_entry.json"
OUTPUT_BLACK_ZONE_EXIT_COUNT_PER_15_MIN_FILE = OUTPUT_FOLDER + "black_zone_exits.json"
OUTPUT_RED_ZONE_ENTRY_COUNT_PER_15_MIN_FILE = OUTPUT_FOLDER + "red_zone_entry.json"
OUTPUT_RED_ZONE_EXIT_COUNT_PER_15_MIN_FILE = OUTPUT_FOLDER + "red_zone_exits.json"

In [None]:
import pandas as pd
import geopandas as gpd
import h3pandas
from shapely.geometry import Point, Polygon, LineString
from shapely.vectorized import contains
import json
import folium
import os
import numpy as np
from folium.plugins import GroupedLayerControl
import branca.colormap as cm
from mappymatch.constructs.geofence import Geofence

In [None]:
# Perimeters used throughout the notebook:
#   red zone   = no motorized transport
#   black zone = no movement, except ticket holders
#   paris      = overall study perimeter
red_zone, black_zone, paris = (
    Geofence.from_geojson(perimeter_file)
    for perimeter_file in (
        RED_ZONE_GEOJSON_PERIMETER_FILE,
        BLACK_ZONE_GEOJSON_PERIMETER_FILE,
        PARIS_GEOJSON_PERIMETER_FILE,
    )
)

In [None]:
# Load the raw input CSV configured in INPUT_CSV_FILE.
df = pd.read_csv(INPUT_CSV_FILE)

In [None]:
# Notebook preview of the freshly loaded DataFrame.
df

In [None]:
# Numeric transportation mode code -> symbolic mode name.
tr = {
    -10: "NOT_DEFINED",
    0: "UNKNOWN",
    1: "PASSENGER_CAR",
    2: "MOTORCYCLE",
    3: "HEAVY_DUTY_VEHICLE",
    4: "BUS",
    5: "COACH",
    6: "RAIL_TRIP",
    7: "BOAT_TRIP",
    8: "BIKE_TRIP",
    9: "PLANE",
    10: "SKI",
    11: "FOOT",
    12: "IDLE",
    13: "OTHER",
    101: "SCOOTER",
    102: "HIGH_SPEED_TRAIN",
}
# Numeric engine type code -> display label (French).
tre = {
    -10: "",
    1: "Essence",
    2: "Diesel",
    3: "Electrique",
    4: "Essence Hybride",
    5: "Diesel Hybride",  # label was misspelled "Diesiel Hybride"
}
# Plain dict indexing is kept on purpose: a code missing from the tables raises
# KeyError instead of silently producing a missing label.
df['transportation_mode_tr'] = df['transportation_mode'].apply(lambda x: tr[x])
df['engine_type_tr'] = df['engine_type'].apply(lambda x: tre[x])  # Unused

In [None]:
# Parse both timestamp columns (rows may use different datetime formats)
for time_col in ('start_time', 'end_time'):
    df[time_col] = pd.to_datetime(df[time_col], format='mixed')

# Trip duration, expressed in seconds
elapsed = df['end_time'] - df['start_time']
df['duration'] = elapsed.dt.total_seconds()

In [None]:
# The rest of the notebook refers to the "moover_id" column as "user_id".
df = df.rename(columns={"moover_id": "user_id"})

In [None]:
# Journey-level endpoints: the start of a journey is the start of its first row
# and its end is the end of its last row. 'first'/'last' follow row order, so the
# rows are ordered chronologically first instead of relying on the CSV order
# (stable sort: rows sharing a start_time keep their original relative order).
journey_df = (
    df.sort_values("start_time", kind="stable")
    .groupby("journey_id")
    .agg(
        journey_starting_longitude=('starting_longitude', 'first'),
        journey_starting_latitude=('starting_latitude', 'first'),
        journey_ending_longitude=('ending_longitude', 'last'),
        journey_ending_latitude=('ending_latitude', 'last'),
    )
    .reset_index()
)
# Attach the journey endpoints to every row of the journey.
df = pd.merge(df, journey_df, on="journey_id")
df

In [None]:
# Flag every row by where its *journey* begins and ends, for each perimeter.
# The combined column is True when the journey begins or ends inside the perimeter.
journey_start_xy = (df["journey_starting_longitude"], df["journey_starting_latitude"])
journey_end_xy = (df["journey_ending_longitude"], df["journey_ending_latitude"])

for fence, label, either_col in (
    (paris, "paris", "in_paris"),
    (red_zone, "red_zone", "red_zone"),
    (black_zone, "black_zone", "black_zone"),
):
    begins_col = f"begins_in_{label}"
    ends_col = f"ends_in_{label}"
    df[begins_col] = contains(fence.geometry, *journey_start_xy)
    df[ends_col] = contains(fence.geometry, *journey_end_xy)
    df[either_col] = df[begins_col] | df[ends_col]

In [None]:
# Preview: rows whose journey begins or ends inside the Paris perimeter.
df[df["in_paris"]]

In [None]:
# Preview: rows whose journey begins or ends inside the red zone.
df[df["red_zone"]]

In [None]:
# Preview: rows whose journey begins or ends inside the black zone.
df[df["black_zone"]]

### Compute general modal share stats, not using gps data

In [None]:
# General stats
def compute_stats(filtered_df):
    """Summarise the modal share of a set of trip rows.

    Args:
        filtered_df: DataFrame providing the columns ``journey_id``,
            ``user_id``, ``transportation_mode_tr``, ``duration``,
            ``distance_km`` and ``emission_kg``.

    Returns:
        A JSON-serialisable dict with four entries:

        * ``stats``: per-mode ``Count`` (distinct journeys), ``Duration`` and
          ``Distance`` sums, each keyed by mode name, plus the overall
          ``Total_Count``, ``Total_Users``, ``Total_Duration``,
          ``Total_Distance`` and ``Total_Emission``.
        * ``percents_count`` / ``percents_duration`` / ``percents_distance``:
          each mode's value divided by the matching total (a fraction, not
          multiplied by 100).
    """
    per_mode = filtered_df.groupby(['transportation_mode_tr']).agg(
        Count=('journey_id', 'nunique'),
        Duration=('duration', 'sum'),
        Distance=('distance_km', 'sum'),
    )

    stats = per_mode.to_dict()
    # Series.sum() returns a numpy scalar; for an integer column that is an
    # np.int64, which json.dumps rejects. Builtin types keep the result
    # serialisable whatever the column dtype.
    stats['Total_Count'] = int(filtered_df["journey_id"].nunique())
    stats['Total_Users'] = int(filtered_df["user_id"].nunique())
    stats['Total_Duration'] = float(filtered_df["duration"].sum())
    stats['Total_Distance'] = float(filtered_df["distance_km"].sum())
    stats['Total_Emission'] = float(filtered_df["emission_kg"].sum())

    # A journey made of several modes is counted once under each of them, so
    # the count fractions may add up to more than 1.
    share_of_count = per_mode["Count"] / stats['Total_Count']
    share_of_duration = per_mode["Duration"] / stats['Total_Duration']
    share_of_distance = per_mode["Distance"] / stats['Total_Distance']

    return {
        "stats": stats,
        "percents_count": share_of_count.to_dict(),
        "percents_duration": share_of_duration.to_dict(),
        "percents_distance": share_of_distance.to_dict(),
    }

# Modal share per perimeter: output key -> boolean flag column selecting the rows.
zone_flag_columns = {
    "all": "in_paris",
    "red_zone": "red_zone",
    "black_zone": "black_zone",
}
mode_share = {
    zone_key: compute_stats(df[df[flag_col]])
    for zone_key, flag_col in zone_flag_columns.items()
}

# Make sure the export folder exists, then write the stats as JSON.
os.makedirs(os.path.dirname(OUTPUT_FOLDER), exist_ok=True)
with open(OUTPUT_MODAL_SHARE_FILE, 'w') as f:
    serialized = json.dumps(mode_share)
    f.write(serialized)
mode_share

### Compute h3 cell stats, using gps

In [None]:
# Source rows for the H3 computation. This is the same object as df (no copy).
# Alternatives left disabled: loading another csv, or pre-filtering on "in_paris".
#df_src = pd.read_csv("sources/data_idf 21-23 mars 24.csv")
df_src = df#[df["in_paris"]]

In [None]:
#df_src = df_src.rename(columns={"moover_id": "user_id"})

In [None]:
# Keep only rows flagged "in_paris" (journey begins or ends in Paris),
# for performance; might not be needed.
#df_src["in_paris"] = contains(paris.geometry, df_src["longitude"], df_src["latitude"])
df_src_s = df_src[df_src["in_paris"]]
df_src_s

In [None]:
# Extract relevant columns from the DataFrame
trace_gps_col = df_src_s['gps_trace'].apply(json.loads)  # gps_trace is JSON text; decode each cell
user_id_col = df_src_s['user_id']
transportation_mode_col = df_src_s['transportation_mode']
# 1 when the mode code is 11 (FOOT), 8 (BIKE_TRIP), 4 (BUS) or 6 (RAIL_TRIP), else 0.
# NOTE(review): the name suggests "non-motorized transport", yet bus and rail are
# included - presumably public transport is counted in on purpose; confirm.
isNMT_col = transportation_mode_col.isin([11, 8, 4, 6]).astype(int)

In [None]:
# Keep only the first element of each decoded trace.
# NOTE(review): presumably the decoded value is a list wrapping the list of
# points (the next cell reads x[0] as rows of [lon, lat, ...]) - confirm.
trace_gps_col = trace_gps_col.apply(lambda x: x[0])

In [None]:
# Flatten the traces into one row per GPS point, repeating the trace-level
# attributes (trace id, user, mode, isNMT flag) on each of its points.

# Calculate number of points for each trace
num_points_col = trace_gps_col.apply(len)

# Preallocate arrays for the final DataFrame
total_points = num_points_col.sum()
lon = np.empty(total_points)
lat = np.empty(total_points)
hour = np.empty(total_points, dtype=int)  # allocated but neither filled nor used in this cell
trace_id = np.empty(total_points, dtype=int)
isNMT = np.empty(total_points, dtype=int)
user_id = np.empty(total_points, dtype=object)
transportation_mode = np.empty(total_points, dtype=int)

# Fill the arrays, one contiguous slice per trace
index = 0
for idx, (trace_gps, user_id_val, transportation_mode_val, isNMT_val, num_points) in enumerate(
    zip(trace_gps_col, user_id_col, transportation_mode_col, isNMT_col, num_points_col)
):
    if num_points == 0:
        # np.array([]) is 1-D, so the [:, 0] slicing below would raise
        # IndexError; an empty trace contributes no point anyway.
        continue

    stop = index + num_points
    trace_id[index:stop] = idx  # position of the trace in the filtered input
    isNMT[index:stop] = isNMT_val
    user_id[index:stop] = user_id_val
    transportation_mode[index:stop] = transportation_mode_val

    # Column 0 is read as the longitude, column 1 as the latitude.
    trace_gps_array = np.array(trace_gps)
    lon[index:stop] = trace_gps_array[:, 0]
    lat[index:stop] = trace_gps_array[:, 1]

    index = stop

# Create the final DataFrame
new_df = pd.DataFrame({
    'lon': lon,
    'lat': lat,
    'trace_id': trace_id,
    'isNMT': isNMT,
    'user_id': user_id,
    'transportation_mode': transportation_mode
})
new_df


In [None]:
# Drop the GPS points lying outside the Paris perimeter
# (done for performance, might not be needed).
point_is_in_paris = contains(paris.geometry, new_df["lon"], new_df["lat"])
new_df["in_paris"] = point_is_in_paris
new_df = new_df.loc[new_df["in_paris"]]

In [None]:
# Assign each point to an H3 cell via the h3pandas accessor (first argument 9:
# presumably the H3 resolution). The cell id is read from the "h3_09" column below.
dfh3 = new_df.h3.geo_to_h3(9, lat_col="lat", lng_col="lon", set_index=False)

In [None]:
# Count each user at most once per H3 cell (the first point found is kept).
df_unique_user = dfh3.drop_duplicates(subset=['h3_09', 'user_id'])
# Per cell: Count = number of distinct users, Sum = how many have isNMT == 1.
# 'size' is the built-in group-size aggregation; it yields the same numbers as
# calling np.size on every group. The result is already indexed by h3_09, so no
# reset_index()/set_index() round-trip is needed.
drawgeoframe = df_unique_user[['h3_09', 'isNMT']].groupby(['h3_09']).agg(
    Count=('isNMT', 'size'),
    Sum=('isNMT', 'sum'),
)

In [None]:
# Add the cell outline of every H3 index (h3pandas accessor) so the frame can be
# drawn on a map and exported as GeoJSON.
drawgeoframe = drawgeoframe.h3.h3_to_geo_boundary()

In [None]:
# Share (in %) of the users seen in the cell whose isNMT flag is 1.
drawgeoframe['percent'] = drawgeoframe['Sum']*100 / drawgeoframe['Count']
# Keep only cells with more than 3 distinct users. The explicit copy makes the
# result an independent frame, so adding columns to it later does not trigger
# pandas' SettingWithCopyWarning.
drawgeoframe = drawgeoframe[drawgeoframe['Count'] > 3].copy()
drawgeoframe

In [None]:
# Colour scale for the percent column: red at 0, yellow mid-scale, green at 100.
colormap = cm.LinearColormap(["red", "yellow", "green"], vmin=0, vmax=100)
# Drop the last two characters of the colour string returned by the colormap
# (presumably the alpha channel of an "#rrggbbaa" value - confirm).
drawgeoframe["color"] = drawgeoframe["percent"].apply(lambda x: colormap(x)[:-2])

# Initial map centre
start_lat = 48.8915079
start_long = 2.3495425
m = folium.Map(location=[start_lat, start_long], zoom_start=13)
# NOTE(review): two tile layers are added but no layer control is attached in
# this cell - confirm whether a switcher was intended.
folium.TileLayer('openstreetmap').add_to(m)
folium.TileLayer('cartodbdark_matter').add_to(m)

# Draw every cell using its precomputed "color" property.
folium.GeoJson(drawgeoframe, style_function=lambda f: {"color": f['properties']['color']}).add_to(m)
m

In [None]:
# Export the per-cell modal share layer as GeoJSON.
drawgeoframe.to_file(OUTPUT_H3_MODAL_SHARE_FILE, driver="GeoJSON")

### Evolutions compared to a normal day

In [None]:
# Ceremony-day cells: same object as drawgeoframe (no copy).
ceremony_geodataframe = drawgeoframe
# Regular-day cells, loaded as a plain DataFrame from csv. The next cell expects
# it to provide at least the h3_09, percent, Count and geometry columns.
regularday_geodataframe = pd.read_csv(INPUT_REGULAR_DAY_H3_MODAL_SHARE_FILE)

In [None]:
# Pair every ceremony-day cell with the same cell on the regular day; cells
# missing on either side are dropped. Suffix _x = ceremony, _y = regular day.
merged = ceremony_geodataframe.merge(regularday_geodataframe, how='inner', on='h3_09')

# Difference in modal share (percentage points) and relative change in user count (%).
share_delta = merged["percent_x"] - merged["percent_y"]
count_delta = merged["Count_x"] - merged["Count_y"]
merged["modal_share_percent_diff"] = share_delta
merged["Count_percent_diff"] = count_delta * 100.0 / merged["Count_y"]

# Columns kept for the export, in order, and the names they are exported under.
kept_columns = [
    "geometry_x",
    "modal_share_percent_diff",
    "Count_percent_diff",
    "percent_x",
    "percent_y",
    "Count_x",
    "Count_y",
]
exported_names = {
    "geometry_x": "geometry",
    "percent_x": "modal_share_percent_ceremony",
    "percent_y": "modal_share_percent_regularday",
    "Count_x": "count_ceremony",
    "Count_y": "Count_regularday",
}
final = merged[kept_columns].rename(columns=exported_names)

# Diverging colour scale for the modal share difference: red at -50, white at 0, green at +50.
colormap = cm.LinearColormap(["red", "white", "green"], vmin=-50, vmax=50)
final["color"] = final["modal_share_percent_diff"].apply(lambda x: colormap(x)[:-2])
final = gpd.GeoDataFrame(final, crs="EPSG:4326")
final

In [None]:
# Export the ceremony vs regular-day comparison layer as GeoJSON.
final.to_file(OUTPUT_H3_MODAL_CHANGE_FILE, driver="GeoJSON")

### Count trips per mode per 15 minutes, depending on zone, and export to chartjs compatible json

In [None]:
def convert_df_to_chartjs_barchart_json(source_df):
    """Turn per-bucket trip counts into a chart.js bar chart JSON string.

    Args:
        source_df: DataFrame indexed by datetime, with a
            ``transportation_mode_tr`` column and a ``count`` column.

    Returns:
        A JSON string of the form
        ``{"labels": [...], "datasets": [{"label": mode, "data": [...]}, ...]}``
        with one label per ``HH:MM`` value and one dataset per mode. A mode
        with no row for a label gets 0.
    """
    # One row per HH:MM label, one column per mode. pivot_table averages by
    # default; these are counts, so rows sharing a label and a mode (same time
    # of day on different dates) must be added up instead.
    pivot_df = source_df.pivot_table(
        index=source_df.index.strftime('%H:%M'),
        columns='transportation_mode_tr',
        values='count',
        aggfunc='sum',
        fill_value=0,
    )

    # Prepare the JSON structure
    chart_data = {
        'labels': pivot_df.index.tolist(),
        'datasets': [
            {
                'label': transport_mode,
                'data': pivot_df[transport_mode].tolist(),
            }
            for transport_mode in pivot_df.columns
        ],
    }

    # Convert to JSON
    return json.dumps(chart_data)

#### Trips starting and ending in paris zone

In [None]:
# Rows whose own start point and end point both fall inside the Paris perimeter
starts_in_paris = contains(paris.geometry, df["starting_longitude"], df["starting_latitude"])
ends_in_paris = contains(paris.geometry, df["ending_longitude"], df["ending_latitude"])
fully_in_paris = df[starts_in_paris & ends_in_paris]

# Optional display
# with pd.option_context('display.max_rows', None, 'display.max_columns', None):  # more options can be specified also
#    display(fully_in_paris.set_index('start_time').groupby([pd.Grouper(freq='15min'), 'transportation_mode_tr']).size())

# Number of rows per 15-minute bucket of start_time and per mode,
# returned as a frame indexed by the bucket start.
paris_groups = fully_in_paris.set_index('start_time').groupby(
    [pd.Grouper(freq='15min'), 'transportation_mode_tr']
)
paris_count_per_15 = (
    paris_groups.size()
    .rename('count')
    .reset_index()
    .set_index('start_time')
)

In [None]:
# Write the Paris 15-minute counts as chart.js JSON.
with open(OUTPUT_PARIS_COUNT_PER_15_MIN_FILE, 'w') as f:
    f.write(convert_df_to_chartjs_barchart_json(paris_count_per_15))

#### Black Zone

In [None]:
# Per-row masks: does the row's own start / end point fall inside the black zone?
# (These use the row coordinates, not the journey-level ones.)
starts_in_black_zone = contains(black_zone.geometry, df["starting_longitude"], df["starting_latitude"])
ends_in_black_zone = contains(black_zone.geometry, df["ending_longitude"], df["ending_latitude"])

##### Trips entering black zone

In [None]:
# Rows starting outside the black zone and ending inside it
entering_black_zone = df[~starts_in_black_zone & ends_in_black_zone]

# Entries are bucketed by arrival time (end_time): rows per 15 minutes and per mode
black_entry_groups = entering_black_zone.set_index('end_time').groupby(
    [pd.Grouper(freq='15min'), 'transportation_mode_tr']
)
black_zone_entry_count_per_15 = (
    black_entry_groups.size()
    .rename('count')
    .reset_index()
    .set_index('end_time')
)

In [None]:
# Write the black zone entry counts as chart.js JSON.
with open(OUTPUT_BLACK_ZONE_ENTRY_COUNT_PER_15_MIN_FILE, 'w') as f:
    f.write(convert_df_to_chartjs_barchart_json(black_zone_entry_count_per_15))

##### Trips leaving black zone

In [None]:
# Rows starting inside the black zone and ending outside it
exiting_black_zone = df[starts_in_black_zone & ~ends_in_black_zone]

# Exits are bucketed by departure time (start_time): rows per 15 minutes and per mode
black_exit_groups = exiting_black_zone.set_index('start_time').groupby(
    [pd.Grouper(freq='15min'), 'transportation_mode_tr']
)
black_zone_exits_count_per_15 = (
    black_exit_groups.size()
    .rename('count')
    .reset_index()
    .set_index('start_time')
)

In [None]:
# Write the black zone exit counts as chart.js JSON.
with open(OUTPUT_BLACK_ZONE_EXIT_COUNT_PER_15_MIN_FILE, 'w') as f:
    f.write(convert_df_to_chartjs_barchart_json(black_zone_exits_count_per_15))

#### Red Zone

In [None]:
# Per-row masks: does the row's own start / end point fall inside the red zone?
# (These use the row coordinates, not the journey-level ones.)
starts_in_red_zone = contains(red_zone.geometry, df["starting_longitude"], df["starting_latitude"])
ends_in_red_zone = contains(red_zone.geometry, df["ending_longitude"], df["ending_latitude"])

##### Trips entering red zone

In [None]:
# Rows starting outside the red zone and ending inside it
entering_red_zone = df[~starts_in_red_zone & ends_in_red_zone]

# Entries are bucketed by arrival time (end_time): rows per 15 minutes and per mode
red_entry_groups = entering_red_zone.set_index('end_time').groupby(
    [pd.Grouper(freq='15min'), 'transportation_mode_tr']
)
red_zone_entry_count_per_15 = (
    red_entry_groups.size()
    .rename('count')
    .reset_index()
    .set_index('end_time')
)

In [None]:
# Write the red zone entry counts as chart.js JSON.
with open(OUTPUT_RED_ZONE_ENTRY_COUNT_PER_15_MIN_FILE, 'w') as f:
    f.write(convert_df_to_chartjs_barchart_json(red_zone_entry_count_per_15))

##### Trips leaving red zone

In [None]:
# Rows starting inside the red zone and ending outside it
exiting_red_zone = df[starts_in_red_zone & ~ends_in_red_zone]

# Exits are bucketed by departure time (start_time): rows per 15 minutes and per mode
red_exit_groups = exiting_red_zone.set_index('start_time').groupby(
    [pd.Grouper(freq='15min'), 'transportation_mode_tr']
)
red_zone_exits_count_per_15 = (
    red_exit_groups.size()
    .rename('count')
    .reset_index()
    .set_index('start_time')
)

In [None]:
# Write the red zone exit counts as chart.js JSON.
with open(OUTPUT_RED_ZONE_EXIT_COUNT_PER_15_MIN_FILE, 'w') as f:
    f.write(convert_df_to_chartjs_barchart_json(red_zone_exits_count_per_15))