**Computes statistics for each Olympic Games (JO) location**

**Input**: csv using the following columns:
* `transportation_mode`: used mode of transport for the trip (see table below)
* `start_time`: datetime of beginning of trip
* `end_time`: datetime of end of trip
* `user_id`: Id of the traveling user, used to make sure results include more than one user per geographic division
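* `trace_gps`: JSON-encoded list of (lon, lat) points; the last point is used as the end position of the trip
* `longitude`, `latitude`: coordinates of the start point of the trip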

**Output**: JSON files for each location (e.g. "../static/data/sites/Arena Bercy/modal_share.json") containing the following data:
* `start`: Total number of trips, duration and distance per transport mode, for trips starting from location (departures)
* `start_percents_count`: Mode share based on number of trips, for departures
* `start_percents_duration`: Mode share based on duration of trips, for departures
* `start_percents_distance`: Mode share based on distance of trips, for departures
* `end`: Total number of trips, duration and distance per transport mode, for trips ending in location (arrivals)
* `end_percents_count`: Mode share based on number of trips, for arrivals
* `end_percents_duration`: Mode share based on duration of trips, for arrivals
* `end_percents_distance`: Mode share based on distance of trips, for arrivals

**Output**: JSON file with locations sorted by popularity (number of trips) ("../static/data/sites_popularity.json"), formatted as an array of objects:
* `name`: Name of location
* `total_trips`: Total number of trips starting and ending at location
* `arrivals`: Number of trips ending in location
* `departures`: Number of trips starting from location
* `prefered_arrival_mode`: Most used transport mode for arrivals
* `prefered_departure_mode`: Most used transport mode for departures

**Output**: GeoJSON files listing popular h3 cells for start or end points of trips arriving or leaving each location (e.g. "../static/data/sites/Arena Bercy/origin_zones.geojson" and "../static/data/sites/Arena Bercy/destination_zones.geojson")
* The geoJSON contains a `color` metadata from green to red depending on the popularity of the cell


For anonymity, data is only exported when at least 4 different users are observed for a given location or cell


In [None]:
!pip install geopy

In [None]:
import pandas as pd
import geopandas as gpd
import h3pandas
from shapely.geometry import Point, Polygon, LineString
import json
import folium
import os
from geopy.distance import great_circle
import numpy as np

In [None]:
# Load the raw trips export (one row per trip) that every later cell works on
df = pd.read_csv("sources/data.csv")

In [None]:
df

In [None]:
# Human-readable label for each numeric transport-mode code of the input data
tr = {
    -10: "NOT_DEFINED",
    0: "UNKNOWN",
    1: "PASSENGER_CAR",
    2: "MOTORCYCLE",
    3: "HEAVY_DUTY_VEHICLE",
    4: "BUS",
    5: "COACH",
    6: "RAIL_TRIP",
    7: "BOAT_TRIP",
    8: "BIKE_TRIP",
    9: "PLANE",
    10: "SKI",
    11: "FOOT",
    12: "IDLE",
    13: "OTHER",
    101: "SCOOTER",
    102: "HIGH_SPEED_TRAIN",
}
# Translate every trip's mode code into its label; a code missing from `tr` raises KeyError
df['transportation_mode_tr'] = df['transportation_mode'].apply(lambda mode_code: tr[mode_code])

In [None]:
def decode_gps(trace_gps):
    """Parse a JSON-encoded GPS trace string and return the decoded value."""
    decoded_trace = json.loads(trace_gps)
    return decoded_trace
# Decode every trip's raw trace once so that later cells can index into its points
df["gps_decoded"] = df['trace_gps'].apply(decode_gps)

In [None]:
# The end position of a trip is the last point of its decoded trace: index 0 is
# stored as the longitude and index 1 as the latitude
df['end_longitude'] = df["gps_decoded"].apply(lambda trace: trace[-1][0])
df['end_latitude'] = df["gps_decoded"].apply(lambda trace: trace[-1][1])

In [None]:
# Total travelled distance of a trace, measured with geopy's great-circle distance
def total_distance(coords):
    """Return the summed great-circle distance, in kilometers, along ``coords``.

    Consecutive points are paired up and each point is reversed before being
    handed to ``great_circle``. A trace with fewer than two points yields 0.0.
    """
    travelled_km = 0.0
    for origin, destination in zip(coords, coords[1:]):
        travelled_km += great_circle(origin[::-1], destination[::-1]).kilometers
    return travelled_km

# Applying the total_distance function to the DataFrame:
# one travelled distance (in kilometers) per trip, computed from its decoded trace
df['total_distance'] = df['gps_decoded'].apply(total_distance)


In [None]:
# Convert columns to datetime so that they can be subtracted from one another
df['start_time'] = pd.to_datetime(df['start_time'])
df['end_time'] = pd.to_datetime(df['end_time'])

# Calculate the duration of each trip in seconds (end time minus start time)
df['duration'] = (df['end_time'] - df['start_time']).dt.total_seconds()

In [None]:
df

In [None]:
# Tag every trip with the h3 resolution-9 cell of its start point (dfh3_start)
# and of its end point (dfh3_end). set_index=False keeps the cell id as a regular
# column, which the following cells read as "h3_09".
dfh3_start = df.h3.geo_to_h3(9, lat_col="latitude", lng_col="longitude", set_index=False)
dfh3_end = df.h3.geo_to_h3(9, lat_col="end_latitude", lng_col="end_longitude", set_index=False)

In [None]:
# h3 level 9 cells loosely covering every Olympic location.
# Each entry maps an internal key to the display "name" of the site (also used as
# the name of its output folder) and to the list of "h3" cell ids that are matched
# against the start/end cells of the trips.
# For bigger sites, we try to cover the whole site with many cells.
# There might be imprecisions:
# * With people parking far outside the cell (even though the trip should continue with a walking segment)
# * With people stopping in the cell, but not going to the Olympic location (very true for locations near train stations)
# * With people moving within the cell
# NOTE(review): cell "891fb474b9bffff" is listed for both "stade_de_france" and
# "centre_aquatique_st_denis", so trips in that cell are counted for both sites.

SITES_OLYMPIQUES = {
    #"saint_lazare": {"h3": ["891fb475b37ffff"]},
    #"gare_du_nord": {"h3": ["891fb4660dbffff"]},
    #"gare_de_lest": {"h3": ["891fb466053ffff", "891fb4660cfffff", "891fb4660cbffff"]},
    "addidas_arena": {"name": "Arena Porte de La Chapelle", "h3": ["891fb4664afffff", "891fb4664abffff", "891fb466433ffff"]},
    "pont_alexandre_trois": {"name": "Pont Alexandre III", "h3": ["891fb4675d3ffff"]},
    "invalides": {"name": "Invalides", "h3": ["891fb4675dbffff"]},
    "grand_palais": {"name": "Grand Palais", "h3": ["891fb475b6bffff"]},
    "tour_eiffel": {"name": "Stade Tour Eiffel", "h3": ["891fb46741bffff", "891fb467413ffff", "891fb467403ffff", "891fb46740bffff"]},
    "trocadero": {"name": "Trocadéro", "h3": ["891fb4674d7ffff"]},
    "grand_palais_ephemere": {"name": "Arena Champ-de-Mars", "h3": ["891fb467477ffff"]},
    "place_concorde": {"name": "La Concorde", "h3": ["891fb46759bffff"]},
    "porte_versailles": {"name": "Arena Paris Sud", "h3": ["891fb467673ffff"]},
    "arena_bercy": {"name": "Arena Bercy", "h3": ["891fb46440fffff"]},
    "parc_des_princes": {"name": "Parc des Princes", "h3": ["891fb462b8bffff","891fb462b8fffff", "891fb462b13ffff"]},
    "rolland_garros": {"name": "Stade Roland-Garros", "h3": ["891fb462867ffff"]},
    "la_defense_arena": {"name": "Paris La Défense Arena", "h3": ["891fb475313ffff", "891fb47538fffff"]},
    "stade_de_france": {"name": "Stade de France", "h3": ["891fb474b83ffff", "891fb474b9bffff", "891fb474b93ffff", "891fb474b97ffff", "891fb474b87ffff"]},
    "villepinte" : {"name": "Arena Paris Nord", "h3": ["891fb428197ffff", "891fb42aa5bffff" , "891fb42aa4bffff", "891fb42aa43ffff", "891fb42aa57ffff", "891fb4281b3ffff", "891fb42aa4fffff", "891fb42aa47ffff", "891fb42aa0bffff", "891fb4281b7ffff", "891fb42aa7bffff", "891fb42aa73ffff", "891fb42aa0fffff", "891fb42aa6bffff", "891fb42aa63ffff", "891fb42aa77ffff", "891fb42aa3bffff", "891fb42aa67ffff", "891fb42aa2bffff"]},
    "centre_aquatique_st_denis" : {"name": "Centre Aquatique", "h3": ["891fb474b9bffff"]},
    "tir_chateauroux" : {"name": "Centre National de Tir de Châteauroux", "h3": ["89186dd5027ffff", "89186dd51cbffff", "89186dd515bffff", "89186dd5153ffff", "89186dd51cfffff", "89186dd514bffff", "89186dd5143ffff", "89186dd5157ffff", "89186dd514fffff", "89186dd5147ffff", "89186dd510bffff", "89186dd5173ffff", "89186dd510fffff"]},
    "chateau_de_versailles" : {"name": "Château de Versailles", "h3": ["891fb4632d3ffff", "891fb4632c3ffff", "891fb4632d7ffff", "891fb4632c7ffff"]},
    "colline_elancourt" : {"name": "Colline d'Elancourt", "h3": ["891fb4782afffff", "891fb478237ffff", "891fb478233ffff", "891fb4782abffff", "891fb478207ffff", "891fb47823bffff", "891fb4783c3ffff"]},
    # "" : {"name": "Hôtel de Ville", "h3": []},
    # "" : {"name": "Golf National", "h3": []},
    # "" : {"name": "Marina de Marseille", "h3": []},
    # "" : {"name": "Site d'escalade Bourget", "h3": []},
    "bmx_st_quentin" : {"name": "Stade BMX de Saint-Quentin-en-Yvelines", "h3": ["891fb478e37ffff", "891fb4788cbffff", "891fb4788cfffff", "891fb478e23ffff", "891fb478e27ffff", "891fb478853ffff"]},
    # "" : {"name": "Stade de Bordeaux", "h3": []},
    # "" : {"name": "Stade de la Beaujoire", "h3": []},
    # "" : {"name": "Stade de Lyon", "h3": []},
    # "" : {"name": "Stade de Marseille", "h3": []},
    # "" : {"name": "Stade de Nice", "h3": []},
    # "" : {"name": "Stade Geoffroy-Guichard", "h3": []},
    # "" : {"name": "Stade Nautique de Vaires-sur-Marne", "h3": []},
    # "" : {"name": "Stade Pierre Mauroy", "h3": []},
    # "" : {"name": "Stade Yves-du-Manoir", "h3": []},
    # "" : {"name": "Teahupo'o, Tahiti", "h3": []},
    # "" : {"name": "Vélodrome National de Saint-Quentin-en-Yvelines", "h3": []}
}

In [None]:
def _summarise_trips_by_mode(trips):
    """Aggregate a set of trips per transport mode.

    Returns a ``(per_mode, totals, shares)`` tuple:

    * ``per_mode``: DataFrame indexed by transport mode with the ``Count``,
      ``Duration`` and ``Distance`` columns
    * ``totals``: ``per_mode`` converted to a dict and extended with the
      ``Total_Count``, ``Total_Duration`` and ``Total_Distance`` entries
    * ``shares``: dict with ``count``, ``duration`` and ``distance`` entries,
      each mapping a transport mode to its share of the matching total
    """
    # String aggregation names keep pandas' own group-wise implementations
    # instead of relying on how numpy callables are dispatched.
    per_mode = trips.groupby(['transportation_mode_tr']).agg(
        Count=('transportation_mode_tr', 'size'),
        Duration=('duration', 'sum'),
        Distance=('total_distance', 'sum'),
    )
    totals = per_mode.to_dict()
    totals['Total_Count'] = len(trips)
    totals['Total_Duration'] = trips["duration"].sum()
    totals['Total_Distance'] = trips["total_distance"].sum()
    shares = {
        "count": (per_mode["Count"] / totals['Total_Count']).to_dict(),
        "duration": (per_mode["Duration"] / totals['Total_Duration']).to_dict(),
        "distance": (per_mode["Distance"] / totals['Total_Distance']).to_dict(),
    }
    return per_mode, totals, shares


def compute_modal_share(site_data):
    """Compute the modal share of a site and write it to its ``modal_share.json``.

    Departures are the trips whose start cell belongs to ``site_data["h3"]``
    and arrivals are the trips whose end cell does. The statistics of both are
    written to ``../static/data/sites/<site name>/modal_share.json``.

    Args:
        site_data: dict with the ``"name"`` of the site and its ``"h3"`` cell ids.

    Returns:
        A dict summarising the popularity of the site (``name``,
        ``total_trips``, ``arrivals``, ``departures``,
        ``prefered_arrival_mode``, ``prefered_departure_mode``), or ``None``
        when departures or arrivals involve fewer than 4 distinct users, in
        which case nothing is written.
    """
    departures = dfh3_start[dfh3_start["h3_09"].isin(site_data["h3"])]
    arrivals = dfh3_end[dfh3_end["h3_09"].isin(site_data["h3"])]

    # Do not display information if cell has less than 4 users
    if departures['user_id'].nunique() < 4 or arrivals['user_id'].nunique() < 4:
        return None

    start_by_mode, start_totals, start_shares = _summarise_trips_by_mode(departures)
    end_by_mode, end_totals, end_shares = _summarise_trips_by_mode(arrivals)

    mode_share = json.dumps({
        "start": start_totals,
        "start_percents_count": start_shares["count"],
        "start_percents_duration": start_shares["duration"],
        "start_percents_distance": start_shares["distance"],
        "end": end_totals,
        "end_percents_count": end_shares["count"],
        "end_percents_duration": end_shares["duration"],
        "end_percents_distance": end_shares["distance"],
    })
    site_dir = f'../static/data/sites/{site_data["name"]}'
    os.makedirs(site_dir, exist_ok=True)
    with open(f'{site_dir}/modal_share.json', 'w', encoding='utf-8') as f:
        f.write(mode_share)
    return {
        "name": site_data["name"],
        "total_trips": start_totals['Total_Count'] + end_totals['Total_Count'],
        "arrivals": end_totals['Total_Count'],
        "departures": start_totals['Total_Count'],
        # Arrivals are the trips ending at the site, departures the trips
        # starting from it: each preferred mode must come from its own table.
        "prefered_arrival_mode": end_by_mode['Count'].idxmax(),
        "prefered_departure_mode": start_by_mode['Count'].idxmax(),
    }

In [None]:
# Build the popularity summary of every site; compute_modal_share returns nothing
# for sites that do not pass its anonymity check, and those sites are left out
sites_popularity = []
for site in SITES_OLYMPIQUES:
    data = compute_modal_share(SITES_OLYMPIQUES[site])
    if data:
        sites_popularity.append(data)
sites_popularity

In [None]:
# Rank the sites from the most to the least visited (departures + arrivals)
sorted_sites = sorted(sites_popularity, key=lambda d: d['total_trips'], reverse=True)
# The output directory does not exist yet when no per-site file has been written
os.makedirs('../static/data', exist_ok=True)
with open('../static/data/sites_popularity.json', 'w', encoding='utf-8') as f:
    json.dump(sorted_sites, f)


In [None]:
import branca.colormap as cm
def _export_zone_geojson(trips, lat_col, lng_col, output_path):
    """Write the popular h3 level 7 cells of ``trips`` to a GeoJSON file.

    Trips are grouped by the level 7 cell of the point given by ``lat_col`` and
    ``lng_col``. Only cells with more than 3 distinct users are kept, and each
    kept cell gets a ``color`` from green to red according to its trip count.
    Nothing is written when no cell is left.
    """
    if trips.empty:
        return
    trips_h3 = trips.h3.geo_to_h3(7, lat_col=lat_col, lng_col=lng_col, set_index=False)
    zones = trips_h3.groupby(['h3_07']).agg(
        Count=('user_id', 'size'),
        UniqueUsers=('user_id', 'nunique'),
    )
    # Anonymity: drop the cells visited by too few distinct users
    zones = zones[zones["UniqueUsers"] > 3]
    if zones.empty:
        # Bail out before building geometries and a colormap with no maximum
        return
    zones = zones.h3.h3_to_geo_boundary()
    colormap = cm.LinearColormap(["green", "yellow", "red"], vmin=0, vmax=zones["Count"].max())
    # The last two characters of the colormap output are dropped
    # (presumably the alpha channel of an "#rrggbbaa" string; confirm against branca)
    zones["color"] = zones["Count"].apply(lambda count: colormap(count)[:-2])
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    zones.to_file(output_path, driver="GeoJSON")


def compute_destinations(site_data):
    """Export the popular destination and origin zones of a site as GeoJSON.

    * ``destination_zones.geojson``: cells where the trips starting at the site end
    * ``origin_zones.geojson``: cells where the trips ending at the site start

    Both files are written to ``../static/data/sites/<site name>/``; a file is
    skipped when none of its cells has more than 3 distinct users.

    Args:
        site_data: dict with the ``"name"`` of the site and its ``"h3"`` cell ids.
    """
    site_dir = f'../static/data/sites/{site_data["name"]}'

    dfh3_filter_start = dfh3_start[dfh3_start["h3_09"].isin(site_data["h3"])]
    _export_zone_geojson(
        dfh3_filter_start,
        lat_col="end_latitude",
        lng_col="end_longitude",
        output_path=f'{site_dir}/destination_zones.geojson',
    )

    dfh3_filter_end = dfh3_end[dfh3_end["h3_09"].isin(site_data["h3"])]
    _export_zone_geojson(
        dfh3_filter_end,
        lat_col="latitude",
        lng_col="longitude",
        output_path=f'{site_dir}/origin_zones.geojson',
    )

#compute_destinations(SITES_OLYMPIQUES["arena_bercy"])

In [None]:
# Export the origin and destination zones of every configured site
for site in SITES_OLYMPIQUES:
    compute_destinations(SITES_OLYMPIQUES[site])