**Computes large population movements**

**Input**: csv using the following columns:
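* `journey_id`: Id of the journey; all rows sharing it are aggregated into a single trip
* `transportation_mode`: numeric code of the mode of transport used, translated into a readable label stored in `transportation_mode_tr`
* `distance_km`: distance covered, summed per mode of transport to find the main mode(s) of each journey
* `starting_longitude` and `starting_latitude`: Starting point of trip
* `ending_longitude` and `ending_latitude`: Ending point of trip
* `start_time` and `end_time`: start and end of the trip; `start_time` is used to select trips before/after 2024-05-10
* `user_id`: Id of the traveling user, used to make sure results include more than 3 users per geographic division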

**Output**: geoJSON file "../static/data/exode.geojson" containing h3 cell shapes with the following metadata:
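* `geometry`: H3 cell shape of the destinations
* `MostCommonTransport`: most common main mode of transport (`top_transportation_mode_tr`) among the trips counted in the cell
* `color`: A color representation of `MostCommonTransport`
* `Count`: Number of distinct users with a trip arriving in the cell (only cells with more than 3 users are kept)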

**Output**: geoJSON file "../static/data/exode_lines.geojson" containing line shapes with the following metadata:
* `geometry`: LineString starting from Paris and ending at the centroid of the H3 cell
* `MostCommonTransport`: most common main mode of transport (`top_transportation_mode_tr`) among the trips counted in the cell
* `color`: A color representation of `MostCommonTransport`
* `Count`: Number of distinct users with a trip arriving in the cell (only cells with more than 3 users are kept)


In [None]:
import pandas as pd
from mappymatch.constructs.geofence import Geofence
from shapely.geometry import Point, LineString
from shapely.vectorized import contains
import h3pandas
import geopandas as gpd
import folium
import json
import numpy as np


In [None]:
# Load the Île-de-France region boundary from GeoJSON; its geometry is used
# further down to tell journeys leaving the region from journeys entering it.
geofence_idf = Geofence.from_geojson("sources/region-ile-de-france.geojson")

In [None]:
# Load the raw trip records. Several rows may share a journey_id (they are
# grouped per journey below) — presumably one row per trip segment; TODO confirm.
df = pd.read_csv("sources/data_france_ascension_09_et_12_mai_2024.csv")

In [None]:
# Lookup table: numeric transportation-mode code -> readable label.
tr = {
    -10: "NOT_DEFINED",
    0: "UNKNOWN",
    1: "PASSENGER_CAR",
    2: "MOTORCYCLE",
    3: "HEAVY_DUTY_VEHICLE",
    4: "BUS",
    5: "COACH",
    6: "RAIL_TRIP",
    7: "BOAT_TRIP",
    8: "BIKE_TRIP",
    9: "PLANE",
    10: "SKI",
    11: "FOOT",
    12: "IDLE",
    13: "OTHER",
    101: "SCOOTER",
    102: "HIGH_SPEED_TRAIN",
}

# Translate every code of `transportation_mode` into its label.
# A code that is absent from `tr` raises KeyError rather than being ignored.
df['transportation_mode_tr'] = df['transportation_mode'].apply(tr.__getitem__)

In [None]:
# Notebook display: inspect the table once `transportation_mode_tr` has been added.
df

In [None]:
# Journey-level summary: one row per journey_id holding the first (non-null)
# start coordinates/time, the last (non-null) end coordinates/time and the
# user who made the journey.
agg_main = df.groupby('journey_id').agg(
    starting_longitude=('starting_longitude', 'first'),
    starting_latitude=('starting_latitude', 'first'),
    start_time=('start_time', 'first'),
    ending_longitude=('ending_longitude', 'last'),
    ending_latitude=('ending_latitude', 'last'),
    end_time=('end_time', 'last'),
    user_id=('user_id', 'first')
).reset_index()

# Total distance travelled with each transportation mode inside each journey.
agg_distance = df.groupby(['journey_id', 'transportation_mode_tr']).agg(
    total_distance=('distance_km', 'sum')
).reset_index()

# Within every journey, order the modes from the longest to the shortest distance ...
agg_distance_sorted = agg_distance.sort_values(by=['journey_id', 'total_distance'], ascending=[True, False])

# ... and keep at most the two longest ones.
agg_distance_top2 = agg_distance_sorted.groupby('journey_id').head(2).reset_index(drop=True)

# rank is 1 for the longest mode of a journey and 2 for the runner-up.
agg_distance_top2['rank'] = agg_distance_top2.groupby('journey_id').cumcount() + 1

# Spread both ranks over columns; the result has MultiIndex columns keyed by
# (value name, rank).
agg_distance_pivot = agg_distance_top2.pivot(index='journey_id', columns='rank', values=['transportation_mode_tr', 'total_distance'])

# Name each pivoted column from its (value name, rank) key instead of relying
# on the positional order produced by pivot.
pivot_column_names = {
    ('transportation_mode_tr', 1): 'top_transportation_mode_tr',
    ('transportation_mode_tr', 2): 'second_top_transportation_mode_tr',
    ('total_distance', 1): 'top_transportation_mode_distance',
    ('total_distance', 2): 'second_top_transportation_mode_distance',
}
agg_distance_pivot.columns = [pivot_column_names[key] for key in agg_distance_pivot.columns]

# reindex guarantees that all four columns exist (filled with NaN) even when
# no journey in the data uses a second mode, and fixes their order.
agg_distance_pivot = agg_distance_pivot.reindex(columns=list(pivot_column_names.values())).reset_index()

# Attach the dominant modes to the journey summary; the left join keeps every journey.
result = pd.merge(agg_main, agg_distance_pivot, on='journey_id', how='left')

In [None]:
# Notebook display: journeys whose second most-used mode covers a positive distance.
result[result["second_top_transportation_mode_distance"] > 0]

In [None]:
# Label combining the two dominant modes of each journey, glued together
# without separator; journeys with a single mode get an empty suffix.
second_mode_or_blank = result["second_top_transportation_mode_tr"].fillna('')
result["merge_transportation_mode_tr"] = result["top_transportation_mode_tr"] + second_mode_or_blank

# Notebook display of the enriched table.
result

In [None]:
# From here on `df` names the journey-level table (same object as `result`, not a copy).
df = result

In [None]:
# One point per journey, located at its starting coordinates (longitude = x, latitude = y).
journey_start_points = gpd.points_from_xy(df.starting_longitude, df.starting_latitude)

# Geo-enabled version of the journey table, declared in WGS84 (EPSG:4326).
gdf = gpd.GeoDataFrame(df, geometry=journey_start_points, crs="EPSG:4326")


In [None]:
# Region geometry used for both spatial tests of this cell.
geofence_idf_geometry = geofence_idf.geometry

# Journeys whose starting point lies within the geofence. `.copy()` makes the
# selection an independent frame, so adding a column below does not assign
# into a slice of `gdf` (which triggers pandas' SettingWithCopyWarning).
gdf_from_idf = gdf[gdf.geometry.within(geofence_idf_geometry)].copy()

# Point geometry of every journey's end location.
gdf_from_idf['end_geometry'] = gpd.points_from_xy(gdf_from_idf.ending_longitude, gdf_from_idf.ending_latitude)

# Keep the journeys whose end location is NOT within the geofence:
# started inside the region, ended outside of it.
gdf_exit_idf = gdf_from_idf[~gdf_from_idf['end_geometry'].within(geofence_idf_geometry)]

# Both geometry columns were only needed for the filters above; the following
# cells work from the plain latitude/longitude columns.
gdf_exit_idf = gdf_exit_idf.drop(columns=['geometry', 'end_geometry'])
gdf_exit_idf

In [None]:
# Outbound journeys whose start_time compares below the literal "2024-05-10"
# (a lexicographic comparison if the column holds ISO-formatted strings — TODO confirm).
departures_before_may10 = gdf_exit_idf[gdf_exit_idf["start_time"] < "2024-05-10"]

# Tag each journey with the resolution-4 H3 cell of its end point (column `h3_04`).
dfh3 = departures_before_may10.h3.geo_to_h3(
    4,
    lat_col="ending_latitude",
    lng_col="ending_longitude",
    set_index=False,
)

# Keep one row per (cell, user) so the count below is a number of distinct users.
df_unique_user = dfh3.drop_duplicates(subset=['h3_04', 'user_id'])
drawgeoframe = (
    df_unique_user[['h3_04']]
    .groupby(['h3_04'])
    .agg(Count=('h3_04', np.size))
    .reset_index()
    .set_index('h3_04')
)

# Only publish cells reached by more than 3 distinct users.
drawgeoframe = drawgeoframe[drawgeoframe['Count'] > 3]

# First resolve each cell to its h3_to_geo geometry and keep it as `center_geom`,
# then replace the active geometry by the cell boundary.
drawgeoframe = drawgeoframe.h3.h3_to_geo()
drawgeoframe["center_geom"] = drawgeoframe["geometry"]
drawgeoframe = drawgeoframe.h3.h3_to_geo_boundary()
drawgeoframe

In [None]:
# Common origin of every drawn line, given as (longitude, latitude).
fixed_point = Point(2.333333, 48.866667)


def create_line(point):
    """Return the straight LineString going from `fixed_point` to `point`."""
    endpoints = [fixed_point, point]
    return LineString(endpoints)


# One line per cell, from the fixed origin to the cell's `center_geom`.
drawgeoframe['geom'] = drawgeoframe['center_geom'].apply(create_line)

# Notebook display: column-wise maxima.
drawgeoframe.max()

In [None]:
import branca.colormap as cm

# Green -> yellow -> red ramp spanning counts from 0 to 50.
colormap = cm.LinearColormap(["green", "yellow", "red"], vmin=0, vmax=50)


def _ramp_color(count):
    """Return the ramp colour string for `count` without its last two characters."""
    # branca returns "#RRGGBBAA"; dropping two characters leaves "#RRGGBB".
    return colormap(count)[:-2]


def _style_from_color(feature):
    """Folium style callback: draw a feature with its own `color` property."""
    return {"color": feature['properties']['color']}


drawgeoframe["color"] = drawgeoframe["Count"].apply(_ramp_color)

# Map centre used for the preview.
start_lat = 48.8915079
start_long = 2.3495425
m = folium.Map(location=[start_lat, start_long], zoom_start=13)

# Layer 1: the H3 cell outlines.
cell_layer = folium.GeoJson(drawgeoframe[["geometry", "color"]], style_function=_style_from_color)
cell_layer.add_to(m)

# Layer 2: the lines from the fixed origin to every cell.
line_layer = folium.GeoJson(
    drawgeoframe[["geom", "color"]].rename(columns={"geom": "geometry"}),
    style_function=_style_from_color,
)
line_layer.add_to(m)
m

In [None]:
def most_common_value(series):
    """Return the most frequent non-null value of `series`.

    When several values are tied, the first entry of ``Series.mode()``
    (which pandas returns in sorted order) is used.

    Returns:
        The most frequent value, or ``None`` when `series` holds no
        non-null value (empty or all-null group).
    """
    modes = series.mode()
    # mode() is empty for an empty / all-null series; indexing it would raise IndexError.
    if modes.empty:
        return None
    return modes.iloc[0]
# Tag every outbound journey with the resolution-4 H3 cell of its end point (column `h3_04`).
dfh3 = gdf_exit_idf.h3.geo_to_h3(
    4,
    lat_col="ending_latitude",
    lng_col="ending_longitude",
    set_index=False,
)

# Keep one row per (cell, user) so that Count is a number of distinct users.
df_unique_user = dfh3.drop_duplicates(subset=['h3_04', 'user_id'])
drawgeoframe = (
    df_unique_user[['h3_04', 'top_transportation_mode_tr']]
    .groupby(['h3_04'])
    .agg(
        Count=('h3_04', np.size),
        MostCommonTransport=('top_transportation_mode_tr', most_common_value),
    )
    .reset_index()
    .set_index('h3_04')
)

# Only publish cells reached by more than 3 distinct users.
drawgeoframe = drawgeoframe[drawgeoframe['Count'] > 3]

# Keep the h3_to_geo geometry as `center_geom`, then switch the active
# geometry to the cell boundary.
drawgeoframe = drawgeoframe.h3.h3_to_geo()
drawgeoframe["center_geom"] = drawgeoframe["geometry"]
drawgeoframe = drawgeoframe.h3.h3_to_geo_boundary()
drawgeoframe

In [None]:
# Common origin of every drawn line, given as (longitude, latitude).
fixed_point = Point(2.333333, 48.866667)


def create_line(point):
    """Return the straight LineString going from `fixed_point` to `point`."""
    endpoints = [fixed_point, point]
    return LineString(endpoints)


# One line per cell, from the fixed origin to the cell's `center_geom`.
drawgeoframe['geom'] = drawgeoframe['center_geom'].apply(create_line)

In [None]:
# Display colour per transportation-mode label; labels that are not listed
# fall back to gray (see the lookup below).
colormap = {
    "PLANE": "red",
    "PASSENGER_CAR": "orange",
    "PASSENGER_CARFOOT": "darkorange",
    "RAIL_TRIP": "green",
    "HIGH_SPEED_TRAIN": "green",
    "HIGH_SPEED_TRAINRAIL_TRIP": "darkgreen",
    "PASSENGER_CARRAIL_TRIP": "yellow",
    "FOOT": "black",  # unexpected as a dominant mode; drawn in black so it stands out
}
drawgeoframe["color"] = drawgeoframe["MostCommonTransport"].map(
    lambda mode: colormap.get(mode, "gray")
)

# Map centre used for the preview.
start_lat = 48.8915079
start_long = 2.3495425
m = folium.Map(location=[start_lat, start_long], zoom_start=13)

# --- H3 cells: preview layer, then GeoJSON export --------------------------
folium.GeoJson(
    drawgeoframe[["geometry", "color"]],
    style_function=lambda feature: {"color": feature['properties']['color']},
).add_to(m)
cells_export = drawgeoframe[["geometry", "color", "Count", "MostCommonTransport"]]
cells_export.to_file("../static/data/exode.geojson", driver="GeoJSON")

# --- Lines from the fixed origin: preview layer, then GeoJSON export -------
folium.GeoJson(
    drawgeoframe[["geom", "color"]].rename(columns={"geom": "geometry"}),
    style_function=lambda feature: {"weight": "0.5", "color": feature['properties']['color']},
).add_to(m)
lines_export = drawgeoframe[["geom", "color", "Count", "MostCommonTransport"]].rename(columns={"geom": "geometry"})
lines_export.to_file("../static/data/exode_lines.geojson", driver="GeoJSON")
m

In [None]:
# Region geometry used for both spatial tests of this cell.
geofence_idf_geometry = geofence_idf.geometry

# Geo-enabled journey table: one point per journey at its starting coordinates (WGS84).
gdf = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df.starting_longitude, df.starting_latitude), crs="EPSG:4326")

# Journeys whose starting point is NOT within the geofence. `.copy()` makes the
# selection an independent frame, so adding a column below does not assign
# into a slice of `gdf` (which triggers pandas' SettingWithCopyWarning).
# NOTE: the name `gdf_from_idf` is kept from the outbound cell although these
# journeys start outside the region.
gdf_from_idf = gdf[~gdf.geometry.within(geofence_idf_geometry)].copy()

# Point geometry of every journey's end location.
gdf_from_idf['end_geometry'] = gpd.points_from_xy(gdf_from_idf.ending_longitude, gdf_from_idf.ending_latitude)

# Keep the journeys whose end location is within the geofence:
# started outside the region, ended inside of it.
gdf_enter_idf = gdf_from_idf[gdf_from_idf['end_geometry'].within(geofence_idf_geometry)]

# The end points were only needed for the filter above.
gdf_enter_idf = gdf_enter_idf.drop(columns='end_geometry')
gdf_enter_idf


In [None]:
def most_common_value(series):
    """Return the most frequent non-null value of `series`.

    When several values are tied, the first entry of ``Series.mode()``
    (which pandas returns in sorted order) is used.

    Returns:
        The most frequent value, or ``None`` when `series` holds no
        non-null value (empty or all-null group).
    """
    modes = series.mode()
    # mode() is empty for an empty / all-null series; indexing it would raise IndexError.
    if modes.empty:
        return None
    return modes.iloc[0]
# Inbound journeys whose start_time compares above the literal "2024-05-10"
# (a lexicographic comparison if the column holds ISO-formatted strings — TODO confirm).
returns_after_may10 = gdf_enter_idf[gdf_enter_idf["start_time"] > "2024-05-10"]

# Tag each journey with the resolution-4 H3 cell of its START point (column `h3_04`).
dfh3 = returns_after_may10.h3.geo_to_h3(
    4,
    lat_col="starting_latitude",
    lng_col="starting_longitude",
    set_index=False,
)

# Keep one row per (cell, user) so that Count is a number of distinct users.
df_unique_user = dfh3.drop_duplicates(subset=['h3_04', 'user_id'])
drawgeoframe = (
    df_unique_user[['h3_04', 'top_transportation_mode_tr']]
    .groupby(['h3_04'])
    .agg(
        Count=('h3_04', np.size),
        MostCommonTransport=('top_transportation_mode_tr', most_common_value),
    )
    .reset_index()
    .set_index('h3_04')
)

# Only publish cells with more than 3 distinct users.
drawgeoframe = drawgeoframe[drawgeoframe['Count'] > 3]

# Keep the h3_to_geo geometry as `center_geom`, then switch the active
# geometry to the cell boundary.
drawgeoframe = drawgeoframe.h3.h3_to_geo()
drawgeoframe["center_geom"] = drawgeoframe["geometry"]
drawgeoframe = drawgeoframe.h3.h3_to_geo_boundary()
drawgeoframe

In [None]:
# Common end of every drawn line, given as (longitude, latitude).
fixed_point = Point(2.333333, 48.866667)


def create_line(point):
    """Return the straight LineString joining `fixed_point` and `point`."""
    endpoints = [fixed_point, point]
    return LineString(endpoints)


# One line per cell, between the fixed point and the cell's `center_geom`.
drawgeoframe['geom'] = drawgeoframe['center_geom'].apply(create_line)

In [None]:
# Display colour per dominant transportation mode.
colormap = {
    "PLANE": "red",
    "PASSENGER_CAR": "orange",
    "RAIL_TRIP": "green",
    "HIGH_SPEED_TRAIN": "green",
    "FOOT": "black",  # unexpected as a dominant mode; drawn in black so it stands out
}
# Modes missing from the table get "gray", the same fallback as the outbound
# ("exode") map, instead of a null colour in the map and in the exported files.
drawgeoframe["color"] = drawgeoframe["MostCommonTransport"].apply(lambda x: colormap.get(x, "gray"))

# Map centre used for the preview.
start_lat = 48.8915079
start_long = 2.3495425
m = folium.Map(location=[start_lat, start_long], zoom_start=13)

# H3 cells: preview layer, then GeoJSON export.
folium.GeoJson(drawgeoframe[["geometry", "color"]], style_function=lambda f: {"color": f['properties']['color']}).add_to(m)
drawgeoframe[["geometry", "color", "Count", "MostCommonTransport"]].to_file("../static/data/inxode.geojson", driver="GeoJSON")

# Lines: preview layer whose stroke weight is Count / 100, then GeoJSON export.
folium.GeoJson(drawgeoframe[["geom", "color", "Count"]].rename(columns={"geom": "geometry"}), style_function=lambda f: {"weight": int(f['properties']['Count'])/100, "color": f['properties']['color']}).add_to(m)
drawgeoframe[["geom", "color", "Count", "MostCommonTransport"]].rename(columns={"geom": "geometry"}).to_file("../static/data/inxode_lines.geojson", driver="GeoJSON")
m