In [None]:
import os
from pathlib import Path

import pandas as pd
import numpy as np

import shapely.geometry as sgeo
import geopandas as gpd
import pyogrio

import plotly.io as pio
import plotly.express as px

In [None]:
# Use the "plotly_dark" template as the default for every Plotly figure created below
pio.templates.default = "plotly_dark"

# Calibration notebook

Note: This notebook is a limited version of the full comparison and analysis notebook (`MiD Comparison.ipynb`) that only covers the aspects that we intend to fit during the manual calibration of the choice model and congestion parameters.

## Settings

In [None]:
# Directory containing the synthetic population and simulation output.
# The default is kept as before; set the CALIBRATION_INPUT_PATH environment
# variable to run the notebook on another machine without editing this cell.
input_path = Path(os.environ.get("CALIBRATION_INPUT_PATH", "/home/shoerl/tum/output"))

# File name prefix of the synthetic population files (homes, households, persons)
input_prefix = "bavaria_1pct_"

# Sampling factor associated with the input data
# NOTE(review): presumably the population sample share matching the "1pct" prefix - confirm
input_sampling_factor = 0.01

In [None]:
# Synthetic population files (homes, households, persons) must be present
assert os.path.exists(input_path / f"{input_prefix}homes.gpkg")
assert os.path.exists(input_path / f"{input_prefix}households.csv")
assert os.path.exists(input_path / f"{input_prefix}persons.csv")

# Trip output written by the mode choice and/or MATSim simulation
assert os.path.exists(input_path / "eqasim_trips.csv")

# Zone data, looked up relative to the current working directory
assert os.path.exists("zones.gpkg")

In [None]:
# Zoning data: zones.gpkg (relative to the working directory) provides the
# zone geometries and the zone_id tags used for the spatial join with homes
df_zones = gpd.read_file("zones.gpkg")

## Data preparation

In [None]:
# Read the home locations of the synthetic population
df_homes = pyogrio.read_dataframe(input_path / f"{input_prefix}homes.gpkg")

# Tag every home with each zone it lies within (one row per home/zone match)
df_home_zones = df_homes.sjoin(df_zones, predicate="within")
df_home_zones = df_home_zones[["household_id", "zone_id"]]

def home_zones(df, zones):
    """Attach home-zone tags for the requested zones to `df`.

    Restricts the module-level `df_home_zones` table to rows whose `zone_id`
    is contained in `zones` and merges the result onto `df` by `household_id`.
    The merge uses the pandas default (inner) join, so rows of `df` without a
    matching zone are dropped and rows matching several zones are repeated.

    Parameters:
        df: data frame with a `household_id` column.
        zones: collection of zone identifiers to keep.

    Returns:
        The merged data frame, including a `zone_id` column.
    """
    in_requested_zones = df_home_zones["zone_id"].isin(zones)
    return pd.merge(df, df_home_zones[in_requested_zones], on="household_id")

In [None]:
# Read the person table of the synthetic population (semicolon-separated)
df_persons = pd.read_csv(input_path / f"{input_prefix}persons.csv", sep=";")

In [None]:
# Read the household table and attach its attributes to every person
df_households = pd.read_csv(input_path / f"{input_prefix}households.csv", sep=";")
df_persons = df_persons.merge(df_households, on="household_id")

In [None]:
# NOTE: eqasim_trips.csv is read and converted to a GeoDataFrame once, in the
# next cell ("Load trip information"). An identical load in this cell would be
# overwritten there before being used, so no work is done here.

In [None]:
# Read the simulated trips (semicolon-separated)
df_trips = pd.read_csv(input_path / "eqasim_trips.csv", sep=";")

# Represent every trip as a straight line from its origin to its destination
origin = gpd.points_from_xy(df_trips["origin_x"], df_trips["origin_y"])
destination = gpd.points_from_xy(df_trips["destination_x"], df_trips["destination_y"])
df_trips["geometry"] = [
    sgeo.LineString((start, end)) for start, end in zip(origin, destination)
]
df_trips = gpd.GeoDataFrame(df_trips, crs="EPSG:25832")

# Running row number used as the trip key in later merges
df_trips["trip_index"] = np.arange(df_trips.shape[0])

In [None]:
# Attach the household id to every trip. No key is given, so the merge joins
# on the columns both frames share (presumably only person_id).
df_trips = pd.merge(df_trips, df_persons[["person_id", "household_id"]])

# Build the lookup from trip_index to home zone: trip -> person -> household -> zone(s)
df_trip_zones = pd.merge(
    pd.merge(
        df_trips[["person_id", "trip_index"]].copy(),
        df_persons[["person_id", "household_id"]],
        on="person_id",
    ),
    df_home_zones,
    on="household_id",
).drop(columns=["person_id"])

def trip_zones(df, zones):
    """Attach home-zone tags for the requested zones to a trip table.

    Restricts the module-level `df_trip_zones` table to rows whose `zone_id`
    is contained in `zones` and merges the result onto `df` by `trip_index`.
    The merge uses the pandas default (inner) join, so trips without a
    matching zone are dropped and trips matching several zones are repeated.

    Parameters:
        df: data frame with a `trip_index` column.
        zones: collection of zone identifiers to keep.

    Returns:
        The merged data frame, including the columns of `df_trip_zones`.
    """
    in_requested_zones = df_trip_zones["zone_id"].isin(zones)
    return pd.merge(df, df_trip_zones[in_requested_zones], on="trip_index")

In [None]:
# Add tag if person is active, i.e. the person_id appears in df_trips.
# Evaluated on the trips as loaded so far, before any later filtering of df_trips.
df_persons["is_active"] = df_persons["person_id"].isin(df_trips["person_id"])

In [None]:
# Derived trip attributes: a unit weight per trip, distances scaled by 1e-3
# into the *_km columns and travel time divided by 60 into travel_time_min
# (presumably metres -> km and seconds -> minutes)
df_trips["trips"] = 1.0
df_trips["distance_km"] = df_trips["routed_distance"].mul(1e-3)
df_trips["travel_time_min"] = df_trips["travel_time"].div(60)
df_trips["euclidean_distance_km"] = df_trips["euclidean_distance"].mul(1e-3)

# The purpose of a trip is the following activity, except for trips that
# return home, which take the purpose of the preceding activity
f_return = df_trips["following_purpose"].eq("home")
df_trips["purpose"] = df_trips["following_purpose"].mask(
    f_return, df_trips["preceding_purpose"]
)

# Keep only trips with a strictly positive euclidean distance
df_trips = df_trips.loc[df_trips["euclidean_distance"].gt(0)]

## Comparison

In [None]:
# Median distance and travel time of the synthetic trips per home zone and
# mode. Only the two measured columns are aggregated, so identifier columns
# brought in by trip_zones() (e.g. household_id) do not end up as meaningless
# "median" columns in the comparison table.
df = (
    trip_zones(
        df_trips[["trip_index", "mode", "distance_km", "travel_time_min"]],
        ["mvv", "munich", "umland"],
    )
    .groupby(["zone_id", "mode"])[["distance_km", "travel_time_min"]]
    .median()
    .reset_index()
    .assign(data = "synthetic")
)

# Append the reference values labelled "MiD"; ignore_index keeps the row
# labels of the combined table unique.
# NOTE(review): every travel_time_min value below equals the distance_km value
# of the same row, which looks like a placeholder - confirm against the MiD tables.
df = pd.concat([df, pd.DataFrame.from_records([
    { "zone_id": "mvv", "mode": "walk", "distance_km": 1, "travel_time_min": 1, "data": "MiD" },
    { "zone_id": "mvv", "mode": "bicycle", "distance_km": 2, "travel_time_min": 2, "data": "MiD" },
    { "zone_id": "mvv", "mode": "car_passenger", "distance_km": 6, "travel_time_min": 6, "data": "MiD" },
    { "zone_id": "mvv", "mode": "car", "distance_km": 8, "travel_time_min": 8, "data": "MiD" },
    { "zone_id": "mvv", "mode": "pt", "distance_km": 8, "travel_time_min": 8, "data": "MiD" },
    { "zone_id": "munich", "mode": "walk", "distance_km": 1, "travel_time_min": 1, "data": "MiD" },
    { "zone_id": "munich", "mode": "bicycle", "distance_km": 2, "travel_time_min": 2, "data": "MiD" },
    { "zone_id": "munich", "mode": "car_passenger", "distance_km": 6, "travel_time_min": 6, "data": "MiD" },
    { "zone_id": "munich", "mode": "car", "distance_km": 7, "travel_time_min": 7, "data": "MiD" },
    { "zone_id": "munich", "mode": "pt", "distance_km": 6, "travel_time_min": 6, "data": "MiD" },
    { "zone_id": "umland", "mode": "walk", "distance_km": 1, "travel_time_min": 1, "data": "MiD" },
    { "zone_id": "umland", "mode": "bicycle", "distance_km": 2, "travel_time_min": 2, "data": "MiD" },
    { "zone_id": "umland", "mode": "car_passenger", "distance_km": 6, "travel_time_min": 6, "data": "MiD" },
    { "zone_id": "umland", "mode": "car", "distance_km": 8, "travel_time_min": 8, "data": "MiD" },
    { "zone_id": "umland", "mode": "pt", "distance_km": 14, "travel_time_min": 14, "data": "MiD" },
])], ignore_index = True)

In [None]:
# Grouped bars per zone: colour encodes the mode, the pattern encodes the data source
px.bar(
    df,
    x="zone_id",
    y="distance_km",
    color="mode",
    pattern_shape="data",
    barmode="group",
    title="Median distance by mode",
)

In [None]:
# Grouped bars per zone: colour encodes the mode, the pattern encodes the data source
px.bar(
    df,
    x="zone_id",
    y="travel_time_min",
    color="mode",
    pattern_shape="data",
    barmode="group",
    title="Median travel time by mode",
)

### Mode share by trips and distance (Figure 17)

In [None]:
# Number of synthetic trips per mode and home zone
df = (
    trip_zones(df_trips[["trip_index", "mode"]], ["mvv", "munich", "umland"])
    .drop(columns=["trip_index"])
    .groupby(["mode", "zone_id"])
    .size()
    .reset_index(name="count")
    .assign(data="synthetic")
)

# Turn the counts into shares of each zone's total
df_total = df.groupby("zone_id")["count"].sum().reset_index(name="total")
df = df.merge(df_total, on="zone_id")
df["share"] = df["count"].div(df["total"])

# Append the reference shares labelled "MiD", given as (zone_id, mode, share)
df = pd.concat([
    df,
    pd.DataFrame(
        [
            ("mvv", "walk", 0.21),
            ("mvv", "bicycle", 0.15),
            ("mvv", "car_passenger", 0.12),
            ("mvv", "car", 0.34),
            ("mvv", "pt", 0.18),
            ("munich", "walk", 0.24),
            ("munich", "bicycle", 0.18),
            ("munich", "car_passenger", 0.10),
            ("munich", "car", 0.24),
            ("munich", "pt", 0.24),
            ("umland", "walk", 0.18),
            ("umland", "bicycle", 0.13),
            ("umland", "car_passenger", 0.14),
            ("umland", "car", 0.44),
            ("umland", "pt", 0.11),
        ],
        columns=["zone_id", "mode", "share"],
    ).assign(data="MiD"),
])

# Grouped bars per zone: colour encodes the mode, the pattern encodes the data source
px.bar(
    df,
    x="zone_id",
    y="share",
    color="mode",
    pattern_shape="data",
    barmode="group",
    title="Mode share by trips",
)