In [1]:
import numpy as np
import os
import pyarrow
import sys
import json
import math
import mpl_utils
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns

import pandas as pd
import polars as pl
import xml.etree.ElementTree as ET

from xopen import xopen
from datetime import time

# General Directories

In [2]:

# Project root and the sub-directories (relative to it) used by this notebook.
general_directory = '/Users/andre/Desktop/Cergy/'
berlin_directory = 'MATSim/matsim-berlin/input/v6.4/'
pt_10pct_dir = 'Python_Scripts/runs/pt_10pct/'

# MATSim files read by this notebook.
NETWORK_PATH = os.path.join(general_directory, berlin_directory, "berlin-v6.4-network.xml.gz")
VEHICLE_PATH = os.path.join(general_directory, berlin_directory, "berlin-v6.4-vehicleTypes.xml")
MATSIM_TRIPS_PATH = os.path.join(general_directory, pt_10pct_dir, "matsim_trips/MATSim_trips.parquet")

# Metropolis input / output directories. The trailing slash matters: later
# cells build file names with plain concatenation (METRO_INPUT + "edges.parquet").
METRO_INPUT = os.path.join(general_directory, pt_10pct_dir, "metro_inputs/")
METRO_OUTPUT = os.path.join(general_directory, pt_10pct_dir, "metro_outputs/")


# Parameters

In [3]:
POPULATION_SHARE = 0.10

# Parameters to use for the simulation.
# This dictionary is serialised as-is to "parameters.json" in METRO_INPUT further down.
PARAMETERS = {
    # Every input table lives in METRO_INPUT; only the file name differs.
    "input_files": {
        table: os.path.join(METRO_INPUT, file_name)
        for table, file_name in (
            ("agents", "agents.parquet"),
            ("alternatives", "alts.parquet"),
            ("trips", "trips.parquet"),
            ("edges", "edges.parquet"),
            ("vehicle_types", "vehicles.parquet"),
        )
    },
    "output_directory": METRO_OUTPUT,
    "period": [0.0, 86400.0],
    "road_network": dict(
        recording_interval=950.0,
        approximation_bound=1.0,
        spillback=True,
        backward_wave_speed=15.0,
        max_pending_duration=30.0,
        constrain_inflow=True,
        algorithm_type="Best",
    ),
    "learning_model": dict(type="Linear"),
    "init_iteration_counter": 1,
    "max_iterations": 350,
    "update_ratio": 1.0,
    "random_seed": 13081996,
    "nb_threads": 16,
    "saving_format": "Parquet",
    "only_compute_decisions": False,
}

In [4]:
def parse_attributes(elem, my_dict):
    """Copy every XML attribute of ``elem`` into ``my_dict`` (in place).

    Existing keys of ``my_dict`` with the same name are overwritten.
    Nothing is returned.
    """
    my_dict.update(elem.attrib)

# Supply

## Vehicles

In [5]:
def vehicle_reader():
    """Read the vehicle types from the XML file at ``VEHICLE_PATH``.

    Each ``vehicleType`` element becomes one record that collects:

    * the XML attributes of the ``vehicleType`` element itself,
    * the XML attributes of the elements nested inside it,
    * the text of nested ``attribute`` elements, keyed by their ``name``,
    * the ``meter`` value of nested ``length`` / ``width`` elements.

    Returns:
        pandas.DataFrame: one row per vehicle type. The columns listed in
        ``col_types`` are cast to the given dtype when possible; a column
        whose cast fails is reported on stdout and left unconverted.
    """
    vehicle_types = []
    current_vehicle_type = {}
    is_parsing_vehicle_type = False
    # The context manager guarantees the file handle is closed, even if
    # parsing raises. iterparse is lazy, so the loop must stay inside it.
    with xopen(VEHICLE_PATH, "r") as handle:
        for xml_event, elem in ET.iterparse(handle, events=["start", "end"]):
            # Remove the "{namespace}" prefix; rpartition keeps the full tag
            # when the document declares no namespace.
            elem_tag = elem.tag.rpartition("}")[2]
            # VEHICLETYPES
            if elem_tag == "vehicleType" and xml_event == "start":
                parse_attributes(elem, current_vehicle_type)
                is_parsing_vehicle_type = True
            # ATTRIBUTES
            # Read on "end": ElementTree only guarantees that an element's
            # text has been parsed once its closing tag has been seen.
            elif elem_tag == "attribute" and xml_event == "end":
                current_vehicle_type[elem.attrib["name"]] = elem.text
            # LENGTH / WIDTH
            elif elem_tag in ["length", "width"] and xml_event == "start":
                current_vehicle_type[elem_tag] = elem.attrib["meter"]
            # VEHICLETYPES
            elif elem_tag == "vehicleType" and xml_event == "end":
                vehicle_types.append(current_vehicle_type)
                current_vehicle_type = {}
                elem.clear()
                is_parsing_vehicle_type = False
            # EVERYTHING ELSE
            elif is_parsing_vehicle_type and elem_tag not in ["attribute", "length", "width"]:
                parse_attributes(elem, current_vehicle_type)
    vehicle_types = pd.DataFrame.from_records(vehicle_types)
    col_types = {
        "accessTimeInSecondsPerPerson": float,
        "egressTimeInSecondsPerPerson": float,
        "seats": int,
        "standingRoomInPersons": int,
        "length": float,
        "width": float,
        "pce": float,
        "factor": float,
    }
    for col, dtype in col_types.items():
        if col in vehicle_types.columns:
            try:
                vehicle_types[col] = vehicle_types[col].astype(dtype)
            except (ValueError, TypeError):
                # Best effort: e.g. a missing value cannot be cast to int.
                # The column is kept as parsed and the failure is reported.
                print(f"dataframe types conversion failed for column {col}")
    return vehicle_types

In [6]:
def make_vehicles_df(vehicle_types):
    """Build the vehicles table from the vehicle-types DataFrame.

    One record is produced per row of ``vehicle_types``:

    * ``vehicle_id`` is the row's index label,
    * ``headway`` is the vehicle length,
    * ``pce`` is 0.0 for the "ride" type and ``pce / POPULATION_SHARE`` for
      every other type,
    * the speed function is always "Base" with no bound / coefficient.

    Only records whose ``vehicle_id`` is below 5 are kept.

    Returns:
        polars.DataFrame: the filtered vehicles table.
    """
    records = []
    for position, vtype in vehicle_types.iterrows():
        type_name = vtype["id"]
        records.append({
            "vehicle_id": position,
            "vehicle_type": type_name,
            "headway": float(vtype["length"]),
            "pce": 0.0 if type_name == "ride" else float(vtype["pce"]) / POPULATION_SHARE,
            "speed_function.type": "Base",
            "speed_function.upper_bound": None,
            "speed_function.coef": None,
        })

    return pl.DataFrame(records).filter(pl.col('vehicle_id')<5)

# Network

In [7]:
def read_network():
    """Read the network at ``NETWORK_PATH`` and return its car links.

    Every ``link`` element is turned into a record made of its XML attributes
    plus ``link_id`` (the id with "#" removed), ``numeric_link_id`` (the part
    of the id before "#"), ``from_node`` and ``to_node``. Node ids containing
    "cluster" are reduced to the first id of the cluster. Only links whose
    ``modes`` contain "car" are kept.

    Parallel links (several links sharing the same from/to node pair) are
    then split: for every such link except the first of its pair, a new node
    is created, the link is redirected to that node and a zero-length
    connector link (a copy of the link, so it keeps the same ``id``) is added
    from the new node to the original target.

    Returns:
        pandas.DataFrame: the car links, followed by the connector links.
    """
    links = []

    # The context manager guarantees the file handle is closed, even if
    # parsing raises. iterparse is lazy, so the loop must stay inside it.
    with xopen(NETWORK_PATH, "r") as handle:
        for xml_event, elem in ET.iterparse(handle, events=["start", "end"]):
            if elem.tag == "link" and xml_event == "start":
                # Work on a copy: elem.clear() below is allowed to empty the
                # element's own attribute dict in place.
                atts = dict(elem.attrib)
                atts["link_id"] = atts["id"].replace("#", "")
                atts["numeric_link_id"] = int(atts["id"].split("#")[0])
                atts["from_node"] = atts.pop("from")
                atts["to_node"] = atts.pop("to")

                if "cluster" in atts["from_node"]:
                    atts["from_node"] = atts["from_node"].replace("cluster_", "").split("_")[0]
                if "cluster" in atts["to_node"]:
                    atts["to_node"] = atts["to_node"].replace("cluster_", "").split("_")[0]

                atts["length"] = float(atts["length"])
                atts["freespeed"] = float(atts["freespeed"])
                atts["capacity"] = float(atts["capacity"])
                atts["permlanes"] = float(atts["permlanes"])
                if "volume" in atts:
                    atts["volume"] = float(atts["volume"])
                links.append(atts)

            # Release parsed elements to keep memory usage flat.
            if elem.tag in ["node", "link"] and xml_event == "end":
                elem.clear()

    links = pd.DataFrame.from_records(links)
    links = links.loc[links["modes"].str.contains("car")].copy()
    links["link_id"] = links["link_id"].astype(int)
    links["from_node"] = links["from_node"].astype(int)
    links["to_node"] = links["to_node"].astype(int)

    node_pair_counts = links[["from_node", "to_node"]].value_counts()

    if node_pair_counts.max() > 2:
        print("More than two parallel edges")
        print(f"Maximum of {node_pair_counts.max()} parallel edges")

    # Node pairs with more than two parallel edges
    parallel_idx_gt2 = node_pair_counts.loc[node_pair_counts > 2].index
    if len(parallel_idx_gt2):
        print(f"Handling {len(parallel_idx_gt2)} node pairs with more than 2 parallel edges...")

    # Node pairs with exactly two parallel edges
    parallel_idx = node_pair_counts.loc[node_pair_counts == 2].index
    if len(parallel_idx):
        print(f"Found {len(parallel_idx)} parallel edges")

    new_rows = []
    pairs_to_split = list(parallel_idx_gt2) + list(parallel_idx)
    if pairs_to_split:
        # A single pair of counters is used for all node pairs, so that no
        # node id or link id is handed out twice.
        next_node_id = max(links["from_node"].max(), links["to_node"].max()) + 1
        next_link_id = links["link_id"].max() + 1
        for source, target in pairs_to_split:
            mask = (links["from_node"] == source) & (links["to_node"] == target)
            # The first link of the pair is left untouched.
            for label in mask[mask].index[1:]:
                # Connector: copy of the link, zero length, new node -> original target.
                row = links.loc[label].copy()
                row["length"] = 0.0
                row["from_node"] = next_node_id
                row["link_id"] = next_link_id
                new_rows.append(row)
                # The original link now ends at the new intermediate node.
                links.loc[label, "to_node"] = next_node_id
                next_node_id += 1
                next_link_id += 1

    if new_rows:
        links = pd.concat((links, pd.DataFrame(new_rows)), ignore_index=True)

    return links

In [8]:
def make_edges_df(links):
    """Convert the links DataFrame into the edges table.

    Edges are numbered from 1 in the row order of ``links``; the original
    link ``id`` is kept in ``MATSim_id``. All edges use the "FreeFlow"
    speed-density function and allow overtaking.

    Returns:
        polars.DataFrame: one row per link.
    """
    def _to_edge(edge_id, link):
        # Free-flow travel time of the link (length / free speed).
        free_flow_time = float(link["length"]) / float(link["freespeed"])
        return {
            "edge_id": edge_id,
            "MATSim_id": link["id"],
            "source": int(link["from_node"]),
            "target": int(link["to_node"]),
            "speed": float(link["freespeed"]),
            "length": float(link["length"]),
            "lanes": float(link["permlanes"]),
            "speed_density.type": "FreeFlow",
            "speed_density.capacity": None,
            "speed_density.min_density": None,
            "speed_density.jam_density": None,
            "speed_density.jam_speed": None,
            "speed_density.beta": None,
            # Capacity per lane divided by 3600
            # (per second if the capacity is hourly -- TODO confirm the unit).
            "bottleneck_flow": float(link["capacity"])/ (link['permlanes']*3600.0),
            # Amount needed to round the free-flow time up to the next integer.
            "constant_travel_time": math.ceil(free_flow_time) - free_flow_time,
            "overtaking": True
        }

    edge_records = [
        _to_edge(edge_id, link)
        for edge_id, (_, link) in enumerate(links.iterrows(), start=1)
    ]
    return pl.DataFrame(edge_records)

# Create Supply

In [9]:
print("Creating MATSim vehicles")
# Parse the vehicle types and convert them straight into the vehicles table,
# so that `vehicles` is only ever bound to the final polars frame.
vehicles = make_vehicles_df(vehicle_reader())

Creating MATSim vehicles
dataframe types conversion failed for column seats
dataframe types conversion failed for column standingRoomInPersons


In [10]:
print("Creating MATSim network")
# `links`: pandas frame of car links (parallel links already split by read_network).
links = read_network()
# `edges`: polars frame built from `links`; still carries the MATSim link id (MATSim_id).
edges = make_edges_df(links)

Creating MATSim network
More than two parallel edges
Maximum of 3 parallel edges
Handling 10 node pairs with more than 2 parallel edges...
Found 1832 parallel edges


# Demand

In [11]:
def generate_trips(matsim_trips, edges, vehicles):
    """Convert the MATSim trip table into the trips table used downstream.

    The function reads the columns ``mode``, ``start_time``, ``route``,
    ``duration``, ``person_id``, ``end_time``, ``start_link``, ``end_link``,
    ``plan_id``, ``tour_id``, ``trip_id`` and ``stopping_time`` of
    ``matsim_trips``.

    Steps:

    * ``class.vehicle``: the ``vehicle_id`` whose ``vehicle_type`` equals the
      trip's ``mode`` (left join, so it is null for modes without a vehicle).
    * ``class.type``: "Road" for the modes truck / car / freight / ride,
      "Virtual" otherwise.
    * ``class.route``: for Road trips, the space-separated ``route`` mapped
      from MATSim link ids to ``edge_id`` values, skipping the first link.
    * ``class.travel_time``: ``duration`` for non-Road trips, null for Road trips.
    * ``class.origin`` / ``class.destination``: the ``target`` node of the
      edge matching ``start_link`` / ``end_link``.
    * ``agent_id``: the digits of ``plan_id * 100`` followed by the digits of
      ``tour_id``, parsed back as an integer.
    * ``stopping_time``: increased by 2 when the agent's next trip is a Road trip.

    Returns:
        polars.DataFrame with the columns listed in the final ``select``.
    """
    
    # link (matsim) to edge (metro) dictionary
    # NOTE(review): if `edges` holds several rows with the same MATSim_id
    # (read_network appends connector links copied from an existing link),
    # this dict keeps the last edge_id, and the left joins on start_link /
    # end_link below would return one row per matching edge -- confirm.
    matsim_to_metro_links = dict(zip(edges["MATSim_id"].cast(pl.Utf8), edges["edge_id"]))
    
    metro_trips = matsim_trips
    
    # class.vehicle
    metro_trips = (
        metro_trips
        .join(vehicles.select([
            pl.col("vehicle_type").alias("mode"),
            pl.col("vehicle_id").alias("class.vehicle")]),on="mode", how="left")
        .with_columns([

            # class.type
            pl.when(pl.col("mode").is_in(['truck', 'car', 'freight', 'ride']) #| pl.col("route") is None
                   )
            .then(pl.lit("Road"))
            .otherwise(pl.lit("Virtual"))
            .alias("class.type")])
    )
    
    
    metro_trips = (
        
    # departure time: start_time is renamed to dt_choice.departure_time
    metro_trips
    .rename({'start_time':'dt_choice.departure_time'
            })
    .with_columns([
                
    # class.routes
    pl.when(pl.col("class.type") == "Road")
      .then(pl.col("route").str.split(" ") # split route string
            
            # map in the dictionary (the first link of the route is skipped;
            # links missing from the dictionary become null)
            .map_elements(lambda link_list: None if link_list is None
                          else [matsim_to_metro_links.get(link) for link in link_list[1:]],
                          return_dtype=pl.List(pl.Int64))
            .alias("class.route"))
      .otherwise(None),

        
    # class.travel_time
    pl.when(pl.col("class.type") == "Road")
      .then(None)
      .otherwise(pl.col("duration"))
    .alias("class.travel_time")
    ])
    .drop(['person_id' , 'route', 'duration', 'end_time', 'mode'])
    )
    
    # Join with edges: the target node of start_link / end_link gives the
    # trip's origin / destination node
    metro_trips = (
        metro_trips
        .join(
            edges.select([
                pl.col("MATSim_id").alias("start_link"),
                pl.col("target").alias("class.origin")]), # class.origin
            on="start_link",how="left")
        .drop(pl.col('start_link'))
        .join(
            edges.select([
                pl.col("MATSim_id").alias("end_link"),
                pl.col("target").alias("class.destination")]), # class.destination
            on="end_link", how="left")
        .drop(pl.col('end_link'))
    )
    
    
    metro_trips = (
        metro_trips
        .with_columns([
            pl.lit(1).alias("alt_id"),
            pl.lit("Constant").alias("dt_choice.type"),
            ((pl.col("plan_id")*100).cast(pl.Utf8)+ pl.col("tour_id").cast(pl.Utf8))
            .cast(pl.Int64).alias("agent_id")]) # agent_id ={plan_id*100;tour_id}
    )
    
    # Prep next trip for additional stopping times
    metro_trips = (
        metro_trips
        .with_columns([
            # Get class.type of next trip within each agent
            pl.col("class.type")
            .shift(-1)
            .over("agent_id")
            .alias("next_class_type")])
    )
    
        # Apply the additional stopping time
    metro_trips = (
        metro_trips
        .with_columns([
        # Add 2 to stopping_time if the next trip is of type "Road"
            pl.when(
                pl.col("stopping_time").is_not_null() &
                (pl.col("next_class_type") == "Road")
            )
            .then(pl.col("stopping_time") + 2)
            .otherwise(pl.col("stopping_time"))
            .alias("stopping_time")
        ])
        # Select columns
        .select(['agent_id', 'alt_id', 'trip_id',
                 'class.type', 'class.origin', 'class.destination', 'class.vehicle', 'class.route', 
                 'class.travel_time', 'stopping_time', 'dt_choice.type', 'dt_choice.departure_time'
                ])
    )
    
            
                
    return metro_trips

In [12]:
# Read the MATSim trips table (parquet file at MATSIM_TRIPS_PATH) into a polars frame
matsim_trips = pl.read_parquet(MATSIM_TRIPS_PATH)

In [13]:
# Convert the MATSim trips, using the edges (link id -> edge id / nodes) and vehicles (mode -> vehicle id) tables
metro_trips = generate_trips(matsim_trips, edges, vehicles)

# Format Supply for Metropolis

In [39]:
def format_supply(edges, vehicles):
    """Strip the helper columns from the supply tables.

    ``MATSim_id`` is removed from ``edges`` and ``vehicle_type`` from
    ``vehicles``; the inputs themselves are not reassigned for the caller.

    Returns:
        list: ``[edges_without_MATSim_id, vehicles_without_vehicle_type]``.
    """
    return [
        edges.drop(["MATSim_id"]),
        vehicles.drop(["vehicle_type"]),
    ]

In [40]:
# format_supply returns [edges, vehicles]; unpack both tables at once.
supply = format_supply(edges, vehicles)
edges_df, vehicles_df = supply

In [41]:
# Writing supply files
print("Writing files to ", METRO_INPUT)
# Create the target directory if needed, so the writes below cannot fail on a fresh checkout.
os.makedirs(METRO_INPUT, exist_ok=True)
# os.path.join (as used for PARAMETERS["input_files"]) yields the same path
# whether or not METRO_INPUT ends with a separator.
edges_df.write_parquet(os.path.join(METRO_INPUT, "edges.parquet"))
vehicles_df.write_parquet(os.path.join(METRO_INPUT, "vehicles.parquet"))

Writing files to  /Users/andre/Desktop/Cergy/Python_Scripts/runs/pt_10pct/metro_inputs/


# Format Demand for Metropolis

In [42]:
def format_demand(trips):
    """Split the trips table into the agents, alternatives and trips tables.

    Rows are first filtered: trips departing later than 108000 s (30 hours)
    are dropped, as are "Road" trips without an origin or destination node.

    Returns:
        tuple: ``(agents, alts, trips)`` where

        * ``agents`` has one row per distinct ``agent_id`` (sorted),
        * ``alts`` has one row per agent, taken from the agent's
          earliest-departing trip (sorted by ``agent_id``),
        * ``trips`` is the filtered table without the ``dt_choice.type`` and
          ``dt_choice.departure_time`` columns.
    """
    # Shared literal expressions for the constant columns.
    null = pl.lit(None)
    zero = pl.lit(0.0)

    # format trips
    # Eliminate trips departing after 30 hours (108000 s) and Road trips
    # whose origin or destination could not be resolved.
    departs_in_window = pl.col("dt_choice.departure_time") <= 108000
    road_without_endpoint = (pl.col("class.type") == "Road") & (
        pl.col("class.origin").is_null() | pl.col("class.destination").is_null()
    )
    trips = trips.filter(departs_in_window, ~road_without_endpoint)

    # format agents
    agent_ids = trips.select("agent_id").unique()
    agents = agent_ids.with_columns([
        pl.lit("Deterministic").alias("alt_choice.type"),
        zero.alias("alt_choice.u"),
        null.alias("alt_choice.mu"),
    ]).sort("agent_id")

    # format alts: one row per agent, from its earliest-departing trip
    first_trip_per_agent = (
        trips.sort("dt_choice.departure_time")
        .unique(subset=["agent_id"], keep="first")
    )
    alts = first_trip_per_agent.select([
        "agent_id",
        "alt_id",
        null.alias("origin_delay"),
        pl.col("dt_choice.type"),
        "dt_choice.departure_time",

        null.alias("dt_choice.interval"),
        null.alias("dt_choice.model.type"),
        zero.alias("dt_choice.model.u"),
        zero.alias("dt_choice.model.mu"),
        null.alias("dt_choice.offset"),

        zero.alias("constant_utility"),
        null.alias("total_travel_utility.one"),
        null.alias("total_travel_utility.two"),
        null.alias("total_travel_utility.three"),
        null.alias("total_travel_utility.four"),

        null.alias("origin_utility.type"),
        zero.alias("origin_utility.tstar"),
        zero.alias("origin_utility.beta"),
        zero.alias("origin_utility.gamma"),
        zero.alias("origin_utility.delta"),

        null.alias("destination_utility.type"),
        zero.alias("destination_utility.tstar"),
        zero.alias("destination_utility.beta"),
        zero.alias("destination_utility.gamma"),
        zero.alias("destination_utility.delta"),

        pl.lit(True).alias("pre_compute_route"),
    ]).sort("agent_id")

    # The departure-time columns now live in `alts`; remove them from the trips.
    trips = trips.drop(["dt_choice.type", "dt_choice.departure_time"])

    return agents, alts, trips

# Write Metro input

In [43]:
# Parameters
print("Writing Metropolis parameters")
# Serialise PARAMETERS as JSON directly into metro_inputs/parameters.json.
parameters_path = os.path.join(METRO_INPUT, "parameters.json")
with open(parameters_path, "w") as f:
    json.dump(PARAMETERS, f)

Writing Metropolis parameters


In [44]:
# Writing files
# NOTE(review): this repeats the supply export already done right after
# format_supply (same frames, same target files) -- one of the two cells is probably redundant.
print("Writing Metropolis supply in ", METRO_INPUT)
# Create the target directory if needed, so the writes below cannot fail on a fresh checkout.
os.makedirs(METRO_INPUT, exist_ok=True)
# os.path.join (as used for PARAMETERS["input_files"]) yields the same path
# whether or not METRO_INPUT ends with a separator.
edges_df.write_parquet(os.path.join(METRO_INPUT, "edges.parquet"))
vehicles_df.write_parquet(os.path.join(METRO_INPUT, "vehicles.parquet"))

Writing Metropolis supply in  /Users/andre/Desktop/Cergy/Python_Scripts/runs/pt_10pct/metro_inputs/


In [45]:
# Formatting
# format_demand returns (agents, alts, trips); call it once and unpack,
# instead of recomputing the whole transformation for each table.
agents_df, alts_df, trips_df = format_demand(metro_trips)

## At least 1 second of walking (virtual travel) time for freight agents

### Freight

In [46]:
# Agents that have at least one trip using vehicle id 3.
# NOTE(review): presumably vehicle id 3 is the freight vehicle type (ids are
# the row positions of the vehicle-types table) -- TODO confirm, and prefer a named constant.
freight_agents = trips_df.filter(pl.col("class.vehicle") == 3).select("agent_id").unique()

# For those agents, raise every non-null class.travel_time to at least 1;
# all other rows keep their value unchanged.
trips_df = trips_df.with_columns([
    pl.when(
        pl.col("agent_id").is_in(freight_agents["agent_id"]) &
        pl.col("class.travel_time").is_not_null()
    )
    .then(pl.max_horizontal([pl.col("class.travel_time"), pl.lit(1)]))
    .otherwise(pl.col("class.travel_time"))
    .alias("class.travel_time")
])

# Write Demand Files

In [47]:
#agents_df=agents_df.sample(fraction= 1, with_replacement=False, shuffle=True)

In [48]:
# Replace class.route by an all-null column: the routes mapped in
# generate_trips are discarded before the trips are written.
# NOTE(review): presumably intentional so that routes are not imposed -- confirm.
trips_df = trips_df.with_columns(pl.lit(None).alias("class.route"))

In [49]:
# Preview of the final trips table (class.route is entirely null at this point)
trips_df

agent_id,alt_id,trip_id,class.type,class.origin,class.destination,class.vehicle,class.route,class.travel_time,stopping_time
i64,i32,i64,str,i64,i64,i64,null,f64,f64
1,1,1,"""Virtual""",158987061,,,,603.0,0.0
1,1,2,"""Virtual""",,,,,2542.0,0.0
1,1,3,"""Virtual""",,26646233,,,495.0,30953.0
2,1,4,"""Virtual""",26646233,,,,495.0,0.0
2,1,5,"""Virtual""",,,,,2197.0,0.0
…,…,…,…,…,…,…,…,…,…
526110001,1,2,"""Road""",101473666,6171409038,0,,,0.0
526110001,1,3,"""Virtual""",6171409038,6171409038,,,0.0,3765.0
526110001,1,4,"""Virtual""",6171409038,6171409038,,,0.0,2.0
526110001,1,5,"""Road""",6171409038,101473666,0,,,0.0


In [50]:
# Writing files
print("Writing Metropolis input to", METRO_INPUT)
# Create the target directory if needed, so the writes below cannot fail on a fresh checkout.
os.makedirs(METRO_INPUT, exist_ok=True)

# File names match PARAMETERS["input_files"]; os.path.join yields the same
# path whether or not METRO_INPUT ends with a separator.
print("Writing Metropolis agents")
agents_df.write_parquet(os.path.join(METRO_INPUT, "agents.parquet"))

print("Writing Metropolis alternatives")
alts_df.write_parquet(os.path.join(METRO_INPUT, "alts.parquet"))

print("Writing Metropolis trips")
trips_df.write_parquet(os.path.join(METRO_INPUT, "trips.parquet"))
print("Input files have been successfully written")

Writing Metropolis input to /Users/andre/Desktop/Cergy/Python_Scripts/runs/pt_10pct/metro_inputs/
Writing Metropolis agents
Writing Metropolis alternatives
Writing Metropolis trips
Input files have been successfully written
