In [1]:
from pathlib import Path
import pandas as pd

from pam.read.matsim import load_attributes_map_from_v12, stream_matsim_persons
from pam.utils import datetime_to_matsim_time
from pam.core import Population, Person
from pam.activity import Activity, Plan, Leg

In [2]:
# Root folder of the model run whose plans are processed in this notebook.
dir = Path("C:/Users/fred/Data/2019_baseline_re_run_20221209_low_cost_bus_walk")

# Outputs: everything is written under <run>/utils. Only that last folder is
# created here (no parents=True), so the run folder itself must already exist.
output_dir = dir.joinpath("utils")
output_dir.mkdir(exist_ok=True)
schedules_path = output_dir.joinpath("seq_utils.csv")
attributes_path = output_dir.joinpath("attributes.csv")

# Inputs: each experienced-plans path repeats its last component, i.e. the
# file sits inside a folder carrying the same name as the file.
input_plans = dir.joinpath("input_plans.xml")
iter_50 = dir.joinpath(
    "output_experienced_plans_ITER50.xml",
    "output_experienced_plans_ITER50.xml",
)
iter_100 = dir.joinpath(
    "output_experienced_plans_ITER100.xml",
    "output_experienced_plans_ITER100.xml",
)
iter_150 = dir.joinpath(
    "output_experienced_plans_ITER150.xml",
    "output_experienced_plans_ITER150.xml",
)
# Note the different naming pattern for the final snapshot (".xml_ITER200").
iter_200 = dir.joinpath(
    "output_experienced_plans.xml_ITER200",
    "output_experienced_plans.xml_ITER200",
)

# Displayed as the cell output: quick check that the last snapshot is present.
iter_200.exists()


True

In [3]:
# Reader options shared by every snapshot, so the four calls cannot drift apart.
stream_options = dict(
    simplify_pt_trips=True,
    # crop=True,
    keep_non_selected=False,
    leg_attributes=True,
    leg_route=True,
)

# One person stream per experienced-plans snapshot, in iteration order.
streamer1 = stream_matsim_persons(iter_50, **stream_options)
streamer2 = stream_matsim_persons(iter_100, **stream_options)
streamer3 = stream_matsim_persons(iter_150, **stream_options)
streamer4 = stream_matsim_persons(iter_200, **stream_options)

streamers = [streamer1, streamer2, streamer3, streamer4]
# Shorter alternative for a quick run:
# streamers = [streamer1, streamer2]

In [4]:
def dt_to_min(dt) -> int:
    """Convert `dt` to a whole number of minutes (hours * 60 + minutes).

    `dt` is first rendered by `datetime_to_matsim_time`; the result is split
    on ":" into exactly three parts. The third part (seconds) is discarded,
    so the value is truncated rather than rounded.
    """
    hours, minutes, _seconds = datetime_to_matsim_time(dt).split(":")
    return 60 * int(hours) + int(minutes)


def person_to_schedule(person: Person) -> tuple:
    """Flatten a person's plan into one row per plan component.

    Returns:
        (person.pid, rows), where each row is
        [act, start (min), end (min), mode, distance, plan score].
        Legs report their own mode and euclidean distance; every other
        component gets mode "NA" and distance 0. The plan score is repeated
        on every row.
    """
    plan_score = person.plan.score
    rows = []
    for part in person.plan:
        # NOTE(review): pandas.read_csv parses the literal "NA" as a missing
        # value by default -- confirm downstream readers of the CSV expect that.
        mode, distance = (
            (part.mode, part.euclidean_distance)
            if isinstance(part, Leg)
            else ("NA", 0)
        )
        row = [
            part.act,
            dt_to_min(part.start_time),
            dt_to_min(part.end_time),
            mode,
            distance,
            plan_score,
        ]
        rows.append(row)
    return person.pid, rows


def add_data(record, pid, iteration):
    """Return a new list of rows, each prefixed with `pid` and `iteration`.

    `record` is a list of list-rows; the input rows are not modified.
    """
    prefix = [pid, iteration]
    return [prefix + row for row in record]

In [5]:
# Flatten every streamed plan into rows. Each non-empty plan gets its own
# running `uid` (so the same person appears once per iteration), and `mapper`
# records which original pid each uid came from.
# NOTE(review): if `streamers` holds one-shot generators, re-running this cell
# without re-creating them would collect no rows -- confirm.
schedule_rows = []
mapper = {}
uid = 0

for iteration, streamer in zip([50, 100, 150, 200], streamers):
    print("iteration: ", iteration)
    for person in streamer:
        pid, record = person_to_schedule(person)
        if not record:
            # Nothing to export for an empty plan; no uid is consumed.
            continue
        mapper[uid] = pid
        schedule_rows.extend(add_data(record, uid, iteration))
        uid += 1

# The "pid" column/index holds the running uid, not the original person id.
schedule_columns = ["pid", "iter", "act", "start", "end", "mode", "distance", "score"]
schedules = pd.DataFrame(schedule_rows, columns=schedule_columns).set_index("pid")
del schedule_rows  # the raw row lists are no longer needed once the frame exists

# Both numbers should agree: one distinct index value per mapper entry.
print(schedules.index.nunique())
print(len(mapper))

iteration:  50
iteration:  100
iteration:  150
iteration:  200
477996
477996


In [6]:
# Attributes are read from the input plans and looked up by original pid,
# then re-keyed by the uid used in `schedules` (one row per mapper entry).
# A pid missing from the loaded map raises KeyError here.
person_attributes = load_attributes_map_from_v12(input_plans)
unused_columns = [
    "hid",
    "hid_old",
    "hzone",
    "householdid",
    "individualid",
    "surveyyear",
]
attributes = (
    pd.DataFrame({uid: person_attributes[pid] for uid, pid in mapper.items()})
    .T  # dict-of-dicts builds one column per uid; transpose to one row per uid
    .drop(columns=unused_columns)
    .rename_axis("pid")
    .fillna("unknown")
)
attributes.head()

Unnamed: 0_level_0,hcounty,gender,age,workstatus,hasLicence,hasCar,hasBike,car_avail,subpopulation,hhincome,sex,age_group,CarType,CarCO2
pid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
0,Barnsley,male,82,inactive,no,True,False,never,car_avail_no_low,low,m,80 to 84,unknown,unknown
1,Barnsley,female,82,inactive,no,True,False,never,car_avail_no_low,low,f,80 to 84,unknown,unknown
2,Doncaster,male,82,inactive,no,True,False,never,car_avail_no_low,low,m,80 to 84,unknown,unknown
3,Doncaster,female,82,inactive,no,True,False,never,car_avail_no_low,low,f,80 to 84,unknown,unknown
4,Doncaster,male,82,inactive,no,True,False,never,car_avail_no_low,low,m,80 to 84,unknown,unknown


In [8]:
# Row count of the attributes table (one row per key in `mapper`).
attributes.shape[0]

477996

In [9]:
# Write both tables to the output folder; the index (named "pid") is included.
schedules.to_csv(path_or_buf=schedules_path)
attributes.to_csv(path_or_buf=attributes_path)

In [10]:
# Summary statistics of the numeric schedule columns, as a final sanity check.
schedules.describe()

Unnamed: 0,iter,start,end,distance,score
count,3374298.0,3374298.0,3374298.0,3374298.0,3374298.0
mean,124.9765,717.7813,921.1639,7.449847,88.33717
std,55.90553,386.6237,329.7472,31.64926,121.6332
min,50.0,0.0,0.0,0.0,-2587.492
25%,50.0,502.0,641.0,0.0,33.97256
50%,100.0,766.0,921.0,0.0,124.7424
75%,150.0,1013.0,1151.0,3.82277,176.0431
max,200.0,1919.0,1919.0,537.5053,237.1415
