In [1]:
import polars as pl
import plotly.express as px
import toml
from pathlib import Path
import util
import psrc_theme

# to show plotly figures in quarto HTML file
import plotly.io as pio
# Two renderers are combined with "+", so the figure is emitted in both output formats.
pio.renderers.default = "plotly_mimetype+notebook_connected"
# NOTE(review): "psrc_color" is not a built-in plotly template; presumably it is
# registered as a side effect of `import psrc_theme` above — confirm.
pio.templates.default = "simple_white+psrc_color" # set plotly template

In [2]:
# The configuration folder sits four directory levels above the notebook's
# working directory. The path is built from individual ".." components instead
# of a backslash-separated string: "\." is an invalid escape sequence in a
# normal string literal, and backslashes are only path separators on Windows.
config_dir = Path.cwd().joinpath('..', '..', '..', '..', 'configuration')
config = toml.load(config_dir / 'validation_configuration.toml')
input_config = toml.load(config_dir / 'input_configuration.toml')

# Load the household, person and tour tables used by this notebook.
data = util.ValidationData(config, input_config, ['hh', 'person', 'tour'])

In [3]:
df_tour = data.tour
df_hh = data.hh
df_person = data.person
# df_parcel = data.land_use

# Top-coded household attributes: values of 4 or more collapse into "4+".
# String results are wrapped in pl.lit() so they are always treated as literal
# values; newer polars releases interpret a bare string passed to
# then()/otherwise() as a column name.
df_hh = df_hh.with_columns(
    # auto_ownership with 4+
    pl.when(pl.col('hhvehs') >= 4.0)
    .then(pl.lit("4+"))
    .otherwise(pl.col('hhvehs').cast(str))
    .alias('auto_ownership_4+'),
    # hhsize with 4+
    pl.when(pl.col('hhsize') >= 4.0)
    .then(pl.lit("4+"))
    .otherwise(pl.col('hhsize').cast(str))
    .alias('hhsize_4+'),
)

# Add column for (potential) drivers: household size minus the members counted
# in hh515 and hhcu5 (presumably ages 5-15 and under 5 — confirm), i.e. all
# household members 16 and above.
df_hh = df_hh.with_columns(
    (pl.col('hhsize') - pl.col('hh515') - pl.col('hhcu5')).alias('drivers')
)

# auto availability: vehicle surplus/deficit relative to drivers and to workers,
# then a category label for each. The first matching condition wins, so
# "no driver"/"no worker" take precedence over "no car".
df_hh = df_hh.with_columns(
    (pl.col('hhvehs') - pl.col('drivers')).alias('auto_count_driver'),
    (pl.col('hhvehs') - pl.col('hhwkrs')).alias('auto_count_worker')
).with_columns(
    pl.when(pl.col('drivers') <= 0).then(pl.lit("no driver"))
    .when(pl.col('hhvehs') <= 0).then(pl.lit("no car"))
    .when(pl.col('auto_count_driver') < 0).then(pl.lit("cars fewer than drivers"))
    .otherwise(pl.lit("enough cars")).alias('auto_available_driver')
).with_columns(
    pl.when(pl.col('hhwkrs') <= 0).then(pl.lit("no worker"))
    .when(pl.col('hhvehs') <= 0).then(pl.lit("no car"))
    .when(pl.col('auto_count_worker') < 0).then(pl.lit("cars fewer than workers"))
    .otherwise(pl.lit("enough cars")).alias('auto_available_worker')
)


# add person type labels
ptype_cat = {1: "full time worker",
             2: "part time worker",
             3: "non-worker age 65+",
             4: "other non-working adult",
             5: "university student",
             6: "grade school student/child age 16+",
             7: "child age 5-15",
             8: "child age 0-4"}
df_person = df_person.with_columns(
    pl.col('pptyp').map_dict(ptype_cat).alias('pptyp_label')
)

# add mode type labels
mode_cat = {1: "1: walk",
            2: "2: bike",
            3: "3: sov",
            4: "4: hov 2",
            5: "5: hov 3+",
            6: "6: walk to transit",
            7: "7: park-and-ride",
            8: "8: school bus",
            9: "9: tnc"}
# add tour purpose labels; purpose codes 4-10 share one "other home-based" label
pdpurp_cat = {1: "1: Work",
              2: "2: School",
              3: "3: Escort",
              4: "4: other home-based",
              5: "4: other home-based",
              6: "4: other home-based",
              7: "4: other home-based",
              8: "4: other home-based",
              9: "4: other home-based",
              10: "4: other home-based"}
df_tour = df_tour.with_columns(
    pl.col('tmodetp').map_dict(mode_cat).alias('tmodetp_label'),
    # Purpose codes must be labelled with pdpurp_cat; mapping them through
    # mode_cat would label purposes with mode names.
    pl.col('pdpurp').map_dict(pdpurp_cat).alias('pdpurp_label'),
)

# merge household data with parcel data
# df_hh = df_hh.join(df_parcel, how="left", left_on='hhno', right_on='parcelid')

# merge person data with household data
df_person = df_person.join(df_hh, how='left', on=['hhno', 'source'])

# merge tour data with person data
df_tour = df_tour.join(df_person, how='left', on=['pno', 'hhno', 'source'])

In [4]:
# Keep tours with parent == 0 (presumably tours that are not subtours — confirm)
# whose pdpurp code is not 1 (1 is labelled "1: Work" in pdpurp_cat).
# NOTE(review): the name `wk_tour` and the "school tour" plot titles used below do
# not match this "everything except work" filter — confirm which purposes are intended.
wk_tour = df_tour.filter((pl.col('parent') == 0) & ~(pl.col('pdpurp').is_in([1])))

In [5]:
# Weighted tour totals and record counts per (source, mode), followed by each
# mode's share of its source's weighted total.
df_plot = (
    wk_tour
    .groupby(['source', 'tmodetp_label'])
    .agg(
        toexpfac_sum=pl.col('toexpfac').sum(),
        sample_count=pl.col('toexpfac').count(),
    )
    .with_columns(
        (pl.col('toexpfac_sum') / pl.col('toexpfac_sum').sum().over('source')).alias('percentage')
    )
)

# Grouped bars: one bar per source for every mode; hover shows the record count.
fig = px.bar(
    df_plot.sort(by=['source']),
    x="tmodetp_label",
    y="percentage",
    color="source",
    barmode="group",
    hover_data=['sample_count'],
    title="school tour mode",
)
fig.update_layout(
    height=400,
    width=700,
    font={'size': 11},
    xaxis={'dtick': 1, 'categoryorder': 'category ascending'},
    yaxis={'tickformat': ".2%"},
)
fig.show()

### mode choice by segment

In [6]:
def plot_mode_choice(df: pl.DataFrame, grp_var: str, order_list: dict, title_name: str, n_nol: int, height=400, width=800) -> None:
    """Plot tour mode shares by data source, faceted by one grouping column.

    A mode's share is the sum of ``toexpfac`` for that mode divided by the
    ``toexpfac`` total of its (``source``, ``grp_var``) group, so within each
    facet the shares of one source add up to 100%.

    Args:
        df: Tour records containing ``source``, ``tmodetp_label``, ``toexpfac``
            and the ``grp_var`` column.
        grp_var: Name of the column that defines the facets.
        order_list: Mapping of column name to ordered category values; passed
            to plotly express as ``category_orders``.
        title_name: Text appended to "school tour mode choice by " in the title.
        n_nol: Number of facet columns per row (plotly's ``facet_col_wrap``).
        height: Figure height passed to ``update_layout``.
        width: Figure width passed to ``update_layout``.

    Returns:
        None. The figure is displayed with ``fig.show()``.
    """
    group_keys = ['source', grp_var, 'tmodetp_label']

    # Compute the weighted total and the record count in a single aggregation.
    # A separate group-by joined back on the keys gives the same columns, but
    # the inner join can drop groups with a null key on polars versions where
    # null join keys do not match.
    df_plot = df.groupby(group_keys).agg(
        toexpfac_sum=pl.col('toexpfac').sum(),
        sample_count=pl.col('toexpfac').count(),
    ).with_columns(
        (pl.col('toexpfac_sum') / pl.col('toexpfac_sum').sum().over(['source', grp_var])).alias('percentage')
    )

    fig = px.bar(
        df_plot.sort(by=['source', 'tmodetp_label']),
        x="percentage",
        y="tmodetp_label",
        color="source",
        barmode="group",
        facet_col=grp_var,
        facet_col_wrap=n_nol,
        orientation='h',
        hover_data=['sample_count'],
        category_orders=order_list,
        title="school tour mode choice by " + title_name
    )
    fig.update_layout(height=height, width=width)
    # Facet titles default to "<column>=<value>"; keep only the value.
    fig.for_each_annotation(lambda a: a.update(text=a.text.split("=")[-1]))
    fig.for_each_xaxis(lambda a: a.update(tickformat=".1%"))
    fig.show()

In [7]:
# Mode shares faceted by person type. The tmodetp_label order lists the labels
# that mode_cat actually produces (mode 9 is "9: tnc").
plot_mode_choice(
    wk_tour,
    "pptyp_label",
    {
        "pptyp_label": ["full time worker", "part time worker", "non-worker age 65+", "other non-working adult",
                        "university student", "grade school student/child age 16+", "child age 5-15", "child age 0-4"],
        "tmodetp_label": ["1: walk", "2: bike", "3: sov", "4: hov 2", "5: hov 3+", "6: walk to transit",
                          "7: park-and-ride", "8: school bus", "9: tnc"],
    },
    "person type",
    n_nol=2,
    height=1000,
)

In [8]:
# Mode shares faceted by household size (top-coded at "4+"). The tmodetp_label
# order lists the labels that mode_cat actually produces (mode 9 is "9: tnc").
plot_mode_choice(
    wk_tour,
    "hhsize_4+",
    {
        "hhsize_4+": ["1", "2", "3", "4+"],
        "tmodetp_label": ["1: walk", "2: bike", "3: sov", "4: hov 2", "5: hov 3+", "6: walk to transit",
                          "7: park-and-ride", "8: school bus", "9: tnc"],
    },
    "household size",
    n_nol=2,
    height=600,
)

In [9]:
# Mode shares faceted by household vehicle count (top-coded at "4+"); records
# whose vehicle count is "-1" are excluded. The tmodetp_label order lists the
# labels that mode_cat actually produces (mode 9 is "9: tnc").
plot_mode_choice(
    wk_tour.filter(pl.col('auto_ownership_4+') != "-1"),
    "auto_ownership_4+",
    {
        "auto_ownership_4+": ["0", "1", "2", "3", "4+"],
        "tmodetp_label": ["1: walk", "2: bike", "3: sov", "4: hov 2", "5: hov 3+", "6: walk to transit",
                          "7: park-and-ride", "8: school bus", "9: tnc"],
    },
    "auto ownership",
    n_nol=2,
    height=800,
)

In [10]:
# Mode shares faceted by vehicles relative to workers; households without
# workers are excluded. The tmodetp_label order lists the labels that mode_cat
# actually produces (mode 9 is "9: tnc").
plot_mode_choice(
    wk_tour.filter(pl.col('auto_available_worker') != "no worker"),
    "auto_available_worker",
    {
        "auto_available_worker": ["no car", "cars fewer than workers", "enough cars"],
        "tmodetp_label": ["1: walk", "2: bike", "3: sov", "4: hov 2", "5: hov 3+", "6: walk to transit",
                          "7: park-and-ride", "8: school bus", "9: tnc"],
    },
    "auto availability (worker)",
    n_nol=3,
    height=400,
    width=1000,
)

In [11]:
# Mode shares faceted by vehicles relative to potential drivers; households
# without drivers are excluded. The tmodetp_label order lists the labels that
# mode_cat actually produces (mode 9 is "9: tnc").
plot_mode_choice(
    wk_tour.filter(pl.col('auto_available_driver') != "no driver"),
    "auto_available_driver",
    {
        "auto_available_driver": ["no car", "cars fewer than drivers", "enough cars"],
        "tmodetp_label": ["1: walk", "2: bike", "3: sov", "4: hov 2", "5: hov 3+", "6: walk to transit",
                          "7: park-and-ride", "8: school bus", "9: tnc"],
    },
    "auto availability (driver, showing only households with at least one driver)",
    n_nol=3,
    height=500,
    width=1000,
)