In [1]:
import os
import pandas as pd
import numpy as np
import plotly.express as px
import toml
import polars as pl
from pathlib import Path
import util
import sys

sys.path.append("../../notebook_styling")
import psrc_theme

In [2]:
# Locate the shared configuration folder four levels above the working directory.
# The path is assembled from separate components instead of a single
# '..\..\..\..\configuration' literal: backslashes in a non-raw string are invalid
# escape sequences, and a backslash is only a path separator on Windows.
config_dir = Path.cwd() / '..' / '..' / '..' / '..' / 'configuration'

config = toml.load(config_dir / 'validation_configuration.toml')
input_config = toml.load(config_dir / 'input_configuration.toml')

# Load only the tables this notebook needs.
data = util.ValidationData(config, input_config, ['hh', 'person', 'tour', 'land_use', 'parcel_geog'])

In [5]:
# Convert the loaded tables to pandas frames for the analysis below.
hh = data.hh.to_pandas()
tour = data.tour.to_pandas()

# Left-join parcel geography onto persons through the 'pwpcl' parcel id
# (presumably the work parcel - confirm against the data dictionary),
# then convert the joined table to pandas.
person_with_geog = data.person.join(
    data.parcel_geog, how="left", left_on='pwpcl', right_on='ParcelID'
)
person = person_with_geog.to_pandas()

In [6]:
def _label_capped_at_four(count):
    """Return "4+" for values of 4 or more, otherwise the value's plain string form."""
    return "4+" if count >= 4.0 else str(count)


# Work on copies so the frames loaded above stay untouched.
df_hh = hh.copy()
df_person = person.copy()
df_tour = tour.copy()

# --- household attributes -------------------------------------------------
# Top-coded ("4+") labels for vehicle count and household size.
df_hh['auto_ownership_4+'] = df_hh['hhvehs'].map(_label_capped_at_four)
df_hh['hhsize_4+'] = df_hh['hhsize'].map(_label_capped_at_four)

# Potential drivers: household size minus the 'hh515' and 'hhcu5' counts
# (the original comment describes this as all members aged 16 and above;
# 'hhhsc' is not subtracted).
df_hh['drivers'] = df_hh['hhsize'].sub(df_hh['hh515']).sub(df_hh['hhcu5'])

# Vehicles relative to potential drivers, classified in priority order:
# no driver -> no car -> fewer cars than drivers -> enough cars.
df_hh['auto_count_driver'] = df_hh['hhvehs'].sub(df_hh['drivers'])
has_no_driver = df_hh['drivers'] <= 0
has_no_car = df_hh['hhvehs'] <= 0
fewer_cars_than_drivers = df_hh['auto_count_driver'] < 0
car_sufficiency = np.where(fewer_cars_than_drivers, "cars fewer than drivers", "enough cars")
with_car_check = np.where(has_no_car, "no car", car_sufficiency)
df_hh['auto_available_driver'] = np.where(has_no_driver, "no driver", with_car_check)

# --- person type labels ---------------------------------------------------
ptype_cat = {
    1: "full time worker",
    2: "part time worker",
    3: "non-worker age 65+",
    4: "other non-working adult",
    5: "university student",
    6: "grade school student/child age 16+",
    7: "child age 5-15",
    8: "child age 0-4",
}
df_person['pptyp_label'] = df_person['pptyp'].map(ptype_cat)

# --- tour mode labels -----------------------------------------------------
mode_cat = {
    1: "1: walk",
    2: "2: bike",
    3: "3: sov",
    4: "4: hov 2",
    5: "5: hov 3+",
    6: "6: walk to transit",
    7: "7: park-and-ride",
    8: "8: school bus",
    9: "9:tnc",
}
df_tour['tmodetp_label'] = df_tour['tmodetp'].map(mode_cat)

# --- combine --------------------------------------------------------------
# Household attributes (e.g. auto ownership) onto persons, then persons onto tours.
df_person = df_person.merge(df_hh, how='left', on=['hhno', 'source'])
df_tour = df_tour.merge(df_person, how='left', on=['pno', 'hhno', 'source'])

In [7]:
# Tours with a non-zero 'parent' are kept as work-based subtours; tours with
# parent == 0 and pdpurp == 1 are kept as work tours (presumably purpose
# code 1 is "work" - confirm against the data dictionary).
has_parent = df_tour['parent'].ne(0)
is_top_level_work = df_tour['parent'].eq(0) & df_tour['pdpurp'].eq(1)

wk_base_subtour = df_tour[has_parent].copy()
wk_tour = df_tour[is_top_level_work].copy()

In [8]:
# Work tours, leaving out records whose 'subtrs' value is 11.
test = wk_tour[wk_tour['subtrs'].ne(11)].copy()
by_source_subtrs = test.groupby(['source', 'subtrs'])['toexpfac']

# Weighted totals per (source, subtrs) and each total's share within its source.
df_plot = by_source_subtrs.sum().reset_index()
source_totals = df_plot.groupby('source')['toexpfac'].transform('sum')
df_plot['percentage'] = df_plot['toexpfac'] / source_totals

# Unweighted record counts, shown on hover.
df_plot_ct = (
    by_source_subtrs.count()
    .reset_index()
    .rename(columns={'toexpfac': 'sample count'})
)
df_plot = df_plot.merge(df_plot_ct, on=['source', 'subtrs'])

fig = px.bar(
    df_plot.sort_values(by=['source']),
    x="subtrs",
    y="percentage",
    color="source",
    barmode="group",
    hover_data=['sample count'],
    title="number of workbased subtours (only work tours)",
)
fig.update_layout(
    height=400,
    width=700,
    font={"size": 11},
    xaxis={"dtick": 1, "categoryorder": 'category ascending'},
    yaxis={"tickformat": ".2%"},
)
fig.show()


In [6]:
# Inspect the work tours whose 'subtrs' value is 11 (the records left out of the chart above).
inspect_cols = ['source', 'hhno', 'pno', 'day', 'tour', 'subtrs', 'parent', 'pdpurp', 'tmodetp_label']
wk_tour.loc[wk_tour['subtrs'].eq(11), inspect_cols]

Unnamed: 0,source,hhno,pno,day,tour,subtrs,parent,pdpurp,tmodetp_label


In [7]:
by_source_mode = wk_base_subtour.groupby(['source', 'tmodetp_label'])['toexpfac']

# Weighted totals per (source, mode) and each mode's share within its source.
df_plot = by_source_mode.sum().reset_index()
source_totals = df_plot.groupby('source')['toexpfac'].transform('sum')
df_plot['percentage'] = df_plot['toexpfac'] / source_totals

# Unweighted record counts, shown on hover.
df_plot_ct = (
    by_source_mode.count()
    .reset_index()
    .rename(columns={'toexpfac': 'sample count'})
)
df_plot = df_plot.merge(df_plot_ct, on=['source', 'tmodetp_label'])

fig = px.bar(
    df_plot.sort_values(by=['source']),
    x="tmodetp_label",
    y="percentage",
    color="source",
    barmode="group",
    hover_data=['sample count'],
    title="workbased subtour mode",
)
fig.update_layout(
    height=400,
    width=700,
    font={"size": 11},
    xaxis={"dtick": 1, "categoryorder": 'category ascending'},
    yaxis={"tickformat": ".2%"},
)
fig.show()

### Work-based subtour mode choice by segment

In [10]:
def plot_mode_choice(df: pd.DataFrame, grp_var: str, order_list: dict, title_name: str, n_nol: int, height=400, width=800):
    """Show weighted mode shares by source, one facet per value of ``grp_var``.

    Parameters
    ----------
    df : pd.DataFrame
        Tour records. Must contain the columns 'source', 'tmodetp_label',
        'toexpfac' and the column named by ``grp_var``.
    grp_var : str
        Column used to split the chart into facets. Shares are computed so that
        they sum to 100% within each (source, grp_var) combination.
    order_list : dict
        Mapping of column name to the display order of its values, forwarded to
        plotly's ``category_orders``. Values may be any iterable (list, tuple,
        ``dict.values()`` view, ...); they are converted to lists before use.
    title_name : str
        Text appended to the chart title after "workbased subtour mode choice by ".
    n_nol : int
        Number of facet columns before wrapping (plotly's ``facet_col_wrap``).
    height, width : int
        Figure size in pixels.

    Returns
    -------
    None
        The figure is displayed with ``fig.show()``.
    """
    group_cols = ['source', grp_var, 'tmodetp_label']
    weights_by_group = df.groupby(group_cols)['toexpfac']

    # Weighted totals per group and each mode's share within its (source, grp_var).
    df_plot = weights_by_group.sum().reset_index()
    segment_totals = df_plot.groupby(['source', grp_var])['toexpfac'].transform('sum')
    df_plot['percentage'] = df_plot['toexpfac'] / segment_totals

    # Unweighted record counts, shown on hover.
    df_plot_ct = (
        weights_by_group.count()
        .reset_index()
        .rename(columns={'toexpfac': 'sample count'})
    )
    df_plot = df_plot.merge(df_plot_ct, on=group_cols)

    # plotly documents category_orders values as lists; normalise so callers may
    # pass other iterables such as dict views.
    category_orders = {col: list(values) for col, values in order_list.items()}

    fig = px.bar(df_plot.sort_values(['source', 'tmodetp_label']),
                 x="percentage", y="tmodetp_label", color="source", barmode="group",
                 facet_col=grp_var, facet_col_wrap=n_nol, orientation='h',
                 hover_data=['sample count'],
                 category_orders=category_orders,
                 # Every caller in this notebook passes work-based subtours, so the
                 # title names subtours rather than work tours.
                 title="workbased subtour mode choice by " + title_name)
    fig.update_layout(height=height, width=width)
    # Facet annotations read "<column>=<value>"; keep only the value.
    fig.for_each_annotation(lambda a: a.update(text=a.text.split("=")[-1]))
    fig.for_each_xaxis(lambda a: a.update(tickformat=".1%"))
    fig.show()

In [None]:
# Counties to show; the same list drives both the row filter and the facet order.
incl_county = ["King", "Kitsap", "Pierce", "Snohomish"]

plot_mode_choice(
    wk_base_subtour.loc[wk_base_subtour["CountyName"].isin(incl_county)],
    "CountyName",
    {"CountyName": incl_county,
     # Pass a real list: plotly documents category_orders values as lists,
     # not dict views.
     "tmodetp_label": list(mode_cat.values())},
    "work county",
    2, 600)

In [9]:
# Mode shares by household auto ownership, skipping records labelled "-1".
# The mode order is taken from mode_cat so it always matches the labels that
# actually occur in 'tmodetp_label' (the previous hard-coded list named
# "9: other–survey only", which mode_cat does not produce).
plot_mode_choice(wk_base_subtour.loc[wk_base_subtour['auto_ownership_4+'] != "-1"], "auto_ownership_4+",
                 {"auto_ownership_4+": ["0", "1", "2", "3", "4+"],
                  "tmodetp_label": list(mode_cat.values())},
                 "auto ownership", 2, 800)

In [10]:
# Mode shares by auto availability, restricted to households with at least one
# potential driver. The excluded label must be spelled exactly "no driver"
# (the value assigned to 'auto_available_driver'); the earlier "no dirver"
# typo matched nothing, so no-driver households were never filtered out.
# The mode order is taken from mode_cat so it matches the labels in the data.
plot_mode_choice(wk_base_subtour.loc[wk_base_subtour['auto_available_driver'] != "no driver"], "auto_available_driver",
                 {"auto_available_driver": ["no car", "cars fewer than drivers", "enough cars"],
                  "tmodetp_label": list(mode_cat.values())},
                 "auto availability (driver, showing only households with at least one driver)", 3, 500, 1000)