In [2]:
import os
import sys
sys.path.insert(0, 'X:\\bayarea_urbansim\\scripts')
from baus_analytics_tools import *

In [None]:
# `folder`: directory holding the BAUS run output CSVs that the cells below read.
# `export_f`: directory the NP-vs-P comparison CSVs are written to.
# The run IDs being compared are set in the `runs` dict (not hard-coded here).
folder = 'X:\\bayarea_urbansim\\runs'
export_f = 'M:\\Data\\Urban\\BAUS\\PBA50\\explore_developer_model\\runs_feasibility_all_included\\trace'

In [None]:
# scenario label -> run ID used as the prefix of that run's output file names
runs = dict(NP='run310', P='run313')

In [None]:
# simulation year whose interim tables are compared; kept as a string because it is
# formatted into the input/output file names and written to the 'year' column as-is
year = '2010'

## examine the interim tables following BAUS steps, starting from base year (2010)

* "slr_inundate"
* "slr_remove_dev"
* "eq_code_buildings"
* "earthquake_demolish"
* ["neighborhood_vars"](#neighborhood_vars)
* ["regional_vars"](#regional_vars)
* ["rsh_simulate"](#rsh_simulate)
* ["rrh_simulate"](#rrh_simulate)
* "nrh_simulate"
* "assign_tenure_to_new_units"
* "household_relocation"
* "households_transition"
* "reconcile_unplaced_households"
* "jobs_transition"
* "hlcm_owner_lowincome_simulate"
* "hlcm_renter_lowincome_simulate"
* "hlcm_owner_simulate"
* "hlcm_renter_simulate"
* "hlcm_owner_simulate_no_unplaced"
* "hlcm_owner_lowincome_simulate_no_unplaced"
* "hlcm_renter_simulate_no_unplaced"
* "hlcm_renter_lowincome_simulate_no_unplaced"
* "reconcile_placed_households"
* "elcm_simulate"
* ["price_vars"](#price_vars)
* "topsheet"
* "simulation_validation"
* ["parcel_summary"](#parcel_summary)
* ["building_summary"](#building_summary)
* "geographic_summary"
* "travel_model_output"
* "hazards_slr_summary"
* "hazards_eq_summary"
* "diagnostic_output"
* "config"
* "slack_report"

### 'neighborhood_vars' <a class="anchor" id="neighborhood_vars"></a>

"neighborhood_vars" - applies pandana to create 226060 network nodes (focusing on the pedestrian level), dividing the region into 226060 neighborhoods. Key variables that reflect neighborhood characteristics (existing units, hh, income, jobs, etc.) are gathered from various tables (households, buildings, jobs) following certain rules, e.g. referencing radii (e.g. 1500, 3000), aggregation method (75%, average, median, etc.), and filter (e.g. residential vs non-residential buildings). The network is based on the base-year open-street network from the H5 file.

pandana: moves quickly along the network; it uses the H5 file, which contains the existing-year open-street network, to run a mini travel model (focusing on the pedestrian level) and get job counts, etc. along the network.

In [None]:
# Compare the node-level tables written by the 'neighborhood_vars' step for the NP and P runs.
# Step definition:
# https://github.com/BayAreaMetro/bayarea_urbansim/blob/820554cbabee51725c445b9fd211542db8876c9f/baus.py#L313

# read one table per scenario and report the row counts
neigh_var_np = pd.read_csv(
    os.path.join(folder, '{}_nodes_neighborhood_{}.csv'.format(runs['NP'], year)))
neigh_var_p = pd.read_csv(
    os.path.join(folder, '{}_nodes_neighborhood_{}.csv'.format(runs['P'], year)))
print(len(neigh_var_np))
print(len(neigh_var_p))

# exact-equality check between the two scenarios (author's note: they are not identical)
print('check if they are identical:', neigh_var_p.equals(neigh_var_np))

# column names as read from the P file, before any renaming
print(neigh_var_p.columns.tolist())

# relabel positionally: the first column becomes the join key, every other
# column gets a scenario suffix so both versions can sit side by side
neigh_var_np.columns = ['node_id'] + [col + '_NP' for col in neigh_var_np.columns[1:]]
neigh_var_p.columns = ['node_id'] + [col + '_P' for col in neigh_var_p.columns[1:]]

# outer join keeps nodes that appear in only one of the two runs
neigh_var_compare = pd.merge(neigh_var_np, neigh_var_p, on='node_id', how='outer')
print(len(neigh_var_compare))

# export (left disabled, as in the original cell)
# neigh_var_compare.to_csv(os.path.join(export_f, 'neigh_vars_{}.csv'.format(year)), index=False)

### "regional_vars" <a class="anchor" id="regional_vars"></a>

In [None]:
# Compare the node-level tables written by the 'regional_vars' step for the NP and P runs.
regional_var_np = pd.read_csv(
    os.path.join(folder, '{}_nodes_regional_{}.csv'.format(runs['NP'], year)))
regional_var_p = pd.read_csv(
    os.path.join(folder, '{}_nodes_regional_{}.csv'.format(runs['P'], year)))

# exact-equality check between the two scenarios (author's note: identical in year 2010)
print(regional_var_p.equals(regional_var_np))

# relabel positionally: first column is the join key, the rest get a scenario suffix
regional_var_np.columns = ['tmnode_id'] + [col + '_NP' for col in regional_var_np.columns[1:]]
regional_var_p.columns = ['tmnode_id'] + [col + '_P' for col in regional_var_p.columns[1:]]

# outer join on the node key, then tag every row with the year being examined
regional_var_compare = pd.merge(
    regional_var_np, regional_var_p, on='tmnode_id', how='outer'
).assign(year=year)
print(len(regional_var_compare))

# write the side-by-side table to the export folder
regional_var_compare.to_csv(
    os.path.join(export_f, 'regional_vars_{}.csv'.format(year)),
    index=False)

### "rsh_simulate" <a class="anchor" id="rsh_simulate"></a>

Hedonic model generating unit-level price predictions based on config rsh.yaml.

In the output table, 'unit_residential_price' is the predicted price, which is added to the "residential_units" table. Independent attributes come from "aggregations", which contains neighborhood and regional attributes (https://github.com/BayAreaMetro/bayarea_urbansim/blob/master/features.md#accessibility-variables).
## all parameters used?- need to find out details about "aggregations", the meaning of the .yaml config files.

This is when logsum from TM first enters into the modeling process.

Based on the run log (e.g. run264.log), the first time logsum enters into the modeling process is in step "rsh_simulate" for base year (2010) --> applied in the hedonic model to estimate unit-level residential price ("unit_residential_price"). This is also when difference between P and NP first appears.

In [None]:
# Output of the 'rsh_simulate' step for the year under examination, compared between NP and P.
# Step definition:
# https://github.com/BayAreaMetro/bayarea_urbansim/blob/820554cbabee51725c445b9fd211542db8876c9f/baus.py#L316

# input files: residential-unit table after rsh_simulate, plus the building table, per scenario
rsh_simulate_np_file = os.path.join(
    folder, '{}_residential_units_rshSim_{}.csv'.format(runs['NP'], year))
building_np_file = os.path.join(
    folder, '{}_building_data_{}.csv'.format(runs['NP'], year))
rsh_simulate_p_file = os.path.join(
    folder, '{}_residential_units_rshSim_{}.csv'.format(runs['P'], year))
building_p_file = os.path.join(
    folder, '{}_building_data_{}.csv'.format(runs['P'], year))

# summarize each scenario with the project helper (NP first, then P)
rsh_simulate_parcel_np = summarize_rsh_simulate_result_by_parcel(rsh_simulate_np_file, building_np_file)
rsh_simulate_parcel_p = summarize_rsh_simulate_result_by_parcel(rsh_simulate_p_file, building_p_file)

# relabel positionally: first column is the join key, the rest get a scenario suffix
rsh_simulate_parcel_np.columns = (
    ['parcel_id'] + [col + '_NP' for col in rsh_simulate_parcel_np.columns[1:]])
rsh_simulate_parcel_p.columns = (
    ['parcel_id'] + [col + '_P' for col in rsh_simulate_parcel_p.columns[1:]])

# outer join keeps parcels present in only one scenario; then export
rsh_simulate_parcel_compare = pd.merge(
    rsh_simulate_parcel_np, rsh_simulate_parcel_p, on='parcel_id', how='outer')

rsh_simulate_parcel_compare.to_csv(
    os.path.join(export_f, 'rsh_simulate_by_parcel_{}.csv'.format(year)),
    index=False)

### question: "unit_price" from the "buildings" table - is that the total price of each unit? The model assumes all units of the same building have the same price?

### "rrh_simulate"<a class="anchor" id="rrh_simulate"></a>

"unit_residential_rent" doesn't use logsum

In [None]:
# Output of the 'rrh_simulate' step, compared between NP and P.

# input files: residential-unit table after rrh_simulate, plus the building table, per scenario
rrh_simulate_np_file = os.path.join(
    folder, '{}_residential_units_rrhSim_{}.csv'.format(runs['NP'], year))
building_np_file = os.path.join(
    folder, '{}_building_data_{}.csv'.format(runs['NP'], year))
rrh_simulate_p_file = os.path.join(
    folder, '{}_residential_units_rrhSim_{}.csv'.format(runs['P'], year))
building_p_file = os.path.join(
    folder, '{}_building_data_{}.csv'.format(runs['P'], year))

# summarize each scenario with the project helper (NP first, then P)
rrh_simulate_parcel_np = summarize_rrh_simulate_result_by_parcel(rrh_simulate_np_file, building_np_file)
rrh_simulate_parcel_p = summarize_rrh_simulate_result_by_parcel(rrh_simulate_p_file, building_p_file)

# relabel positionally: first column is the join key, the rest get a scenario suffix
rrh_simulate_parcel_np.columns = (
    ['parcel_id'] + [col + '_NP' for col in rrh_simulate_parcel_np.columns[1:]])
rrh_simulate_parcel_p.columns = (
    ['parcel_id'] + [col + '_P' for col in rrh_simulate_parcel_p.columns[1:]])

# outer join keeps parcels present in only one scenario; then export
rrh_simulate_parcel_compare = pd.merge(
    rrh_simulate_parcel_np, rrh_simulate_parcel_p, on='parcel_id', how='outer')

rrh_simulate_parcel_compare.to_csv(
    os.path.join(export_f, 'rrh_simulate_by_parcel_{}.csv'.format(year)),
    index=False)

### "price_vars"<a class="anchor" id="price_vars"></a>

The step "price_vars" adds price variables to neighborhood_nodes, 4 new columns: 'residential', 'retail', 'office', 'industrial'. The 'residential' value feeds into "parcel_sales_price_sqft_func" (index as parcel_id and value as yearly_rent) of the feasibility model. For residential, it is the 'residential' field.
https://github.com/BayAreaMetro/bayarea_urbansim/blob/820554cbabee51725c445b9fd211542db8876c9f/baus/variables.py#L538
https://github.com/BayAreaMetro/bayarea_urbansim/blob/820554cbabee51725c445b9fd211542db8876c9f/baus/variables.py#L333

However, the same "residential" value is called "residential_sales_price_sqft" in the feasibility setting. So is it rent or sales price?

#### Are these prices or rent or something else?

In [None]:
# Compare the node-level tables written by the 'price_vars' step for the NP and P runs.
price_vars_np = pd.read_csv(
    os.path.join(folder, '{}_nodes_priceVars_{}.csv'.format(runs['NP'], year)))
price_vars_p = pd.read_csv(
    os.path.join(folder, '{}_nodes_priceVars_{}.csv'.format(runs['P'], year)))

# relabel positionally: first column is the join key, the rest get a scenario suffix
price_vars_np.columns = ['node_id'] + [col + '_NP' for col in price_vars_np.columns[1:]]
price_vars_p.columns = ['node_id'] + [col + '_P' for col in price_vars_p.columns[1:]]

# outer join keeps nodes present in only one scenario
price_vars_compare = pd.merge(price_vars_np, price_vars_p, on='node_id', how='outer')

# write the side-by-side table to the export folder
price_vars_compare.to_csv(
    os.path.join(export_f, 'price_vars_{}.csv'.format(year)),
    index=False)

### "parcel_summary"<a class="anchor" id="parcel_summary"></a>

In [None]:
# Parcel-level housing fields from the parcel_data output, compared between NP and P.

# fields requested from the project loader; a fresh list is handed to each call
parcel_housing_fields = ['tothh', 'zoned_du_underbuild', 'zoned_du', 'residential_units']

# NP parcel_data
parcel_np_file = os.path.join(folder, '{}_parcel_data_{}.csv'.format(runs['NP'], year))
parcel_np = load_housing_data_from_parcel(parcel_np_file, list(parcel_housing_fields))

# P parcel_data
parcel_p_file = os.path.join(folder, '{}_parcel_data_{}.csv'.format(runs['P'], year))
parcel_p = load_housing_data_from_parcel(parcel_p_file, list(parcel_housing_fields))

# relabel positionally: first column is the join key, the rest get a scenario suffix
parcel_np.columns = ['parcel_id'] + [col + '_NP' for col in parcel_np.columns[1:]]
parcel_p.columns = ['parcel_id'] + [col + '_P' for col in parcel_p.columns[1:]]

# outer join keeps parcels present in only one scenario; show, then export
parcel_compare = pd.merge(parcel_np, parcel_p, on='parcel_id', how='outer')
display(parcel_compare)

parcel_compare.to_csv(
    os.path.join(export_f, 'parcel_data_compare_{}.csv'.format(year)),
    index=False)

### "building_summary"<a class="anchor" id="building_summary"></a>

In [None]:
# Building-table summaries produced by the project helper, compared between NP and P.

# NP: summarize, report size, preview, then discard columns that are entirely NaN
blg_np_file = os.path.join(folder, '{}_building_data_{}.csv'.format(runs['NP'], year))
blg_parcel_np = summarize_units_by_source_from_blg(blg_np_file)
print(len(blg_parcel_np))
display(blg_parcel_np.head())
blg_parcel_np = blg_parcel_np.dropna(axis=1, how='all')

# P: same sequence for the other scenario
blg_p_file = os.path.join(folder, '{}_building_data_{}.csv'.format(runs['P'], year))
blg_parcel_p = summarize_units_by_source_from_blg(blg_p_file)
print(len(blg_parcel_p))
display(blg_parcel_p.head())
blg_parcel_p = blg_parcel_p.dropna(axis=1, how='all')

# relabel positionally: first column is the join key, the rest get a scenario suffix
blg_parcel_np.columns = ['parcel_id'] + [col + '_NP' for col in blg_parcel_np.columns[1:]]
blg_parcel_p.columns = ['parcel_id'] + [col + '_P' for col in blg_parcel_p.columns[1:]]

# outer join keeps parcels present in only one scenario; preview, then export
blg_parcel_compare = pd.merge(blg_parcel_np, blg_parcel_p, on='parcel_id', how='outer')
display(blg_parcel_compare.head())

blg_parcel_compare.to_csv(
    os.path.join(export_f, 'building_data_parcel_compare_{}.csv'.format(year)),
    index=False)