In [1]:
import os
import sys
sys.path.insert(0, 'X:\\bayarea_urbansim\\scripts')
from baus_analytics_tools import *

In [2]:
# location of two runs: run270-P, run269-NP
folder = 'X:\\bayarea_urbansim\\runs'
export_f = 'M:\\Data\\Urban\\BAUS\\PBA50\\explore_developer_model\\runs_feasibility_all_included\\trace'

In [3]:
runs = {'NP': 'run290',
        'P':  'run292'}

In [4]:
year = '2050'

## examine the interim tables following BAUS steps in simulation year 2015 - 2050

* "slr_inundate"
* "slr_remove_dev"
* "eq_code_buildings"
* "earthquake_demolish"
* ["neighborhood_vars"](#neighborhood_vars)
* ["regional_vars"](#regional_vars)
* "nrh_simulate"
* "household_relocation"
* "households_transition"
* "reconcile_unplaced_households"
* "jobs_relocation"
* "jobs_transition"
* "balance_rental_and_ownership_hedonics"
* ["price_vars"](#price_vars)
* "scheduled_development_events"
* "preserve_affordable"
* "lump_sum_accounts"
* "subsidized_residential_developer_lump_sum_accts"
* "office_lump_sum_accounts"
* "subsidized_office_developer_lump_sum_accts"
* ["alt_feasibility"](#alt_feasibility)
* ["residential_developer"](#residential_developer)
* "developer_reprocess"
* "retail_developer"
* "office_developer"
* "accessory_units"
* "calculate_vmt_fees"
* "remove_old_units"
* "initialize_new_units"
* "reconcile_unplaced_households"
* ["rsh_simulate"](#rsh_simulate)
* ["rrh_simulate"](#rrh_simulate)
* "assign_tenure_to_new_units"
* "hlcm_owner_lowincome_simulate"
* "hlcm_renter_lowincome_simulate"
* "hlcm_owner_simulate"
* "hlcm_renter_simulate"
* "hlcm_owner_simulate_no_unplaced"
* "hlcm_owner_lowincome_simulate_no_unplaced"
* "hlcm_renter_simulate_no_unplaced"
* "hlcm_renter_lowincome_simulate_no_unplaced"
* "reconcile_placed_households"
* "proportional_elcm"
* "elcm_simulate"
* "topsheet"
* "simulation_validation"
* ["parcel_summary"](#parcel_summary)
* ["building_summary"](#building_summary)
* "diagnostic_output"
* "geographic_summary"
* "travel_model_output"
* "hazards_slr_summary"
* "hazards_eq_summary"
* "slack_report"

### 'neighborhood_vars' <a class="anchor" id="neighborhood_vars"></a>

pandana: fast move along the network, uses the H5 file has openstreet esiting year network to run a mini-travel model (focusing on pedestrian level), get job conuts, etc. along the network.

In [5]:
# output of 'neighborhood_vars' step
# https://github.com/BayAreaMetro/bayarea_urbansim/blob/820554cbabee51725c445b9fd211542db8876c9f/baus.py#L313

neigh_var_np = pd.read_csv(os.path.join(folder, '{}_nodes_neighborhood_{}.csv'.format(runs['NP'],year)))
neigh_var_p = pd.read_csv(os.path.join(folder, '{}_nodes_neighborhood_{}.csv'.format(runs['P'], year)))
print(neigh_var_np.shape[0])
print(neigh_var_p.shape[0])

# if identical between NP and P - they are not identical
print('check if they are identical:', neigh_var_p.equals(neigh_var_np))

print(list(neigh_var_p))

# rename columns 
neigh_var_np.columns = ['node_id'] + [x+'_NP' for x in list(neigh_var_np)[1:]]
neigh_var_p.columns = ['node_id'] + [x+'_P' for x in list(neigh_var_p)[1:]]

# merge
neigh_var_compare = neigh_var_np.merge(neigh_var_p, on='node_id', how='outer')
print(neigh_var_compare.shape[0])

# export
# neigh_var_compare.to_csv(os.path.join(export_f, 'neigh_vars_{}.csv'.format(year)), index=False)

226060
226060
check if they are identical: False
['Unnamed: 0', 'retail_sqft_3000', 'sum_income_3000', 'residential_units_500', 'residential_units_1500', 'office_1500', 'retail_1500', 'industrial_1500', 'ave_sqft_per_unit', 'ave_lot_size_per_unit', 'population', 'poor', 'renters', 'sfdu', 'ave_hhsize', 'jobs_500', 'jobs_1500', 'ave_income_1500', 'ave_income_500', 'retail_ratio']
226060


### "regional_vars"<a class="anchor" id="regional_vars"></a>

In [6]:
regional_var_np = pd.read_csv(os.path.join(folder, '{}_nodes_regional_{}.csv'.format(runs['NP'], year)))
regional_var_p = pd.read_csv(os.path.join(folder, '{}_nodes_regional_{}.csv'.format(runs['P'], year)))

# if identical between NP and P - they are identical in year 2010
print(regional_var_p.equals(regional_var_np))


# rename columns 
regional_var_np.columns = ['tmnode_id'] + [x+'_NP' for x in list(regional_var_np)[1:]]
regional_var_p.columns = ['tmnode_id'] + [x+'_P' for x in list(regional_var_p)[1:]]

# merge
regional_var_compare = regional_var_np.merge(regional_var_p, on='tmnode_id', how='outer')
print(regional_var_compare.shape[0])

# export
regional_var_compare.to_csv(os.path.join(export_f, 'regional_vars_{}.csv'.format(year)), index=False)

False
12016


#### the two runs have different jobs_500, jobs_1500 values. Why? Does this affect residential development?

### "price_vars" <a class="anchor" id="price_vars"></a>

In [7]:
price_vars_np = pd.read_csv(os.path.join(folder, '{}_nodes_priceVars_{}.csv'.format(runs['NP'], year)))
price_vars_p = pd.read_csv(os.path.join(folder, '{}_nodes_priceVars_{}.csv'.format(runs['P'], year)))

# rename columns
price_vars_np.columns = ['node_id'] + [x + '_NP' for x in list(price_vars_np)[1:]]
price_vars_p.columns = ['node_id'] + [x + '_P' for x in list(price_vars_p)[1:]]

# merge
price_vars_compare = price_vars_np.merge(price_vars_p, on='node_id', how='outer')

# export
price_vars_compare.to_csv(os.path.join(export_f, 'price_vars_{}.csv'.format(year)), index=False)

### "alt_feasibility" <a class="anchor" id="alt_feasibility"></a>

In [8]:
# look up feasibility of all parcels, regardless of development feasibility
# 'feasibility' table contains the following development types: 
# dev_types = ['retail', 'industrial', 'office', 'residential', 'mixedresidential', 'mixedoffice']

# however, in BAUS run, the step "residential_developer" model following this step only calls 'residential' form,
# so only need to consider the 'residential' portion


# load the feasibility table and extract residential and mixed-residential portions
feasibility_all_np_file = os.path.join(folder,'{}_feasibility_allParcels_{}.csv'.format(runs['NP'], year))
print(feasibility_all_np_file)
feasibility_all_np = load_feasibility(feasibility_all_np_file)
print('NP all-parcel feasibility has {} rows'.format(feasibility_all_np.shape[0]))
feasibility_all_res_np = get_feasibility_by_useType(feasibility_all_np, 'residential')

feasibility_all_p_file = os.path.join(folder,'{}_feasibility_allParcels_{}.csv'.format(runs['P'], year))
print(feasibility_all_p_file)
feasibility_all_p = load_feasibility(feasibility_all_p_file)
print('P all-parcel feasibility has {} rows'.format(feasibility_all_p.shape[0]))

feasibility_all_res_p = get_feasibility_by_useType(feasibility_all_p, 'residential')

print(list(feasibility_all_res_np))

X:\bayarea_urbansim\runs\run290_feasibility_allParcels_2050.csv


  if sys.path[0] == '':


NP all-parcel feasibility has 434541 rows
X:\bayarea_urbansim\runs\run292_feasibility_allParcels_2050.csv




P all-parcel feasibility has 434898 rows
['parcel_id', 'parking_config', 'building_sqft', 'building_cost', 'parking_ratio', 'stories', 'total_cost', 'building_revenue', 'max_profit_far', 'max_profit', 'oldest_building', 'total_sqft', 'total_residential_units', 'max_far', 'max_dua', 'land_cost', 'residential', 'min_max_fars', 'max_height', 'building_purchase_price', 'building_purchase_price_sqft', 'residential_sales_price_sqft', 'pda_pba40', 'pda_pba50', 'trich_id', 'cat_id', 'tra_id', 'ppa_id', 'sesit_id', 'coc_id', 'juris', 'county', 'superdistrict', 'geom_id', 'vmt_res_cat', 'vmt_nonres_cat', 'residential_sqft', 'non_residential_sqft']


### question: why NP and P have different number of rows in feasibility table?

In [9]:
# compare key output columns: max_profit, residential_sqft

feasibility_res_np = feasibility_all_res_np[['parcel_id', 'superdistrict',
                                             'total_cost', 'building_revenue', 'max_profit_far', 'max_profit',
                                             'residential_sales_price_sqft',
                                             'residential_sqft']]
feasibility_res_np.columns = ['parcel_id'] + [x+'_NP' for x in list(feasibility_res_np)[1:]]

feasibility_res_p = feasibility_all_res_p[['parcel_id', 'superdistrict',
                                            'total_cost', 'building_revenue', 'max_profit_far', 'max_profit',
                                            'residential_sales_price_sqft',
                                            'residential_sqft']]
feasibility_res_p.columns = ['parcel_id'] + [x+'_P' for x in list(feasibility_res_p)[1:]]

feasibility_compare = feasibility_res_np.merge(feasibility_res_p,
                                               on='parcel_id',
                                               how='outer')
display(feasibility_compare.head()) 

Unnamed: 0,parcel_id,superdistrict_NP,total_cost_NP,building_revenue_NP,max_profit_far_NP,max_profit_NP,residential_sales_price_sqft_NP,residential_sqft_NP,superdistrict_P,total_cost_P,building_revenue_P,max_profit_far_P,max_profit_P,residential_sales_price_sqft_P,residential_sqft_P
0,18,18.0,12631270.0,40952400.0,3.25,28321130.0,898.924822,56946.369455,18.0,12631270.0,41266340.0,3.25,28635070.0,905.815866,56946.369455
1,20,18.0,6129055.0,19497200.0,3.25,13368140.0,882.002215,27632.011408,18.0,6129055.0,19757300.0,3.25,13628240.0,893.768485,27632.011408
2,21,18.0,11160230.0,35501920.0,3.25,24341690.0,882.002215,50314.379684,18.0,11160230.0,35975530.0,3.25,24815300.0,893.768485,50314.379684
3,23,18.0,1919872.0,6154671.0,3.25,4234799.0,888.840158,8655.480111,18.0,1919872.0,6223556.0,3.25,4303685.0,898.788452,8655.480111
4,38,,,,,,,,,,,,,,


In [10]:
# # drop rows where all columns (except for parcel_id) are NA
# feasibility_compare.dropna(subset=list(feasibility_compare)[1:], how='all', inplace=True)

In [11]:
# compare max_project

feasibility_compare['max_profit_chk'] = ''
feasibility_compare.loc[feasibility_compare.max_profit_NP.isnull() & feasibility_compare.max_profit_P.isnull(), 'max_profit_chk'] = 'not allow'
feasibility_compare.loc[feasibility_compare.max_profit_NP.isnull() & feasibility_compare.max_profit_P.notnull(), 'max_profit_chk'] = 'only allow in P'
feasibility_compare.loc[feasibility_compare.max_profit_NP.notnull() & feasibility_compare.max_profit_P.isnull(), 'max_profit_chk'] = 'only allow in NP'

feasibility_compare.loc[feasibility_compare.max_profit_NP.notnull() & feasibility_compare.max_profit_P.notnull() \
                        & (feasibility_compare.max_profit_NP > 0) \
                        & (feasibility_compare.max_profit_P > 0), 'max_profit_chk'] = 'Both profitable'

feasibility_compare.loc[feasibility_compare.max_profit_NP.notnull() & feasibility_compare.max_profit_P.notnull() \
                        & (feasibility_compare.max_profit_NP > 0) \
                        & (feasibility_compare.max_profit_P <= 0), 'max_profit_chk'] = 'NP profitable only'

feasibility_compare.loc[feasibility_compare.max_profit_NP.notnull() & feasibility_compare.max_profit_P.notnull() \
                        & (feasibility_compare.max_profit_NP <= 0) \
                        & (feasibility_compare.max_profit_P > 0), 'max_profit_chk'] = 'P profitable only'

feasibility_compare.loc[feasibility_compare.max_profit_NP.notnull() & feasibility_compare.max_profit_P.notnull() \
                        & (feasibility_compare.max_profit_NP <= 0) \
                        & (feasibility_compare.max_profit_P <= 0), 'max_profit_chk'] = 'Both not profitable'

print(feasibility_compare.max_profit_chk.value_counts())

# export
feasibility_compare.to_csv(os.path.join(export_f, 'feasibility_compare_{}.csv').format(year), index=False)

Both not profitable    229090
Both profitable        148773
not allow               31715
only allow in P         23659
only allow in NP        23333
NP profitable only        944
P profitable only         934
Name: max_profit_chk, dtype: int64


In [12]:
# 'parcel_id', 'superdistrict', 'building_sqft', 'building_cost', 'parking_ratio', 'stories', 'total_cost', 
# 'building_revenue', 'max_profit_far', 'max_profit', 'oldest_building',
# 'total_sqft', 'total_residential_units', 'max_far', 'max_dua', 'land_cost', 'residential',
# 'min_max_fars', 'max_height', 'building_purchase_price', 'building_purchase_price_sqft', 'residential_sales_price_sqft

In [13]:
# chk = feasibility_compare.loc[feasibility_compare.max_profit_chk == 'Both profitable'][['parcel_id',
#                                                                                     'residential_sqft_np', 'residential_sqft_p',
#                                                                                     'total_cost_np', 'total_cost_p',
#                                                                                     'building_revenue_np', 'building_revenue_p',
#                                                                                     'residential_sales_price_sqft_np', 'residential_sales_price_sqft_p']]
# chk['rev_diff'] = chk['building_revenue_p'] - chk['building_revenue_np']
# chk['cost_diff'] = chk['total_cost_p'] - chk['total_cost_np']
# chk['price'] = chk['residential_sales_price_sqft_p'] - chk['residential_sales_price_sqft_np']
# chk

In [14]:
# # compare feasible residential_sqft
# feasibility_compare['res_sqft_diff'] = ''
# feasibility_compare.loc[feasibility_compare.residential_sqft_np.isnull() & feasibility_compare.residential_sqft_p.isnull(), 'res_sqft_diff'] = 'not allow'
# feasibility_compare.loc[feasibility_compare.residential_sqft_np.isnull() & feasibility_compare.residential_sqft_p.notnull(), 'res_sqft_diff'] = 'not allow in NP'
# feasibility_compare.loc[feasibility_compare.residential_sqft_np.notnull() & feasibility_compare.residential_sqft_p.isnull(), 'res_sqft_diff'] = 'not allow in P'
# feasibility_compare.loc[feasibility_compare.residential_sqft_np.notnull() & feasibility_compare.residential_sqft_p.notnull() \
#                         & (feasibility_compare.residential_sqft_np == feasibility_compare.residential_sqft_p), 'res_sqft_diff'] = 'NP = P'

# feasibility_compare.loc[feasibility_compare.residential_sqft_np.notnull() & feasibility_compare.residential_sqft_p.notnull() \
#                         & (feasibility_compare.residential_sqft_np > feasibility_compare.residential_sqft_p), 'res_sqft_diff'] = 'NP > P'

# feasibility_compare.loc[feasibility_compare.residential_sqft_np.notnull() & feasibility_compare.residential_sqft_p.notnull() \
#                         & (feasibility_compare.residential_sqft_np < feasibility_compare.residential_sqft_p), 'res_sqft_diff'] = 'NP < P'

In [15]:
# # status of parcels at least only keep parcels feasible at least in one of NP or P (including profitable in both) 
# feasibility_compare
# profitable_feasibility_compare = feasibility_compare.loc[(feasibility_compare.max_profit_np > 0) | (feasibility_compare.max_profit_p > 0)]

In [16]:
# profitable_feasibility_compare['res_sqft_diff'].value_counts()

### "residential_developer" <a class="anchor" id="residential_developer"></a>

In [17]:
# read the data
resDev_np = pd.read_csv(os.path.join(folder, 'run290_parcel_output_afterReSDev_2015.csv'.format(runs['NP'], year)))
print('{} rows of parcel_output after running residential developer model, \
{} unique parcel_id, the following fields: \n\n{}'.format(resDev_np.shape[0],
                                                        len(resDev_np.parcel_id.unique()),
                                                        list(resDev_np)))

############# here, 'total_residential_units' is the count of unit before developing
############# the question is: where is it from and when is this updated?

20894 rows of parcel_output after running residential developer model, 9253 unique parcel_id, the following fields: 

['Unnamed: 0', 'SDEM', 'ave_unit_size', 'building_cost', 'building_purchase_price', 'building_purchase_price_sqft', 'building_revenue', 'building_sqft', 'building_type', 'building_type_id', 'cat_id', 'coc_id', 'county', 'current_units', 'deed_restricted_units', 'fees', 'form', 'geom_id', 'inclusionary_units', 'job_spaces', 'juris', 'juris_coc', 'juris_ppa', 'juris_sesit', 'juris_tra', 'juris_trich', 'land_cost', 'max_dua', 'max_far', 'max_height', 'max_profit', 'max_profit_far', 'max_profit_per_sqft', 'min_max_fars', 'net_units', 'non_residential_rent', 'non_residential_sqft', 'oldest_building', 'parcel_id', 'parcel_size', 'parking_config', 'parking_ratio', 'pda_id', 'pda_pba40', 'pda_pba50', 'policy_based_revenue_reduction', 'ppa_id', 'preserved_units', 'redfin_sale_price', 'redfin_sale_year', 'residential', 'residential_price', 'residential_sales_price_sqft', 'residen

  exec(code_obj, self.user_global_ns, self.user_ns)


In [18]:
# NP
resDev_np_file = os.path.join(folder, '{}_parcel_output_afterReSDev_{}.csv'.format(runs['NP'], year))
resDev_parcel_np = summarize_parcel_output_by_parcel(resDev_np_file, ['residential_units'])

# P
resDev_p_file = os.path.join(folder, '{}_parcel_output_afterReSDev_{}.csv'.format(runs['P'], year))
resDev_parcel_p = summarize_parcel_output_by_parcel(resDev_p_file, ['residential_units'])

# merge P and NP and export
resDev_parcel_np.columns = ['parcel_id'] + [x + '_NP' for x in list(resDev_parcel_np)[1:]]
resDev_parcel_p.columns = ['parcel_id'] + [x + '_P' for x in list(resDev_parcel_p)[1:]]

resDev_parcel_compare = resDev_parcel_np.merge(resDev_parcel_p, on='parcel_id', how='outer')
resDev_parcel_compare.to_csv(os.path.join(export_f, 'resDevModel_by_parcel_{}.csv'.format(year)), index=False)

read 47439 rows of parcel_output data X:\bayarea_urbansim\runs\run290_parcel_output_afterReSDev_2050.csv, with 25878 unique parcel_id
read 47181 rows of parcel_output data X:\bayarea_urbansim\runs\run292_parcel_output_afterReSDev_2050.csv, with 25661 unique parcel_id


### "rsh_simulate" <a class="anchor" id="rsh_simulate"></a>

In [19]:
# NP
rsh_simulate_np_file = os.path.join(folder, '{}_residential_units_rshSim_{}.csv'.format(runs['NP'], year))
building_np_file = os.path.join(folder, '{}_building_data_{}.csv'.format(runs['NP'], year))
rsh_simulate_parcel_np = summarize_rsh_simulate_result_by_parcel(rsh_simulate_np_file, building_np_file)

# P
rsh_simulate_p_file = os.path.join(folder, '{}_residential_units_rshSim_{}.csv'.format(runs['P'], year))
building_p_file = os.path.join(folder, '{}_building_data_{}.csv'.format(runs['P'], year))
rsh_simulate_parcel_p = summarize_rsh_simulate_result_by_parcel(rsh_simulate_p_file, building_p_file)


# merge P and NP and export
rsh_simulate_parcel_np.columns = ['parcel_id'] + [x + '_NP' for x in list(rsh_simulate_parcel_np)[1:]]
rsh_simulate_parcel_p.columns = ['parcel_id'] + [x + '_P' for x in list(rsh_simulate_parcel_p)[1:]]

rsh_simulate_parcel_compare = rsh_simulate_parcel_np.merge(rsh_simulate_parcel_p, on='parcel_id', how='outer')
rsh_simulate_parcel_compare.to_csv(os.path.join(export_f, 'rsh_simulate_by_parcel_{}.csv'.format(year)), index=False)

read 4259931 rows of unit-level rsh_simulate result X:\bayarea_urbansim\runs\run290_residential_units_rshSim_2050.csv
read 1873606 rows of building_data X:\bayarea_urbansim\runs\run290_building_data_2050.csv
read 4261145 rows of unit-level rsh_simulate result X:\bayarea_urbansim\runs\run292_residential_units_rshSim_2050.csv
read 1873531 rows of building_data X:\bayarea_urbansim\runs\run292_building_data_2050.csv


### "rrh_simulate" <a class="anchor" id="rrh_simulate"></a>

In [20]:
# NP
rrh_simulate_np_file = os.path.join(folder, '{}_residential_units_rrhSim_{}.csv'.format(runs['NP'], year))
building_np_file = os.path.join(folder, '{}_building_data_{}.csv'.format(runs['NP'], year))
rrh_simulate_parcel_np = summarize_rrh_simulate_result_by_parcel(rrh_simulate_np_file, building_np_file)

# P
rrh_simulate_p_file = os.path.join(folder, '{}_residential_units_rrhSim_{}.csv'.format(runs['P'], year))
building_p_file = os.path.join(folder, '{}_building_data_{}.csv'.format(runs['P'], year))
rrh_simulate_parcel_p = summarize_rrh_simulate_result_by_parcel(rrh_simulate_p_file, building_p_file)


# merge P and NP and export
rrh_simulate_parcel_np.columns = ['parcel_id'] + [x + '_NP' for x in list(rrh_simulate_parcel_np)[1:]]
rrh_simulate_parcel_p.columns = ['parcel_id'] + [x + '_P' for x in list(rrh_simulate_parcel_p)[1:]]

rrh_simulate_parcel_compare = rrh_simulate_parcel_np.merge(rrh_simulate_parcel_p, on='parcel_id', how='outer')
rrh_simulate_parcel_compare.to_csv(os.path.join(export_f, 'rrh_simulate_by_parcel_{}.csv'.format(year)), index=False)

read 4259931 rows of unit-level rrh_simulate result X:\bayarea_urbansim\runs\run290_residential_units_rrhSim_2050.csv
read 1873606 rows of building_data X:\bayarea_urbansim\runs\run290_building_data_2050.csv
read 4261145 rows of unit-level rrh_simulate result X:\bayarea_urbansim\runs\run292_residential_units_rrhSim_2050.csv
read 1873531 rows of building_data X:\bayarea_urbansim\runs\run292_building_data_2050.csv


### "parcel_summary" <a class="anchor" id="parcel_summary"></a>

In [21]:
# NP parcel_data
parcel_np_file = os.path.join(folder, '{}_parcel_data_{}.csv'.format(runs['NP'], year))
parcel_np = load_housing_data_from_parcel(parcel_np_file,
                                          ['tothh', 'zoned_du_underbuild', 'zoned_du',
                                           'residential_units'])

# P parcel_data
parcel_p_file = os.path.join(folder, '{}_parcel_data_{}.csv'.format(runs['P'], year))
parcel_p = load_housing_data_from_parcel(parcel_p_file,
                                        ['tothh', 'zoned_du_underbuild', 'zoned_du',
                                         'residential_units'])

# merge P and NP and export
parcel_np.columns = ['parcel_id'] + [x+'_NP' for x in list(parcel_np)[1:]]
parcel_p.columns = ['parcel_id'] + [x+'_P' for x in list(parcel_p)[1:]]
parcel_compare = parcel_np.merge(parcel_p, on='parcel_id', how='outer')
display(parcel_compare)

parcel_compare.to_csv(os.path.join(export_f, 'parcel_data_compare_{}.csv'.format(year)), index=False)

load 1956212 rows of parcel data X:\bayarea_urbansim\runs\run290_parcel_data_2050.csv
load 1956212 rows of parcel data X:\bayarea_urbansim\runs\run292_parcel_data_2050.csv


Unnamed: 0,parcel_id,zoned_du_underbuild_NP,zoned_du_NP,tothh_NP,residential_units_NP,zoned_du_P,zoned_du_underbuild_P,tothh_P,residential_units_P
0,229116,0,0.000000,0.0,0.0,0.000000,0,0.0,0.0
1,244166,0,0.000000,0.0,0.0,0.000000,0,0.0,0.0
2,202378,96,130.445563,33.0,34.0,130.445563,96,32.0,34.0
3,2004420,0,0.000000,0.0,0.0,0.000000,0,0.0,0.0
4,340332,14,14.289466,0.0,0.0,14.289466,14,0.0,0.0
...,...,...,...,...,...,...,...,...,...
1956207,782909,0,0.000000,0.0,0.0,0.000000,0,0.0,0.0
1956208,2054504,0,0.000000,0.0,0.0,0.000000,0,0.0,0.0
1956209,2054505,0,0.000000,1750.0,1843.0,0.000000,0,1768.0,1843.0
1956210,2054506,0,0.000000,0.0,0.0,0.000000,0,0.0,0.0


### "building_summary" <a class="anchor" id="building_summary"></a>

In [22]:
# NP
blg_np_file = os.path.join(folder, '{}_building_data_{}.csv'.format(runs['NP'], year))
blg_parcel_np = summarize_units_by_source_from_blg(blg_np_file)
print(blg_parcel_np.shape[0])
display(blg_parcel_np.head())

# P
blg_p_file = os.path.join(folder, '{}_building_data_{}.csv'.format(runs['P'], year))
blg_parcel_p = summarize_units_by_source_from_blg(blg_p_file)
print(blg_parcel_p.shape[0])
display(blg_parcel_p.head())

# merge P and NP and export
blg_parcel_np.columns = ['parcel_id'] + [x+'_NP' for x in list(blg_parcel_np)[1:]]
blg_parcel_p.columns = ['parcel_id'] + [x+'_P' for x in list(blg_parcel_p)[1:]]
blg_parcel_compare = blg_parcel_np.merge(blg_parcel_p, on='parcel_id', how='outer')
display(blg_parcel_compare.head())

blg_parcel_compare.to_csv(os.path.join(export_f, 'building_data_parcel_compare_{}.csv'.format(year)), index=False)

read 1873606 building_data X:\bayarea_urbansim\runs\run290_building_data_2050.csv, with 1850343 unique parcel_id


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,


1850343


Unnamed: 0,parcel_id,resUnits_dev,unit_price_mean_dev,resUnits_nondev,unit_price_mean_nondev
0,26,7.0,661069.099758,,
1,98,439.0,727500.781104,,
2,181,435.0,803843.19284,,
3,191,0.0,,,
4,193,0.0,,,


read 1873531 building_data X:\bayarea_urbansim\runs\run292_building_data_2050.csv, with 1850320 unique parcel_id


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,


1850320


Unnamed: 0,parcel_id,resUnits_dev,unit_price_mean_dev,resUnits_nondev,unit_price_mean_nondev
0,104,143.0,730923.178547,,
1,191,0.0,,,
2,192,124.0,813936.463202,,
3,193,0.0,,,
4,195,88.0,811412.074314,,


Unnamed: 0,parcel_id,resUnits_dev_NP,unit_price_mean_dev_NP,resUnits_nondev_NP,unit_price_mean_nondev_NP,resUnits_dev_P,unit_price_mean_dev_P,resUnits_nondev_P,unit_price_mean_nondev_P
0,26,7.0,661069.099758,,,,,0.0,
1,98,439.0,727500.781104,,,,,0.0,
2,181,435.0,803843.19284,,,,,0.0,
3,191,0.0,,,,0.0,,,
4,193,0.0,,,,0.0,,,
