## Import libraries

In [1]:
import os
import sys
import time
import pandas as pd
import arcpy
from arcgis.features import GeoAccessor, GeoSeriesAccessor

In [2]:
# show all columns
pd.options.display.max_columns = None

## Load base year H5 and show all tables

In [3]:
# Read in hdf5 
store = pd.HDFStore('.\\Inputs\\remm_data_2015_base_year_02082019.h5')
tables = list(store.keys())
tables

['/buildings',
 '/buildings_for_estimation',
 '/buildings_for_estimation_grouped',
 '/employment_controls',
 '/household_controls',
 '/household_for_estimation',
 '/households',
 '/households_for_estimation',
 '/households_for_estimation1',
 '/jobs',
 '/parcels',
 '/travel_data',
 '/valid_parcels',
 '/zoning',
 '/zoning_base_line',
 '/zoning_baseline',
 '/zoning_for_parcels']

In [4]:
%%html
<style>
table {float:left}
</style>

## Crosswalk County IDs to County FIPS codes

#### Tasks
Updated County ID in following tables:
- parcels
- jobs
- households
- population_controls.csv
- household_controls.csv
- employment_controls.csv
- tazctyeq.csv 


#### Files in h5 store that aren't being used by REMM
- household_controls (csv file is used instead)
- population_controls (csv file is used instead)
- employment_controls (csv file is used instead)


#### Identify parcels with 0 as county ID and identify their id (use TAZ)


 
 # Check Weber/Davis ids vs fips

**Crosswalk Table**  

| Name       | County_ID | CID  | TAZCTYEQ | FIPS |
|------------|-----------|------|----------|------|
| Box Elder  |   5       |  NA  |    5     |  3   |
| Weber      |   3       |  3   |    1     |  57  |  
| Davis      |   1       |  1   |    2     |  11  |
| Salt Lake  |   2       |  2   |    3     |  35  |
| Utah       |   4       |  4   |    4     |  49  |


In [5]:
# Create crosswalk dictionary for county values
crosswalk1 = {1:11, 2:35, 3:57, 4:49, 5:3, 0:0}

# Create crosswalk for TAZCTYEQ csv
crosswalk2 = {1:57, 3:35, 2:11, 4:49, 5:3, 0:0} 

### Work on parcels

In [6]:
# get the parcels table from the store
parcels = store.get('parcels').reset_index()
parcels.head(5)

Unnamed: 0.1,parcel_id,Unnamed: 0,county_id,zone_id,parcel_acres,land_value,x,y,elevation,fwy_exit,airport,rail_depot,stream,trail,university,shape_area,volume_one_way,volume_two_way,airport_distance,fwy_exit_dist,raildepot_dist,university_dist,trail_dist,stream_dist,train_station,rail_stn_dist,bus_rte_dist,bus_stop,bus_stop_dist,volume_two_way_nofwy,distsml_id,distmed_id,distlrg_id,parent_parcel,CO_NAME,parcel_id_REMM,county_taz_id,utmxi,utmyi,city,zonal_ppa
0,741871,0,4,2111,0.197968,35700.0,1531239.186,7330722.151,1447.53745,0.0,0.0,0.0,1.0,0.0,0.0,8623.484,992.0,1942.0,18.02655,0.85831,12.61234,14.73993,0.09764,0.0,0.0,8.748286,2382.31,0.0,0.882466,1942.0,487,38,13,741871,UTAH,741871,492111,424331.7633,4477427.732,Lehi,176926.527483
1,579822,1,4,2873,37.717834,56614.32,1535437.548,7341595.932,1657.284652,0.0,0.0,0.0,1.0,0.0,0.0,1642989.0,3564.0,7135.0,19.48828,1.32984,10.63629,15.94851,0.23207,0.0,0.0,6.691816,4930.5,0.0,1.16411,7135.0,0,0,0,579822,UTAH,579822,492873,425629.9856,4480733.739,Utah County,140869.334754
2,579853,2,4,2117,6.791235,81500.0,1534101.235,7334630.179,1657.284652,0.0,0.0,0.0,1.0,0.0,0.0,295826.2,0.0,0.0,18.42054,1.66976,11.91035,15.00142,0.55081,0.0,0.0,7.994607,6770.95,0.0,1.661999,0.0,487,38,13,579853,UTAH,579853,492117,425210.6619,4478613.554,Lehi,199096.57513
3,640185,3,4,2073,1.372797,112100.0,1538253.186,7322787.258,1445.761763,0.0,0.0,0.0,1.0,0.0,0.0,59799.02,2495.0,5043.0,16.06753,0.60111,14.23267,12.7367,0.04966,0.0,0.0,10.277766,2627.34,0.0,0.548186,5043.0,482,38,13,640185,UTAH,640185,492073,426455.1775,4474997.689,Lehi,169098.553773
4,682698,4,4,2119,0.163751,43000.0,1548648.398,7336198.043,1766.927829,0.0,0.0,0.0,1.0,0.0,0.0,7132.996,3632.0,7133.0,17.61362,3.7488,12.20122,13.84603,0.37882,0.0,0.0,8.210217,9997.32,0.0,2.129948,7133.0,488,39,14,682698,UTAH,682698,492119,429646.0053,4479065.962,Draper,254583.603528


In [7]:
# Temporarily crosswalk values and get a value count
parcels['county_id'].map(crosswalk1).value_counts()

35    368139
49    250210
11    113643
57    109672
0       3555
Name: county_id, dtype: int64

In [8]:
# read in parcels with county ids; 'parcel_id_' is 'parcel_id_REMM'; it was truncated
# parcels_county = gpd.read_file('.\\Data\\parcels_county_id.shp')
parcels_county = pd.DataFrame.spatial.from_featureclass('.\\Inputs\\parcels_county_id.shp')
parcels_county = parcels_county[['parcel_id_', 'FIPS']].copy()

In [9]:
print(parcels_county.shape)
print(parcels.shape)

(845219, 2)
(845219, 41)


In [10]:
# join resulting table back to original parcels table, keep new county fips column
parcels_converted = parcels.merge(parcels_county, left_on = 'parcel_id_REMM', right_on = 'parcel_id_' , how = 'inner')

# Store old cid for reference
parcels_converted['cid_old'] = parcels_converted['county_id']

# Replace old cid with FIPS codes
parcels_converted['county_id'] = parcels_converted['FIPS'].astype(int)

# Delete unneeded columns
del parcels_converted['FIPS']
del parcels_converted['parcel_id_']
del parcels_converted['Unnamed: 0']

# Check value counts
parcels_converted['county_id'].value_counts()

35    368588
49    253097
11    113748
57    109733
23        30
3         23
Name: county_id, dtype: int64

In [11]:
parcels_converted

Unnamed: 0,parcel_id,county_id,zone_id,parcel_acres,land_value,x,y,elevation,fwy_exit,airport,rail_depot,stream,trail,university,shape_area,volume_one_way,volume_two_way,airport_distance,fwy_exit_dist,raildepot_dist,university_dist,trail_dist,stream_dist,train_station,rail_stn_dist,bus_rte_dist,bus_stop,bus_stop_dist,volume_two_way_nofwy,distsml_id,distmed_id,distlrg_id,parent_parcel,CO_NAME,parcel_id_REMM,county_taz_id,utmxi,utmyi,city,zonal_ppa,cid_old
0,741871,49,2111,0.197968,35700.00,1531239.186,7330722.151,1447.537450,0.0,0.0,0.0,1.0,0.0,0.0,8.623484e+03,992.0,1942.0,18.02655,0.85831,12.61234,14.73993,0.09764,0.00000,0.0,8.748286,2382.31,0.0,0.882466,1942.0,487,38,13,741871,UTAH,741871,492111,424331.7633,4477427.732,Lehi,176926.527483,4
1,579822,49,2873,37.717834,56614.32,1535437.548,7341595.932,1657.284652,0.0,0.0,0.0,1.0,0.0,0.0,1.642989e+06,3564.0,7135.0,19.48828,1.32984,10.63629,15.94851,0.23207,0.00000,0.0,6.691816,4930.50,0.0,1.164110,7135.0,0,0,0,579822,UTAH,579822,492873,425629.9856,4480733.739,Utah County,140869.334754,4
2,579853,49,2117,6.791235,81500.00,1534101.235,7334630.179,1657.284652,0.0,0.0,0.0,1.0,0.0,0.0,2.958262e+05,0.0,0.0,18.42054,1.66976,11.91035,15.00142,0.55081,0.00000,0.0,7.994607,6770.95,0.0,1.661999,0.0,487,38,13,579853,UTAH,579853,492117,425210.6619,4478613.554,Lehi,199096.575130,4
3,640185,49,2073,1.372797,112100.00,1538253.186,7322787.258,1445.761763,0.0,0.0,0.0,1.0,0.0,0.0,5.979902e+04,2495.0,5043.0,16.06753,0.60111,14.23267,12.73670,0.04966,0.00000,0.0,10.277766,2627.34,0.0,0.548186,5043.0,482,38,13,640185,UTAH,640185,492073,426455.1775,4474997.689,Lehi,169098.553773,4
4,682698,49,2119,0.163751,43000.00,1548648.398,7336198.043,1766.927829,0.0,0.0,0.0,1.0,0.0,0.0,7.132996e+03,3632.0,7133.0,17.61362,3.74880,12.20122,13.84603,0.37882,0.00000,0.0,8.210217,9997.32,0.0,2.129948,7133.0,488,39,14,682698,UTAH,682698,492119,429646.0053,4479065.962,Draper,254583.603528,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
845214,886695,49,2666,4.531522,48006.00,1574258.679,7220957.846,1372.488928,0.0,0.0,0.0,0.0,0.0,0.0,1.973931e+05,107.0,214.0,5.32442,4.38052,6.67391,8.88569,3.24828,0.01905,0.0,30.544293,19622.91,0.0,5.026164,214.0,546,44,16,601655,UTAH,886695,492666,437250.4157,4443906.735,Lake Shore,111196.498940,4
845215,886696,49,2666,3.158082,33456.00,1573684.703,7220955.159,1372.488928,0.0,0.0,0.0,0.0,0.0,0.0,1.375661e+05,107.0,214.0,5.32442,4.38052,6.67391,8.88569,3.24828,0.01905,0.0,30.544293,19622.91,0.0,5.026164,214.0,546,44,16,601655,UTAH,886696,492666,437075.5120,4443906.904,Lake Shore,111196.498940,4
845216,886697,49,2666,4.895412,51862.00,1573301.933,7220958.410,1372.488928,0.0,0.0,0.0,0.0,0.0,0.0,2.132442e+05,107.0,214.0,5.32442,4.38052,6.67391,8.88569,3.24828,0.01905,0.0,30.544293,19622.91,0.0,5.026164,214.0,546,44,16,601655,UTAH,886697,492666,436958.8819,4443908.552,Lake Shore,111196.498940,4
845217,886698,49,2666,4.918818,52109.00,1572835.592,7220958.477,1372.488928,0.0,0.0,0.0,0.0,0.0,0.0,2.142637e+05,107.0,214.0,5.32442,4.38052,6.67391,8.88569,3.24828,0.01905,0.0,30.544293,19622.91,0.0,5.026164,214.0,546,44,16,601655,UTAH,886698,492666,436816.7808,4443909.375,Lake Shore,111196.498940,4


### Work on households_for_estimation

In [12]:
households_for_estimation = pd.read_csv(".\\Inputs\\households_for_estimation.csv").set_index('household_id')
households_for_estimation.head()

Unnamed: 0_level_0,cars,household_type_id,persons,income,workers,children,tenure,recent_mover,building_type_id,block_id,building_id,proportion_workers,zone_id,parcel_id,node_id,income_quartile,county_id
household_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
85,2,1,2,87500,2,0,2,1,1,17049,259924,1.0,1074,152309,6119734,3,2
86,3,1,3,-1,3,0,2,0,1,6857,440101,1.0,1607,347510,52501442,1,2
87,2,2,4,42500,2,2,2,0,1,13832,321465,0.5,1095,224414,30362517,2,2
88,3,1,3,125000,2,0,2,0,1,4031,83948,0.666667,379,390199,6082196,4,2
89,2,2,5,87500,2,3,2,0,1,8950,221670,0.4,1123,106020,6146478,3,1


In [13]:
# copy  the table
households_for_estimation_converted = households_for_estimation.copy()

# reclassify county values
households_for_estimation_converted['county_id'] = households_for_estimation_converted['county_id'].map(crosswalk1)

### Work on jobs

In [14]:
# get the jobs table from the store
jobs = store.get('jobs').reset_index()

# count instances of each county value
jobs['cid'].value_counts()

2    755026
4    272247
1    152689
3    115551
Name: cid, dtype: int64

In [15]:
# Create crosswalk dictionary for county values
# crosswalk = {2:11, 3:35, 1:57, 4:49, 0:0}

# copy  the table
jobs_converted = jobs.copy()

# reclassify county values
jobs_converted['cid'] = jobs['cid'].map(crosswalk1)

# count instances of each county value
jobs_converted['cid'].value_counts()

35    755026
49    272247
11    152689
57    115551
Name: cid, dtype: int64

### Valid Parcels

In [16]:
valid_parcels = pd.read_csv(".\\Inputs\\valid_parcels.csv").set_index('parcel_id')
valid_parcels.head()

# copy  the table
valid_parcels_converted = valid_parcels.copy()

# reclassify county values
valid_parcels_converted['county_id'] = valid_parcels_converted['county_id'].map(crosswalk1)

# count instances of each county value
valid_parcels_converted['county_id'].value_counts()

35    344428
49    181128
11    106909
57     92595
Name: county_id, dtype: int64

### Work on households

In [17]:
# get the households table from the store
households = store.get('households').reset_index()

# count instances of each county value
households['cid'].value_counts()

2    377727
4    163909
1    106535
3     83221
Name: cid, dtype: int64

## Travel Data

In [4]:
travel_data = store.get('travel_data')

In [5]:
travel_data

Unnamed: 0_level_0,Unnamed: 1_level_0,travel_time,travel_time_transit,log0,log1,log2
from_zone_id,to_zone_id,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1,1,3.15,5000.0,2.46,-0.19,-0.47
1,2,5.14,5000.0,1.03,-0.63,-0.53
1,3,4.80,5000.0,0.46,-0.59,-0.45
1,4,6.64,5000.0,-0.34,-0.67,-0.53
1,5,6.75,5000.0,-0.48,-0.67,-0.53
...,...,...,...,...,...,...
2881,2877,104.93,5000.0,-0.48,-0.48,-0.48
2881,2878,65.18,5000.0,-0.48,-0.48,-0.48
2881,2879,67.14,5000.0,-0.48,-0.48,-0.48
2881,2880,69.89,5000.0,-0.48,-0.48,-0.48


In [19]:
# copy  the table
households_converted = households.copy()

# reclassify county values
households_converted['cid'] = households_converted['cid'].map(crosswalk1)

# count instances of each county value
households_converted['cid'].value_counts()

35    377727
49    163909
11    106535
57     83221
Name: cid, dtype: int64

### Work on household_controls

In [20]:
# get the household_controls table from the csv
household_controls = pd.read_csv("./Inputs/household_controls.csv")

# count instances of each county value
household_controls['cid'].value_counts()

4    204
3    204
2    204
1    204
Name: cid, dtype: int64

In [21]:
# copy  the table
household_controls_converted = household_controls.copy()

# reclassify county values
household_controls_converted['cid'] = household_controls_converted['cid'].map(crosswalk1)

# count instances of each county value
household_controls_converted['cid'].value_counts()

57    204
49    204
35    204
11    204
Name: cid, dtype: int64

### Employment_Controls

In [22]:
# get the household_controls table from the csv
employment_controls = pd.read_csv("./Inputs/employment_controls.csv")

# count instances of each county value
employment_controls['cid'].value_counts()

3    612
1    612
4    612
2    612
Name: cid, dtype: int64

In [23]:
# Create crosswalk dictionary for county values
# crosswalk = {1:11, 2:35, 3:57, 4:49, 0:0}

# copy  the table
employment_controls_converted = employment_controls.copy()

# reclassify county values
employment_controls_converted['cid'] = employment_controls_converted['cid'].map(crosswalk1)

employment_controls_converted['cid'].value_counts()

57    612
49    612
35    612
11    612
Name: cid, dtype: int64

### Population_Controls

In [24]:
pop_controls = pd.read_csv("./Inputs/population_controls.csv")
pop_controls.head()

Unnamed: 0,year,number_of_population,cid
0,2010,304323,1
1,2010,1013395,2
2,2010,222446,3
3,2010,503812,4
4,2011,309937,1


In [25]:
# count instances of each county value
pop_controls['cid'].value_counts()

4    51
3    51
2    51
1    51
Name: cid, dtype: int64

In [26]:
# copy  the table
pop_controls_converted = pop_controls.copy()

# reclassify county values
pop_controls_converted['cid'] = pop_controls_converted['cid'].map(crosswalk1)

pop_controls_converted['cid'].value_counts()

57    51
49    51
35    51
11    51
Name: cid, dtype: int64

### tazctyeq

In [27]:
tazctyeq = pd.read_csv(r".\\Inputs\TAZCTYEQ.csv")

# copy  the table
tazctyeq_converted = tazctyeq.copy()

# reclassify county values
tazctyeq_converted['COUNTY'] = tazctyeq['COUNTY'].map(crosswalk2)

In [28]:
tazctyeq_converted['COUNTY'].value_counts()

35    1134
49    1085
57     283
11     231
3      140
Name: COUNTY, dtype: int64

## Rename *cid* and *county_id* attributes to *CO_FIPS* (DISABLED FOR NOW)

In [29]:
# renames field names in place
def renameAttribute(dataframe,incolumns,outcolumn):
    for column in incolumns:
        if column in list(dataframe.columns):
            dataframe.rename(columns={column:outcolumn},inplace=True)
            print('"{}" was renamed to "{}"'.format(column, outcolumn)) 
        else:
            print('"{}" was not present in the table'.format(column)) 

In [30]:
# # rename attributes
# renameAttribute(household_controls_converted,['cid','county_id'],'CO_FIPS')
# renameAttribute(employment_controls_converted,['cid','county_id'],'CO_FIPS')
# renameAttribute(tazctyeq_converted,['cid','county_id', 'COUNTY'],'CO_FIPS')
# renameAttribute(pop_controls_converted,['cid','county_id'],'CO_FIPS')
# renameAttribute(households_converted,['cid','county_id'],'CO_FIPS')
# renameAttribute(households_for_estimation_converted,['cid','county_id'],'CO_FIPS')
# renameAttribute(jobs_converted,['cid','county_id'],'CO_FIPS')
# renameAttribute(parcels_converted,['cid','county_id'],'CO_FIPS')
# renameAttribute(valid_parcels_converted,['cid','county_id'],'CO_FIPS')

In [31]:
parcels_converted[parcels_converted['cid_old']==2].head()

Unnamed: 0,parcel_id,county_id,zone_id,parcel_acres,land_value,x,y,elevation,fwy_exit,airport,rail_depot,stream,trail,university,shape_area,volume_one_way,volume_two_way,airport_distance,fwy_exit_dist,raildepot_dist,university_dist,trail_dist,stream_dist,train_station,rail_stn_dist,bus_rte_dist,bus_stop,bus_stop_dist,volume_two_way_nofwy,distsml_id,distmed_id,distlrg_id,parent_parcel,CO_NAME,parcel_id_REMM,county_taz_id,utmxi,utmyi,city,zonal_ppa,cid_old
784,759,35,753,0.10538,15696.48,1524599.4,7458829.691,1290.891936,0.0,0.0,1.0,0.0,0.0,0.0,4590.350676,56995.0,56995.0,3.02898,0.30639,2.29819,4.68582,0.93363,0.1447,0.0,1.763501,85.394917,0.0,0.320471,3192.0,178,20,7,759,SALT LAKE,759,350753,422534.9085,4516445.824,Salt Lake City,298882.856973,2
1223,177674,35,938,0.114779,55400.0,1505730.894,7402047.716,1395.936744,0.0,0.0,0.0,0.0,0.0,0.0,4999.783745,2508.0,5097.0,10.22282,2.48318,4.3142,11.54738,0.95341,0.23434,0.0,4.60762,286.069333,0.0,0.088703,35230.0,225,22,7,177674,SALT LAKE,177674,350938,416684.1299,4499205.637,Taylorsville,301755.362981,2
1241,177439,35,938,0.202456,74600.0,1506568.857,7403813.28,1395.936744,0.0,0.0,0.0,0.0,0.0,0.0,8818.995405,1393.0,2889.0,9.88423,2.25074,4.24349,11.19576,0.90151,0.41012,0.0,4.494162,1051.98,0.0,0.2623,2889.0,225,22,7,177439,SALT LAKE,177439,350938,416942.5642,4499742.13,Taylorsville,301755.362981,2
1541,127666,35,1301,0.184951,138700.0,1555039.647,7420125.967,1464.237492,0.0,0.0,0.0,0.0,0.0,0.0,8056.444881,1050.0,2143.0,11.06456,0.56751,5.747,5.40322,0.58982,0.32709,0.0,4.610442,913.795246,0.0,0.368686,2143.0,302,26,9,127666,SALT LAKE,127666,351301,431739.9638,4504627.384,Salt Lake County,570370.427781,2
1753,10724,35,763,0.106206,35400.0,1524836.95,7451108.153,1287.756705,0.0,0.0,0.0,0.0,0.0,0.0,4626.351474,6105.0,11818.0,3.1597,0.66114,3.67933,4.16946,0.43499,0.46961,0.0,0.549671,53.040929,0.0,0.100541,11818.0,181,20,7,10724,SALT LAKE,10724,350763,422592.0687,4514120.355,Salt Lake City,240029.419718,2


## Export csvs

In [32]:
employment_controls_converted.to_csv(".\\Outputs\\employment_controls.csv", index=False)
tazctyeq_converted.to_csv(".\\Outputs\\TAZCTYEQ.csv", index=False)
pop_controls_converted.to_csv(".\\Outputs\\population_controls.csv", index=False)
household_controls_converted.to_csv(".\\Outputs\\household_controls.csv", index=False)

## Load tables into a new h5

Table that are not loaded because they exist as csvs:
- household_controls
- employment_controls

Tables that are not loaded because they are unnecessary or duplicates
- household_for_estimation
- households_for_estimation1
- travel_data
- valid_parcels
- zoning_base_line

In [33]:
# store path for new hdf
new_hdf = '.\\Outputs\\remm_data_2015_base_year_11062020.h5'

# if it exists already delete it; it will not overwrite
if os.path.exists(new_hdf):
    try:
        new_hdf.close()
    except:
        pass
    
    os.remove(new_hdf)

# Create empty h5   
hdf = pd.HDFStore(new_hdf)   

In [34]:
# load the new tables to the h5
hdf.put('parcels', parcels_converted.set_index('parcel_id'), format='t', data_columns=True)
hdf.put('jobs', jobs_converted.set_index('jobs_id'), format='t', data_columns=True)
hdf.put('households', households_converted, format='t', data_columns=True)

In [35]:
# load the unchanged tables to the h5
# these are from csv because the h5 file was created in an old version of pandas and some tables cant be accessed
hdf.put('buildings_for_estimation', pd.read_csv(".\\Inputs\\buildings_for_estimation.csv").set_index('building_id'), format='t', data_columns=True)
hdf.put('buildings_for_estimation_grouped', pd.read_csv(".\\Inputs\\buildings_for_estimation_grouped.csv").set_index('building_id'), format='t', data_columns=True)
hdf.put('buildings', pd.read_csv(".\\Inputs\\buildings.csv").set_index('building_id'), format='t', data_columns=True)
hdf.put('households_for_estimation', households_for_estimation_converted, format='t', data_columns=True)
hdf.put('travel_data', travel_data, format='t', data_columns=True)
hdf.put('valid_parcels', valid_parcels_converted, format='t', data_columns=True)
hdf.put('zoning', pd.read_csv(".\\Inputs\\zoning.csv").set_index('id'), format='t', data_columns=True)
hdf.put('zoning_baseline', pd.read_csv(".\\Inputs\\zoning_baseline.csv").set_index('parcel_id'), format='t', data_columns=True)
hdf.put('zoning_for_parcels', pd.read_csv(".\\Inputs\\zoning_for_parcels.csv").set_index('parcel'), format='t', data_columns=True)

In [36]:
# close the h5
hdf.close()

In [3]:
new_hdf = '.\\Outputs\\remm_data_2015_base_year_11062020.h5'
store = pd.HDFStore(new_hdf)
store['households']



Unnamed: 0,household_id,cars,household_type_id,persons,income,workers,children,age_of_head,race_id,familyhh,block_id,cid,building_id
0,48,2,1,2,27356.53846,2,0,20,2,1,4443,11,14091
1,110,1,1,2,22795.18946,2,0,21,1,1,1592,11,81576
2,111,1,1,2,21871.72308,2,0,21,1,1,2230,11,112533
3,112,1,1,2,22795.18946,2,0,21,1,1,2745,11,425457
4,113,1,1,2,26076.39344,2,0,21,1,1,3110,11,41442
...,...,...,...,...,...,...,...,...,...,...,...,...,...
731387,854788,4,1,6,210472.95310,2,1,50,1,1,24855,49,722305
731388,854789,2,1,5,111112.62500,1,1,23,1,1,31302,49,728663
731389,854790,2,1,6,250486.95010,1,1,31,1,1,27511,49,736141
731390,854791,4,1,5,179748.78920,3,1,49,1,1,31310,49,731342


## Code dump

In [38]:
# hdf.put('household_controls', household_controls_converted.set_index('year'), format='t', data_columns=True)
# hdf.put('employment_controls', employment_controls_converted.set_index('year'), format='t', data_columns=True)
# hdf.put('household_for_estimation', pd.read_csv(".\\Data\\household_for_estimation.csv").set_index('household_id'), format='t', data_columns=True) # Not used
# hdf.put('travel_data', pd.read_csv(".\\Data\\travel_data.csv").set_index(['from_zone_id', 'to_zone_id']), format='t', data_columns=True)
# hdf.put('households_for_estimation1', pd.read_csv(".\\Data\\households_for_estimation1.csv").set_index('household_id'), format='t', data_columns=True) # Not used
# hdf.put('zoning_base_line', pd.read_csv(".\\Data\\zoning_base_line.csv").set_index('parcel_id'), format='t', data_columns=True) # Not used
# hdf.put('valid_parcels', pd.read_csv(".\\Data\\valid_parcels.csv").set_index('parcel_id'), format='t', data_columns=True) # used in datasources.py; commented out