- Produce CSV summaries of DaySim results for use in Tableau and other tools

In [63]:
from functools import reduce

import h5py
import pandas as pd
% matplotlib inline

In [197]:
# Open both stores explicitly read-only. This notebook only reads from them,
# and h5py versions before 3.0 default to append mode when no mode is given.
survey_h5 = h5py.File(r'R:\SoundCast\Inputs\2014\etc\survey.h5', 'r')
daysim_h5 = h5py.File(r'R:\SoundCast\releases\TransportationFutures2010\outputs\daysim_outputs.h5', 'r')

# Name stored with the model results; used as the `source` column and as the
# prefix of the csv files written below.
scenario_name = 'new_run'

In [290]:
# Load lookup files
# District lookup: later cells join it on its `taz` column and group the
# result by `district_name`.
districts = pd.read_csv(r'data/district_lookup.csv')

In [198]:
def h5_to_df(h5file, table_list, name=False):
    """
    Read a set of tables from an h5-style container into pandas DataFrames.

    Assumes a two-level hierarchy: `h5file[table]` is a group whose keys are
    field names, and `h5file[table][field][:]` yields that field's values.

    Returns a dict mapping each entry of `table_list` to its DataFrame. When
    `name` is truthy it is stored as well, under the key 'name'.
    """
    tables = {}

    for table_name in table_list:
        group = h5file[table_name]
        frame = pd.DataFrame()
        # One DataFrame column per field stored in the group
        for column in group.keys():
            frame[column] = group[column][:]
        tables[table_name] = frame

    if name:
        tables['name'] = name

    return tables

In [199]:
# Load the same set of tables from the survey and from the model output
table_names = ['Household','Trip','Tour','Person','HouseholdDay']
survey = h5_to_df(survey_h5, table_list=table_names, name='survey')
daysim = h5_to_df(daysim_h5, table_list=table_names, name=scenario_name)

In [200]:
# # Add unique person ID 
# survey['Trip']['unique_id'] = survey['Trip']['hhno'].astype('str') + '_' + survey['Trip']['pno'].astype('str')
# survey['Person']['unique_id'] = survey['Person']['hhno'].astype('str') + '_' + survey['Person']['pno'].astype('str')

# daysim['Trip']['unique_id'] = daysim['Trip']['hhno'].astype('str') + '_' + daysim['Trip']['pno'].astype('str')
# daysim['Person']['unique_id'] = daysim['Person']['hhno'].astype('str') + '_' + daysim['Person']['pno'].astype('str')

## Add Labels

In [201]:
# Apply text labels to coded fields, keyed on table, field, and value
labels = pd.read_csv(r'variable_labels.csv')

# Labels must be applied only once per loaded dataset: mapping an already
# labelled column again looks the label text up as if it were a code and
# replaces every value with NaN. The marker is stored on the dataset dict
# itself, so re-running this cell is a no-op while reloading the data
# (which builds a fresh dict) lets the labels be applied again.
for data in (survey, daysim):
    if data.get('labels_applied'):
        continue
    for (table, field), rows in labels.groupby(['table', 'field']):
        # value -> text lookup for this table/field
        lookup = pd.Series(rows['text'].values, index=rows['value'])
        data[table][field] = data[table][field].map(lookup)
    data['labels_applied'] = True

# Notebook-level flag kept for any cell that still checks it
labels_applied = True

## Define dataset
- model or observed

In [439]:
# Choose the dataset all summaries below are computed for: `daysim` (model)
# or `survey` (observed). Leave exactly one assignment uncommented.
# dataset = daysim
dataset = survey


## Aggregate Measures
 - total persons, households, avg household size, avg trips/person, vmt per person, avg distance to work, avg distance to school

In [203]:
# Empty frame that add_row() fills with one row per aggregate measure
df = pd.DataFrame()

In [204]:
def add_row(df, row_name, description, value):
    """
    Set the 'description' and 'value' cells of row `row_name` in `df`.

    The row and the two columns are created if they do not exist yet.
    `df` is modified in place and also returned.
    """
    # .loc supports setting with enlargement; the former .ix indexer was
    # removed from pandas in 1.0.
    df.loc[row_name, 'description'] = description
    df.loc[row_name, 'value'] = value

    return df

In [205]:
# Total persons: sum of `psexpfac` over the Person table
total_persons = dataset['Person']['psexpfac'].sum()
df = add_row(df, row_name='total_persons', description='Total Persons', value=total_persons)

In [206]:
# Total households: sum of `hhexpfac` over the Household table
total_hhs = dataset['Household']['hhexpfac'].sum()
df = add_row(df, row_name='total_hhs', description='Total Households', value=total_hhs)

In [207]:
# Average household size, weighted by `hhexpfac`
households = dataset['Household']
weighted_size_total = households['hhsize'].mul(households['hhexpfac']).sum()
avg_hh_size = weighted_size_total/households['hhexpfac'].sum()
df = add_row(df, row_name='avg_hh_size', description='Average Household Size', value=avg_hh_size)

In [208]:
# Average trips per person: summed `trexpfac` over summed `psexpfac`
trip_weight_total = dataset['Trip']['trexpfac'].sum()
person_weight_total = dataset['Person']['psexpfac'].sum()
trips_per_person = trip_weight_total/person_weight_total
df = add_row(df, row_name='trips_per_person', description='Average Trips per Person', value=trips_per_person)

In [209]:
# Average trip length: `travdist` weighted by `trexpfac`
trip_table = dataset['Trip']
weighted_dist_total = trip_table['travdist'].mul(trip_table['trexpfac']).sum()
trip_len = weighted_dist_total/trip_table['trexpfac'].sum()
df = add_row(df, row_name='trip_len', description='Average Trips Length', value=trip_len)

In [210]:
# VMT per capita: weighted distance of trips labelled 'Driver' in `dorp`,
# divided by the summed person weights
all_trips = dataset['Trip']
driver_trips = all_trips[all_trips['dorp'] == 'Driver']
driver_dist_total = driver_trips['travdist'].mul(driver_trips['trexpfac']).sum()
vmt_per_cap = driver_dist_total/dataset['Person']['psexpfac'].sum()
df = add_row(df, row_name='vmt_per_cap', description='VMT per Person', value=vmt_per_cap)

In [211]:
# Average distance to work: `tautodist` of tours whose `pdpurp` is 'Work',
# weighted by `toexpfac`
all_tours = dataset['Tour']
to_work_tours = all_tours[all_tours['pdpurp'] == 'Work']
work_dist_total = to_work_tours['tautodist'].mul(to_work_tours['toexpfac']).sum()
dist_to_work = work_dist_total/to_work_tours['toexpfac'].sum()
df = add_row(df, row_name='dist_to_work', description='Avg Distance to Work', value=dist_to_work)

In [212]:
# Average distance to school: `tautodist` of tours whose `pdpurp` is 'School',
# weighted by `toexpfac`
all_tours = dataset['Tour']
to_school_tours = all_tours[all_tours['pdpurp'] == 'School']
school_dist_total = to_school_tours['tautodist'].mul(to_school_tours['toexpfac']).sum()
dist_to_school = school_dist_total/to_school_tours['toexpfac'].sum()
df = add_row(df, row_name='dist_to_school', description='Avg Distance to School', value=dist_to_school)

In [213]:
# Transit Pass Ownership

In [214]:
# Total Boardings

In [215]:
# Total Workers

In [216]:
# % of workers working at home

In [217]:
# Work-at-home workers 

In [220]:
# Tag every row with the name of the dataset it was computed from
source_name = dataset['name']
df['source'] = source_name

# Write this summary to csv, prefixed with the dataset name
df.to_csv(source_name + '_' + 'aggregate_measures.csv')

## Destination Choice Report



In [440]:
# Short aliases for the tables of the selected dataset
tour, trip, hh, person = (dataset[key] for key in ('Tour', 'Trip', 'Household', 'Person'))

## Results by Trip/Tour Purpose
- distance by tour purpose
- distance by trip purpose
- trips per tour by tour purpose
- number of tours by purpose
- tour mode share
- trip mode share

In [430]:
# Tour distance by purpose: `tautodist` weighted by `toexpfac`, per `pdpurp`
tour['tautodist_wt'] = tour['toexpfac']*tour['tautodist']
# Group once and sum only the two columns needed. Summing the whole table
# would also aggregate every other column, including the text label columns.
tour_purp_sums = tour.groupby('pdpurp')[['tautodist_wt', 'toexpfac']].sum()
dist_by_tour_purp = (tour_purp_sums['tautodist_wt']/tour_purp_sums['toexpfac']).to_frame('dist_by_tour_purp')
dist_by_tour_purp['purpose'] = dist_by_tour_purp.index

In [431]:
# Trip distance by purpose
trip['travdist_wt'] = trip['trexpfac']*trip['travdist']
dist_by_trip_purp = pd.DataFrame(trip.groupby('dpurp').sum()['travdist_wt']/trip.groupby('dpurp').sum()['trexpfac'],
                                columns=['dist_by_trip_purp'])
dist_by_trip_purp['purpose'] = dist_by_trip_purp.index

In [432]:
# Trips per tour



In [433]:
# number of total tours generated by purpose
total_tours = pd.DataFrame(tour.groupby('pdpurp').sum()['toexpfac'])
total_tours.rename(columns={'toexpfac':'total_tours'},inplace=True)
total_tours['purpose'] = total_tours.index
total_tours.reset_index(inplace=True, drop=True)

In [434]:
# trip mode share
trips_by_mode = pd.pivot_table(trip,values='trexpfac',index='dpurp',columns='mode',aggfunc='sum')
trips_by_mode.columns = [i+'_trips' for i in trips_by_mode.columns]
trips_by_mode['purpose'] = trips_by_mode.index
trips_by_mode.reset_index(inplace=True, drop=True)

In [435]:
# Display the table for a visual check
trips_by_mode

Unnamed: 0,Bike_trips,HOV2_trips,HOV3+_trips,SOV_trips,School Bus_trips,Transit_trips,Walk_trips,purpose
0,,902,,38111,,39013,,Change Mode Inserted Purpose
1,5534.0,565471,693146.0,353230,8295.0,4720,93805.0,Escort
2,5301.0,262273,159657.0,462513,10521.0,15437,120710.0,Meal
3,44286.0,1204584,997423.0,2304275,181962.0,122259,370545.0,None/Home
4,13876.0,436261,332701.0,1095926,19721.0,28100,151499.0,Personal Business
5,13077.0,254285,297501.0,118570,156534.0,20493,93311.0,School
6,5752.0,163079,111754.0,424008,12373.0,14819,83967.0,Shop
7,9009.0,324966,210053.0,601633,8861.0,17754,153761.0,Social
8,19431.0,125256,70771.0,1678522,345.0,114243,151591.0,Work


In [436]:
# tour mode share
tours_by_mode = pd.pivot_table(tour,values='toexpfac',index='pdpurp',columns='tmodetp',aggfunc='sum')
tours_by_mode.columns = [i+'_tours' for i in tours_by_mode.columns]
tours_by_mode['purpose'] = tours_by_mode.index
tours_by_mode.reset_index(inplace=True, drop=True)
tours_by_mode

Unnamed: 0,Bike_tours,HOV2_tours,HOV3+_tours,Park_tours,SOV_tours,School Bus_tours,Transit_tours,Walk_tours,purpose
0,2679,309456,424529,,12025,,158,42555,Escort
1,3083,199476,102124,,187469,,8908,77718,Meal
2,8088,170718,106945,,394193,,15318,31971,Personal Business
3,12523,118772,240041,,82582,267858.0,27064,72707,School
4,3347,66685,35131,,173360,,6324,32042,Shop
5,7638,282242,161012,,377094,,17958,119549,Social
6,15020,182143,85321,39013.0,957005,,102249,31486,Work


In [437]:
# merge all dataframes with data by purpose
df_list = [dist_by_tour_purp, dist_by_trip_purp, total_tours, tours_by_mode, trips_by_mode]
purpose_df = reduce(lambda left,right: pd.merge(left,right,on='purpose'), df_list)

# add field specif
purpose_df['source'] = dataset['name']
purpose_df.to_csv(dataset['name'] + '_' + 'purpose_df.csv')

## Results By Mode
- distance by tour mode
- trips per tour by tour mode
- trip mode by tour mode
- number of tours by mode
- travel time by mode

In [441]:
# Tour distance by purpose
tour['tautodist_wt'] = tour['toexpfac']*tour['tautodist']
dist_by_tour_mode = pd.DataFrame(tour.groupby('tmodetp').sum()['tautodist_wt']/tour.groupby('tmodetp').sum()['toexpfac'],
                                columns=['dist_by_tour_mode'])
dist_by_tour_mode['mode'] = dist_by_tour_mode.index

In [442]:
# Trip distance by purpose
trip['travdist_wt'] = trip['trexpfac']*trip['travdist']
dist_by_trip_mode = pd.DataFrame(trip.groupby('mode').sum()['travdist_wt']/trip.groupby('mode').sum()['trexpfac'],
                                columns=['dist_by_trip_mode'])
dist_by_trip_mode['mode'] = dist_by_trip_mode.index

In [443]:
# Trips per tour by mode


In [444]:
# trip mode by tour mode

In [445]:
# merge all dataframes with data by purpose
# Merge all dataframes with data by mode into one table keyed on 'mode'.
# `reduce` is imported from functools at the top of the file; it is not a
# builtin on Python 3.
df_list = [dist_by_tour_mode, dist_by_trip_mode]
mode_df = reduce(lambda left,right: pd.merge(left,right,on='mode'), df_list)

# Tag every row with the name of the dataset it was computed from
mode_df['source'] = dataset['name']
mode_df.to_csv(dataset['name'] + '_' + 'mode_df.csv')

## Results By District
- tours by destination district
- trips by destination district
- residents by district

In [424]:
# Trips by Destination District
trip_district = pd.merge(trip[['dtaz','trexpfac']],districts,left_on='dtaz',right_on='taz')
trip_district_df = pd.DataFrame(trip_district.groupby('district_name')['trexpfac'].sum())
trip_district_df.rename(columns={'trexpfac':'trips'},inplace=True)
trip_district_df.reset_index(inplace=True)

In [425]:
# Tours by Destination District
tour_district = pd.merge(tour[['tdtaz','toexpfac']],districts,left_on='tdtaz',right_on='taz')
tour_district_df = pd.DataFrame(tour_district.groupby('district_name')['toexpfac'].sum())
tour_district_df.rename(columns={'toexpfac':'tours'},inplace=True)
tour_district_df.reset_index(inplace=True)

In [426]:
# Residents by District
hh_per_district = pd.merge(hh[['hhtaz','hhsize','hhexpfac','hhno']], districts, left_on='hhtaz', right_on='taz')

hh_per_district['wt_hhsize'] = hh_per_district['hhsize']*hh_per_district['hhexpfac']
residents_df = pd.DataFrame(hh_per_district.groupby('district_name').sum()['wt_hhsize'])
residents_df.rename(columns={'wt_hhsize':'residents'},inplace=True)
residents_df.reset_index(inplace=True)

In [363]:
# Combine trips, tours and residents into one table keyed on district name
district_df = (trip_district_df
               .merge(tour_district_df, on='district_name')
               .merge(residents_df, on='district_name'))

In [None]:
# Merge these all together

## Time of Day