- Produce a CSV summary of Daysim results for use in Tableau and other tools

In [63]:
import pandas as pd
import h5py
% matplotlib inline

In [197]:
# Open both inputs read-only: this notebook only reads from them, and an
# explicit mode keeps older h5py releases (which defaulted to a read/write
# mode that could create a missing file) from touching the shared drive.
survey_h5 = h5py.File(r'R:\SoundCast\Inputs\2014\etc\survey.h5', 'r')
daysim_h5 = h5py.File(r'R:\SoundCast\releases\TransportationFutures2010\outputs\daysim_outputs.h5', 'r')

# Label for the model run; passed to h5_to_df as the name of the daysim dataset
scenario_name = 'new_run'

In [198]:
def h5_to_df(h5file, table_list, name=False):
    """
    Read the requested tables of an h5 file into pandas DataFrames.

    Each entry of table_list is looked up as a group of h5file, and every
    field stored under that group becomes one column of that table's frame.

    Parameters
    ----------
    h5file : mapping of table name -> mapping of field name -> array-like
        For example an open h5py.File.
    table_list : iterable of table names to load.
    name : optional label; when truthy it is stored under the 'name' key.

    Returns
    -------
    dict mapping each table name to its DataFrame (plus 'name' if given).
    """
    def _load_table(group):
        # One column per field; [:] reads the full contents of the field
        frame = pd.DataFrame()
        for column in group.keys():
            frame[column] = group[column][:]
        return frame

    tables = {table: _load_table(h5file[table]) for table in table_list}

    if name:
        tables['name'] = name

    return tables

In [199]:
# Observed data: tables read from the survey file
survey = h5_to_df(
    survey_h5,
    table_list=['Household', 'Trip', 'Tour', 'Person', 'HouseholdDay'],
    name='survey'
)

# Model data: the same tables read from the Daysim outputs, named after the scenario
daysim = h5_to_df(
    daysim_h5,
    table_list=['Household', 'Trip', 'Tour', 'Person', 'HouseholdDay'],
    name=scenario_name
)

In [200]:
# # Add unique person ID 
# survey['Trip']['unique_id'] = survey['Trip']['hhno'].astype('str') + '_' + survey['Trip']['pno'].astype('str')
# survey['Person']['unique_id'] = survey['Person']['hhno'].astype('str') + '_' + survey['Person']['pno'].astype('str')

# daysim['Trip']['unique_id'] = daysim['Trip']['hhno'].astype('str') + '_' + daysim['Trip']['pno'].astype('str')
# daysim['Person']['unique_id'] = daysim['Person']['hhno'].astype('str') + '_' + daysim['Person']['pno'].astype('str')

## Add Labels

In [201]:
# Apply text labels to coded fields, based on table, field, and value.
# The loops below read the columns table, field, value and text of this file.
labels = pd.read_csv(r'variable_labels.csv')

# Mapping an already-labelled column a second time turns its entries into NaN
# (the label text is not among the coded values), so each loaded dataset must
# be labelled only once. The marker is kept inside the dataset dict itself:
# a fresh h5_to_df() load returns a new dict without it, so reloading the
# data re-enables labelling, while re-running this cell alone is a no-op.
for data in (survey, daysim):
    if data.get('labels_applied', False):
        continue
    for table in labels['table'].unique():
        table_labels = labels[labels['table'] == table]
        for field in table_labels['field'].unique():
            field_labels = table_labels[table_labels['field'] == field]
            # Lookup series: coded value -> label text
            value_to_text = pd.Series(field_labels['text'].values, index=field_labels['value'])
            data[table][field] = data[table][field].map(value_to_text)
    data['labels_applied'] = True

# Notebook-level flag kept for any later cell that checks it
labels_applied = True

## Define dataset
- Select which results to summarize: model output or observed survey data

In [202]:
# Dataset summarized by the cells below. `daysim` is loaded from the Daysim
# outputs file and `survey` from the survey file; swap the assignment to
# switch between them.
dataset = daysim
# dataset = survey


## Aggregate Measures
 - total persons, total households, avg household size, avg trips per person, avg trip length, VMT per person, avg distance to work, avg distance to school

In [203]:
# Empty summary table; add_row() adds one row per aggregate measure
df = pd.DataFrame()

In [204]:
def add_row(df, row_name, description, value):
    """
    Add (or overwrite) one measure in the summary table.

    Parameters
    ----------
    df : DataFrame holding the summary; it is modified in place.
    row_name : index label of the measure's row.
    description : text stored in the 'description' column.
    value : value stored in the 'value' column.

    Returns
    -------
    The same DataFrame, so calls can be written as ``df = add_row(df, ...)``.
    """
    # .loc replaces the removed .ix indexer; like .ix, label-based assignment
    # creates the row and the column when they do not exist yet.
    df.loc[row_name, 'description'] = description
    df.loc[row_name, 'value'] = value

    return df

In [205]:
# Total Persons: sum of the psexpfac column of the Person table
person_weight_total = dataset['Person']['psexpfac'].sum()
df = add_row(df, row_name='total_persons', description='Total Persons', value=person_weight_total)

In [206]:
# Total Households: sum of the hhexpfac column of the Household table
hh_weight_total = dataset['Household']['hhexpfac'].sum()
df = add_row(df, row_name='total_hhs', description='Total Households', value=hh_weight_total)

In [207]:
# Average Household Size: hhsize averaged over households, weighted by hhexpfac
hh_table = dataset['Household']
weighted_hh_size = (hh_table['hhsize'] * hh_table['hhexpfac']).sum()
avg_hh_size = weighted_hh_size / hh_table['hhexpfac'].sum()
df = add_row(df, row_name='avg_hh_size', description='Average Household Size', value=avg_hh_size)

In [208]:
# Average Trips per Person: total of trexpfac (Trip) divided by total of psexpfac (Person)
trip_weight_total = dataset['Trip']['trexpfac'].sum()
person_weight_total = dataset['Person']['psexpfac'].sum()
trips_per_person = trip_weight_total / person_weight_total
df = add_row(df, row_name='trips_per_person', description='Average Trips per Person', value=trips_per_person)

In [209]:
# Average Trip Length: travdist averaged over trips, weighted by trexpfac
trip_table = dataset['Trip']
trip_len = (trip_table['travdist'] * trip_table['trexpfac']).sum() / trip_table['trexpfac'].sum()
# The exported description previously read 'Average Trips Length' (typo)
df = add_row(df, row_name='trip_len', description='Average Trip Length', value=trip_len)

In [210]:
# VMT per capita: weighted travdist of trips whose dorp is 'Driver', divided
# by the total of psexpfac.
# NOTE(review): the filter compares against a text label, so it presumably
# relies on the label-mapping cell having run on dorp - confirm.
trip_table = dataset['Trip']
is_driver = trip_table['dorp'] == 'Driver'
driver_trips = trip_table[is_driver]
driver_distance = (driver_trips['travdist'] * driver_trips['trexpfac']).sum()
vmt_per_cap = driver_distance / dataset['Person']['psexpfac'].sum()
df = add_row(df, row_name='vmt_per_cap', description='VMT per Person', value=vmt_per_cap)

In [211]:
# Average distance to work: tautodist of tours whose pdpurp is 'Work',
# averaged with toexpfac as the weight
tour_table = dataset['Tour']
is_work_tour = tour_table['pdpurp'] == 'Work'
to_work_tours = tour_table[is_work_tour]
weighted_work_dist = (to_work_tours['tautodist'] * to_work_tours['toexpfac']).sum()
dist_to_work = weighted_work_dist / to_work_tours['toexpfac'].sum()
df = add_row(df, row_name='dist_to_work', description='Avg Distance to Work', value=dist_to_work)

In [212]:
# Average distance to school: tautodist of tours whose pdpurp is 'School',
# averaged with toexpfac as the weight
tour_table = dataset['Tour']
is_school_tour = tour_table['pdpurp'] == 'School'
to_school_tours = tour_table[is_school_tour]
weighted_school_dist = (to_school_tours['tautodist'] * to_school_tours['toexpfac']).sum()
dist_to_school = weighted_school_dist / to_school_tours['toexpfac'].sum()
df = add_row(df, row_name='dist_to_school', description='Avg Distance to School', value=dist_to_school)

In [213]:
# Transit Pass Ownership

In [214]:
# Total Boardings

In [215]:
# Total Workers

In [216]:
# % of workers working at home

In [217]:
# Work-at-home workers 

In [220]:
# Add a field identifying which dataset the measures were computed from
source_name = dataset['name']
df['source'] = source_name

# Write this summary to csv, with the dataset name as the file name prefix
output_path = source_name + '_' + 'aggregate_measures.csv'
df.to_csv(output_path)

## Destination Choice Report
- distance by tour purpose
- distance by tour mode
- trips per tour by tour purpose
- trips per tour by tour mode
- distance by trip purpose
- distance by trip mode
- tours by destination district
- trips by destination district
- residents by district

## Day Pattern
- Tours by purpose distribution


## Mode Choice
- tour mode share
- tour mode share by purpose
- trip mode by tour mode
- tours by mode and travel time

## Time of Day