- Produce CSV summaries of Daysim results for use in Tableau and other tools

In [7]:
import os
import pandas as pd
import h5py
% matplotlib inline

In [8]:
def h5_to_df(h5file, table_list, name=False):
    """
    Read the requested tables of an h5-style container into DataFrames.

    h5file is indexed as h5file[table][field]; every field under a requested
    table becomes one column of that table's DataFrame.

    Returns a dict keyed by table name. When `name` is truthy it is stored
    under the extra key 'name' so downstream code can tag its output.
    """
    frames = {}

    for table_name in table_list:
        group = h5file[table_name]
        frame = pd.DataFrame()
        # Fill one column at a time so columns keep the order the container reports
        for column in group.keys():
            frame[column] = group[column][:]
        frames[table_name] = frame

    if not name:
        return frames

    frames['name'] = name
    return frames

In [9]:
def add_row(df, row_name, description, value):
    """
    Set (or create) the row labelled `row_name` with a description and a value.

    The frame is modified in place and also returned, so callers can write
    ``df = add_row(df, ...)``. Missing rows and columns are created on assignment.
    """
    # .loc replaces the removed DataFrame.ix indexer; for label keys the two
    # behave the same, including enlarging the frame on assignment.
    df.loc[row_name, 'description'] = description
    df.loc[row_name, 'value'] = value

    return df

In [292]:
# Lookup tables shared by the functions in this notebook (paths are relative
# to the working directory).
# labels: read by apply_lables() through its 'table', 'field', 'value' and 'text' columns.
labels = pd.read_csv(r'variable_labels.csv')
# districts: joined on 'taz' to attach 'district_name' (and any other columns) to TAZ-level results.
districts = pd.read_csv(r'data/district_lookup.csv')

In [11]:
# list of runs to add to the analysis, to come from the script argument (?)

In [164]:
def apply_lables(h5data):
    '''
    Swap coded values for their human-readable text labels.

    For every (table, field) pair listed in the module-level `labels` frame,
    the matching column of h5data[table] is mapped from 'value' to 'text'.
    The tables are modified in place and h5data is returned.
    '''
    for table_name in labels['table'].unique():
        table_labels = labels[labels['table'] == table_name]

        for field_name in table_labels['field'].unique():
            field_labels = table_labels[table_labels['field'] == field_name]
            # Lookup series: index is the coded value, data is the label text
            code_to_text = pd.Series(field_labels['text'].values, index=field_labels['value'])
            coded_column = h5data[table_name][field_name]
            h5data[table_name][field_name] = coded_column.map(code_to_text)

    return h5data

In [295]:
def process_dataset(h5file, scenario_name):
    """
    Run every summary for one dataset and write each result to its csv file.

    The tables are loaded from h5file, labelled, and passed to each summary
    function in turn; every result goes through write_csv().
    """
    tables = ['Household','Trip','Tour','Person','HouseholdDay']
    dataset = apply_lables(h5_to_df(h5file, table_list=tables, name=scenario_name))

    # (summary function, output file) pairs, processed in this order
    summaries = [
        (agg_measures, 'agg_measures.csv'),
        (tours, 'tours.csv'),
        (taz_tours, 'taz_tours.csv'),
        (trips, 'trips.csv'),
    ]
    for summarize, fname in summaries:
        write_csv(summarize(dataset), fname=fname)
    

In [14]:
def write_csv(df,fname):
    '''
    Write df to fname inside output_dir.

    A file that already exists is appended to without repeating the header
    row; otherwise a new file is created with the header.
    '''
    target = os.path.join(output_dir,fname)

    if os.path.isfile(target):
        # Existing file: add rows only
        df.to_csv(target, mode ='a', header=False)
        return

    df.to_csv(target)

In [61]:
def agg_measures(dataset):
    """
    Build the table of dataset-wide summary measures.

    Returns a DataFrame with one row per measure ('description' and 'value'
    columns, filled through add_row) plus a 'source' column holding
    dataset['name']. All totals and averages are weighted by the expansion
    factor columns (psexpfac, hhexpfac, trexpfac, toexpfac).
    """
    person = dataset['Person']
    household = dataset['Household']
    trip = dataset['Trip']
    tour = dataset['Tour']

    def mean_tour_distance(purpose):
        # Weighted mean of tautodist over tours whose pdpurp equals `purpose`
        subset = tour[tour['pdpurp'] == purpose]
        return (subset['tautodist']*subset['toexpfac']).sum()/subset['toexpfac'].sum()

    total_persons = person['psexpfac'].sum()
    total_hhs = household['hhexpfac'].sum()
    total_trips = trip['trexpfac'].sum()

    # Only trips whose 'dorp' label is 'Driver' count towards VMT
    drivers = trip[trip['dorp'] == 'Driver']

    measures = [
        ('total_persons', 'Total Persons', total_persons),
        ('total_hhs', 'Total Households', total_hhs),
        ('avg_hh_size', 'Average Household Size',
         (household['hhsize']*household['hhexpfac']).sum()/total_hhs),
        ('trips_per_person', 'Average Trips per Person', total_trips/total_persons),
        ('trip_len', 'Average Trips Length',
         (trip['travdist']*trip['trexpfac']).sum()/total_trips),
        ('vmt_per_cap', 'VMT per Person',
         (drivers['travdist']*drivers['trexpfac']).sum()/total_persons),
        ('dist_to_work', 'Avg Distance to Work', mean_tour_distance('Work')),
        ('dist_to_school', 'Avg Distance to School', mean_tour_distance('School')),
    ]

    summary = pd.DataFrame()
    for row_name, description, value in measures:
        summary = add_row(summary, row_name=row_name, description=description, value=value)

    # Tag every row with the dataset it came from
    summary['source'] = dataset['name']

    return summary

In [255]:
def tours(dataset):
    """
    Summarise tours by person type, purpose, mode and destination district.

    Returns a DataFrame indexed by (pptyp, pdpurp, tmodetp, district_name) with:
      toexpfac  - sum of tour expansion factors in the group
      tautodist - unweighted mean of tautodist in the group
      tautotime - unweighted mean of tautotime in the group
      psexpfac  - sum of person expansion factors for the group's pptyp
      source    - dataset['name']
    """
    tour = dataset['Tour']
    person = dataset['Person']

    # Attach person attributes, then the district of the tour destination TAZ
    tour_person = pd.merge(tour, person, on=['hhno','pno'])
    tour_person = pd.merge(tour_person, districts[['taz','district_name']],
                           left_on='tdtaz', right_on='taz')

    agg_fields = ['pptyp','pdpurp','tmodetp','district_name']
    grouped = tour_person.groupby(agg_fields)

    tours_df = pd.DataFrame(grouped['toexpfac'].sum())
    tours_df = tours_df.join(pd.DataFrame(grouped['tautodist'].mean()))
    tours_df = tours_df.join(pd.DataFrame(grouped['tautotime'].mean()))

    # Person totals by type, matched on the 'pptyp' index level.
    # Select the column before summing: summing the whole frame would also
    # try to aggregate every other column, including label strings.
    persons_by_type = person.groupby('pptyp')['psexpfac'].sum()
    tours_df = tours_df.join(pd.DataFrame(persons_by_type))

    # add datasource field
    tours_df['source'] = dataset['name']

    return tours_df

In [301]:
def trips(dataset):
    """
    Summarise trips by person type, purpose, mode and destination district.

    Returns a DataFrame indexed by (pptyp, dpurp, mode, district_name) with:
      trexpfac - sum of trip expansion factors in the group
      travdist - unweighted mean of travdist in the group
      travtime - unweighted mean of travtime in the group
      source   - dataset['name']
    """
    trip = dataset['Trip']
    person = dataset['Person']

    # Attach person attributes, then the district of the trip destination TAZ
    trip_person = pd.merge(trip, person, on=['hhno','pno'])
    trip_person = pd.merge(trip_person, districts[['taz','district_name']],
                           left_on='dtaz', right_on='taz')

    # Trips by person type, purpose, mode, and destination district
    agg_fields = ['pptyp','dpurp','mode','district_name']
    grouped = trip_person.groupby(agg_fields)

    trips_df = pd.DataFrame(grouped['trexpfac'].sum())
    trips_df = trips_df.join(pd.DataFrame(grouped['travdist'].mean()))
    trips_df = trips_df.join(pd.DataFrame(grouped['travtime'].mean()))

    # NOTE(review): an earlier version joined person totals by pptyp onto
    # trip_person here, but the result was never used and did not affect the
    # returned frame, so that work was removed. tours() does attach a psexpfac
    # column to its output; add one here only if trips.csv consumers expect it.

    # add datasource field
    trips_df['source'] = dataset['name']

    return trips_df

In [290]:
def _tour_totals_by_zone(tour, zone_field):
    # Sum toexpfac per value of zone_field; returns columns ['toexpfac', 'taz'].
    totals = pd.DataFrame(tour.groupby(zone_field)['toexpfac'].sum())
    totals['taz'] = totals.index
    totals.reset_index(inplace=True, drop=True)
    return totals


def taz_tours(dataset):
    """
    Total expanded tours arriving at and leaving from each TAZ.

    Returns one row per TAZ that appears both as a tour destination (tdtaz)
    and as a tour origin (totaz), with columns toexpfac_dest, taz and
    toexpfac_origin, followed by the columns of the `districts` lookup
    (left join, so TAZs missing from the lookup are kept) and 'source'.
    """
    tour = dataset['Tour']

    # Select toexpfac before summing so no other column is aggregated
    tour_dest = _tour_totals_by_zone(tour, 'tdtaz')
    tour_origin = _tour_totals_by_zone(tour, 'totaz')

    # Inner join: a TAZ present on only one side is dropped
    df = pd.merge(tour_dest, tour_origin, on='taz', suffixes=('_dest','_origin'))
    df = pd.merge(df, districts, on='taz', how='left')

    df['source'] = dataset['name']

    return df
    

In [426]:
def network_results(model_dir, dataset_name):
    '''
    Load the network summary workbook of a model run and reshape it to long form.

    Reads the 'Network Summary' sheet of
    <model_dir>/outputs/network_summary_detailed.xlsx and returns one row per
    (tod, fieldname) pair with columns:
      model_value   - the cell value
      tod           - first index level of the stacked sheet
      fieldname     - original column name
      facility_type - text before the first '_' of fieldname
      metric        - text after the last '_' of fieldname
      source        - dataset_name
    Rows whose fieldname is 'TP_4k' or 'tod' are dropped.
    '''
    # The sheet name is passed positionally because the keyword was renamed
    # from `sheetname` to `sheet_name` between pandas releases.
    df = pd.read_excel(os.path.join(model_dir,r'outputs/network_summary_detailed.xlsx'),
                       'Network Summary')

    # drop first row if it's empty (older version of the summary)
    if pd.isnull(df['arterial_vmt'].iloc[0]):
        try:
            df.drop('tod',axis=0,inplace=True)
            df['tod'] = df.index
        except (KeyError, ValueError):
            # pandas raises KeyError (ValueError in old releases) when no row
            # is labelled 'tod'; report it and carry on with the sheet as read.
            print('format error in network_summary')
    else:
        df.index = df['tod']

    # Wide -> long: one row per (row label, column name)
    df = pd.DataFrame(df.stack())

    df['tod'] = df.index.get_level_values(0)
    df['fieldname'] = df.index.get_level_values(1)
    df.rename(columns={0:'model_value'},inplace=True)

    # Drop the TP_4k header rows and the stacked copy of the tod column
    df = df[~df['fieldname'].isin(['TP_4k', 'tod'])].copy()

    # Split the fields by vmt, vht, delay
    df['facility_type'] = df.fieldname.apply(lambda row: row.split('_')[0])
    df['metric'] = df.fieldname.apply(lambda row: row.split('_')[-1])

    # add dataset name
    df['source'] = dataset_name

    return df


In [427]:
# Driver: summarise every model run (plus the survey, once) into csv files.
#
# Alternative run list used previously:
# model_runs = [r'R:\SoundCast\releases\TransportationFutures2010',
#               r'R:\SoundCast\releases\soundcast_release_c1']

model_runs = [r'Q:\stefan\soundcast_runtime_test\soundcast',
             r'R:\SoundCast\releases\TransportationFutures2010']

# Assume standard path for survey, which can be overridden as arg
survey_dir = r'R:\SoundCast\Inputs\2014\etc\survey.h5'

output_dir = r'J:\projects\soundcast\soundcast_dashboard\model_output'
overwrite = True

if overwrite:
    # write_csv() appends to existing files, so every file written below must
    # be removed first. 'taz_tours' and 'trips' are written by
    # process_dataset() and were missing from this list, which made those two
    # files accumulate rows from earlier executions.
    for fname in ['agg_measures','district','mode','purpose','tours',
                  'taz_tours','trips','network_summary']:
        stale_path = os.path.join(output_dir,fname+'.csv')
        if os.path.isfile(stale_path):
            os.remove(stale_path)

survey_added = False

for model_dir in model_runs:

    # name of scenario is last level of directory
    scenario_name = os.path.basename(model_dir)

    print('processing ' + scenario_name)

    # Process daysim results. Open read-only; the with-block closes the file
    # once its tables have been copied into DataFrames.
    with h5py.File(os.path.join(model_dir,r'outputs/daysim_outputs.h5'), 'r') as daysim_h5:
        process_dataset(h5file=daysim_h5, scenario_name=scenario_name)

    # The survey is the same for every run, so it is only processed once
    if not survey_added:
        print('processing: survey')

        with h5py.File(survey_dir, 'r') as survey_h5:
            process_dataset(h5file=survey_h5, scenario_name='survey')

        survey_added = True

    # Process network results
    network_df = network_results(model_dir, dataset_name=scenario_name)
    write_csv(network_df, fname='network_summary.csv')

processing soundcast
processing TransportationFutures2010
test


In [228]:
# Scratch cell: load the survey tables, label them, and attach each person's
# pptyp to their tours. Later scratch cells reuse `dataset`, `tour`, `person`
# and `tour_person` from here.
# daysim_h5 = h5py.File(r'Q:\stefan\soundcast_remove_skims\soundcast\outputs\daysim_outputs.h5')
survey = h5py.File(r'R:\SoundCast\Inputs\2014\etc\survey.h5')
dataset = h5_to_df(survey, table_list=['Household','Trip','Tour','Person','HouseholdDay'], name='test')
dataset = apply_lables(dataset)

tour = dataset['Tour']
person = dataset['Person']
# Only the join keys and pptyp are taken from the person table
tour_person = pd.merge(tour,person[['hhno','pno','pptyp']],on=['hhno','pno'])

# person.to_csv(os.path.join(output_dir,'person_survey.csv'))

In [241]:
# Spot check: summed toexpfac of 'School' tours made by persons labelled 'Child Age 5-15'
kid_tours = tour_person[tour_person['pptyp'] == 'Child Age 5-15']
kid_school_tours = kid_tours[kid_tours['pdpurp'] == 'School'].sum()['toexpfac']
kid_school_tours
# person[person['pptyp'] == 'Child Age 5-15'].sum()['psexpfac']

265953.08629999927

In [246]:
# Summed toexpfac by person type, purpose and mode; the column is renamed
# 'soundcast' so another source can sit beside it.
df = pd.DataFrame(tour_person.groupby(['pptyp','pdpurp','tmodetp'])['toexpfac'].sum())
df.rename(columns={'toexpfac': 'soundcast'}, inplace=True)

# total persons by pptyp


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,soundcast,psexpfac
pptyp,pdpurp,tmodetp,Unnamed: 3_level_1,Unnamed: 4_level_1
Child Age 0-4,Escort,Bike,325.2084,272776.8587
Child Age 0-4,Escort,HOV2,5042.3261,272776.8587
Child Age 0-4,Escort,HOV3+,28053.1246,272776.8587
Child Age 0-4,Escort,Walk,2500.7926,272776.8587
Child Age 0-4,Meal,HOV2,1361.5030,272776.8587
Child Age 0-4,Meal,HOV3+,2621.8866,272776.8587
Child Age 0-4,Meal,Transit,18.5297,272776.8587
Child Age 0-4,Meal,Walk,1842.3667,272776.8587
Child Age 0-4,Personal Business,HOV2,6659.5875,272776.8587
Child Age 0-4,Personal Business,HOV3+,39955.4481,272776.8587


In [218]:
# Unweighted mean tautodist for the same groups, shown joined to df.
# The join result is only displayed; df itself is not modified.
df1 = pd.DataFrame(tour_person.groupby(['pptyp','pdpurp','tmodetp'])['tautodist'].mean())
# df1.rename(columns={'toexpfac': 'soundcast'}, inplace=True)df
df.join(df1)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,soundcast,survey,tautodist
pptyp,pdpurp,tmodetp,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Child Age 0-4,Escort,Bike,137,116.45,14.274211
Child Age 0-4,Escort,HOV2,22652,19254.20,5.428938
Child Age 0-4,Escort,HOV3+,86930,73890.50,5.734266
Child Age 0-4,Escort,Walk,4906,4170.10,1.233731
Child Age 0-4,Meal,Bike,71,60.35,4.079963
Child Age 0-4,Meal,HOV2,14508,12331.80,5.696434
Child Age 0-4,Meal,HOV3+,11377,9670.45,6.040423
Child Age 0-4,Meal,Transit,195,165.75,4.977573
Child Age 0-4,Meal,Walk,2131,1811.35,1.420009
Child Age 0-4,Personal Business,Bike,601,510.85,3.409444


In [209]:
# NOTE(review): 'survey' is set to 85% of 'soundcast', which looks like
# placeholder data for testing the output layout rather than real survey values - confirm.
df['survey'] = df['soundcast']*0.85

In [210]:
# Write the scratch comparison table to output_dir (replaces any existing test_out.csv)
df.to_csv(os.path.join(output_dir,'test_out.csv'))


In [150]:
# Scratch cell: rebuild tour_person, this time keeping every person column
# dataset['Tour']

tour = dataset['Tour']
person = dataset['Person']

tour_person = pd.merge(tour,person,on=['hhno','pno'])

In [152]:
# Summed toexpfac by purpose for persons with pptyp == 5.
# NOTE(review): pptyp is compared to the integer code 5, so this presumably ran
# on a dataset that had not been through apply_lables() - confirm.
tour_person[tour_person['pptyp'] == 5].groupby('pdpurp').sum()['toexpfac']

pdpurp
1         17066.7988
2         76329.2257
3          7373.2399
4          6114.0837
5          9219.0494
6          5012.4446
7         11224.9106
Name: toexpfac, dtype: float64

In [92]:
# Cross-tab of summed toexpfac: one row per purpose, one column per person type.
# This rebinds tour_person to the pivot table, so cells that expect the merged
# tour/person frame must be re-run after this one.
tour_person = pd.pivot_table(tour_person,values='toexpfac',index='pdpurp',columns='pptyp',aggfunc='sum')
# Move the purpose from the index into an ordinary column
tour_person['purpose'] = tour_person.index
tour_person.reset_index(inplace=True, drop=True)

tour_person

pptyp,1,2,3,4,5,6,7,8,purpose
0,1146666.0,239331.0,,,39367,15797,802,,1
1,,,,,80557,178719,507312,51736.0,2
2,158459.0,104240.0,51469.0,260990.0,19984,17656,68556,113795.0,3
3,307931.0,109555.0,96714.0,129753.0,32188,59748,77175,45663.0,4
4,127425.0,48513.0,42566.0,64386.0,10223,6598,11828,11050.0,5
5,160676.0,47088.0,47710.0,67422.0,11840,13239,26448,20016.0,6
6,304017.0,92352.0,104343.0,157445.0,38558,56353,118711,66604.0,7


In [93]:
# Number of persons by type (sum of psexpfac for each pptyp)
person.groupby('pptyp')['psexpfac'].sum()

pptyp
1        1227445
2         365397
3         326947
4         488865
5         136234
6         205243
7         595189
8         251219
Name: psexpfac, dtype: float64

In [48]:
# Scratch cell: build a '<hhno>_<pno>' person key on the Trip and Person tables.
# NOTE(review): this relies on `df` being a dict of tables at the time it ran;
# no visible cell binds `df` to such a dict, and the recorded run ended in
# KeyboardInterrupt - confirm before reusing.
survey = df
survey['Trip']['unique_id'] = survey['Trip']['hhno'].astype('str') + '_' + survey['Trip']['pno'].astype('str')
survey['Person']['unique_id'] = survey['Person']['hhno'].astype('str') + '_' + survey['Person']['pno'].astype('str')



KeyboardInterrupt: 

In [None]:
# NOTE(review): unfinished cell - pd.merge is given only one argument here and
# the cell has no execution count; complete or delete.
pd.merge(survey['Trip']['mode'])

In [200]:
# # Add unique person ID 
# survey['Trip']['unique_id'] = survey['Trip']['hhno'].astype('str') + '_' + survey['Trip']['pno'].astype('str')
# survey['Person']['unique_id'] = survey['Person']['hhno'].astype('str') + '_' + survey['Person']['pno'].astype('str')

# daysim['Trip']['unique_id'] = daysim['Trip']['hhno'].astype('str') + '_' + daysim['Trip']['pno'].astype('str')
# daysim['Person']['unique_id'] = daysim['Person']['hhno'].astype('str') + '_' + daysim['Person']['pno'].astype('str')

## Add Labels

## Define dataset
- model or observed

In [439]:
# Choose which data the cells below summarise (model output or survey).
# NOTE(review): those cells index dataset['Tour'], dataset['name'] etc., so
# `survey` must be a dict as returned by h5_to_df here, not the h5py.File
# that an earlier cell binds to the same name - confirm.
# dataset = daysim
dataset = survey


## Aggregate Measures
 - total persons, households, avg household size, avg trips/person, vmt per person, avg distance to work, avg distance to school

In [203]:
# Inline copy of the agg_measures() body, run against the module-level
# `dataset`. The result is left in `df`; no 'source' column is added here.
df = pd.DataFrame()

# Total Persons
df = add_row(df, row_name='total_persons', description='Total Persons', value=dataset['Person']['psexpfac'].sum())

# Total Households
df = add_row(df, row_name='total_hhs', description='Total Households', value=dataset['Household']['hhexpfac'].sum())

# Average Household Size
avg_hh_size = (dataset['Household']['hhsize']*dataset['Household']['hhexpfac']).sum()/dataset['Household']['hhexpfac'].sum()
df = add_row(df, row_name='avg_hh_size', description='Average Household Size', value=avg_hh_size)

# Average Trips per Person
trips_per_person = dataset['Trip']['trexpfac'].sum()/dataset['Person']['psexpfac'].sum()
df = add_row(df, row_name='trips_per_person', description='Average Trips per Person', value=trips_per_person)

# Average Trip Length
trip_len = (dataset['Trip']['travdist']*dataset['Trip']['trexpfac']).sum()/dataset['Trip']['trexpfac'].sum()
df = add_row(df, row_name='trip_len', description='Average Trips Length', value=trip_len)

# VMT per capita
# Only trips labelled 'Driver' in 'dorp' are counted
driver_trips = dataset['Trip'][dataset['Trip']['dorp'] == 'Driver']
vmt_per_cap = (driver_trips['travdist']*driver_trips['trexpfac']).sum()/dataset['Person']['psexpfac'].sum()
df = add_row(df, row_name='vmt_per_cap', description='VMT per Person', value=vmt_per_cap)

# Average distance to work
to_work_tours = dataset['Tour'][dataset['Tour']['pdpurp'] == 'Work']
dist_to_work = (to_work_tours['tautodist']*to_work_tours['toexpfac']).sum()/to_work_tours['toexpfac'].sum()
df = add_row(df, row_name='dist_to_work', description='Avg Distance to Work', value=dist_to_work)

# Average distance to school
to_school_tours = dataset['Tour'][dataset['Tour']['pdpurp'] == 'School']
dist_to_school = (to_school_tours['tautodist']*to_school_tours['toexpfac']).sum()/to_school_tours['toexpfac'].sum()
df = add_row(df, row_name='dist_to_school', description='Avg Distance to School', value=dist_to_school)

# Measures still to be added:
# Transit Pass Ownership
# Total Boardings
# Total Workers
# % workers working at home
# work-at-home workers

## Destination Choice Report



In [440]:
# Short names for the four tables used by the report cells below
tour = dataset['Tour']
trip = dataset['Trip']
hh = dataset['Household']
person = dataset['Person']

## Results by Trip/Tour Purpose
- distance by tour purpose
- distance by trip purpose
- trips per tour by tour purpose
- number of tours by purpose
- tour mode share
- trip mode share

In [None]:
# Builds purpose_df: one row per purpose with weighted mean tour and trip
# distance, total tours, and tour/trip counts by mode. Written to
# '<dataset name>_purpose_df.csv' in the working directory.
# Side effect: adds 'tautodist_wt' to `tour` and 'travdist_wt' to `trip`.
# NOTE: `reduce` is used as a builtin here, which exists only in Python 2.

# Tour distance by purpose
tour['tautodist_wt'] = tour['toexpfac']*tour['tautodist']
dist_by_tour_purp = pd.DataFrame(tour.groupby('pdpurp').sum()['tautodist_wt']/tour.groupby('pdpurp').sum()['toexpfac'],
                                columns=['dist_by_tour_purp'])
dist_by_tour_purp['purpose'] = dist_by_tour_purp.index

# Trip distance by purpose
trip['travdist_wt'] = trip['trexpfac']*trip['travdist']
dist_by_trip_purp = pd.DataFrame(trip.groupby('dpurp').sum()['travdist_wt']/trip.groupby('dpurp').sum()['trexpfac'],
                                columns=['dist_by_trip_purp'])
dist_by_trip_purp['purpose'] = dist_by_trip_purp.index

# number of total tours generated by purpose
total_tours = pd.DataFrame(tour.groupby('pdpurp').sum()['toexpfac'])
total_tours.rename(columns={'toexpfac':'total_tours'},inplace=True)
total_tours['purpose'] = total_tours.index
total_tours.reset_index(inplace=True, drop=True)

# trip mode share
# One column per trip mode, suffixed '_trips'
trips_by_mode = pd.pivot_table(trip,values='trexpfac',index='dpurp',columns='mode',aggfunc='sum')
trips_by_mode.columns = [i+'_trips' for i in trips_by_mode.columns]
trips_by_mode['purpose'] = trips_by_mode.index
trips_by_mode.reset_index(inplace=True, drop=True)

# tour mode share
# One column per tour mode, suffixed '_tours'
tours_by_mode = pd.pivot_table(tour,values='toexpfac',index='pdpurp',columns='tmodetp',aggfunc='sum')
tours_by_mode.columns = [i+'_tours' for i in tours_by_mode.columns]
tours_by_mode['purpose'] = tours_by_mode.index
tours_by_mode.reset_index(inplace=True, drop=True)

# merge all dataframes with data by purpose
# Successive inner merges on 'purpose': a purpose missing from any frame is dropped
df_list = [dist_by_tour_purp, dist_by_trip_purp, total_tours, tours_by_mode, trips_by_mode]
purpose_df = reduce(lambda left,right: pd.merge(left,right,on='purpose'), df_list)

# add field specifying the data source
purpose_df['source'] = dataset['name']
purpose_df.to_csv(dataset['name'] + '_' + 'purpose_df.csv')

## Results By Mode
- distance by tour mode
- trips per tour by tour mode
- trip mode by tour mode
- number of tours by mode
- travel time by mode

In [441]:
# Builds mode_df: one row per mode with weighted mean tour and trip distance.
# Written to '<dataset name>_mode_df.csv' in the working directory.
# NOTE: `reduce` is used as a builtin here, which exists only in Python 2.

# Tour distance by mode
tour['tautodist_wt'] = tour['toexpfac']*tour['tautodist']
dist_by_tour_mode = pd.DataFrame(tour.groupby('tmodetp').sum()['tautodist_wt']/tour.groupby('tmodetp').sum()['toexpfac'],
                                columns=['dist_by_tour_mode'])
dist_by_tour_mode['mode'] = dist_by_tour_mode.index

# Trip distance by mode
trip['travdist_wt'] = trip['trexpfac']*trip['travdist']
dist_by_trip_mode = pd.DataFrame(trip.groupby('mode').sum()['travdist_wt']/trip.groupby('mode').sum()['trexpfac'],
                                columns=['dist_by_trip_mode'])
dist_by_trip_mode['mode'] = dist_by_trip_mode.index

# merge all dataframes with data by mode
# Inner merge on 'mode': a mode present in only one frame is dropped
df_list = [dist_by_tour_mode, dist_by_trip_mode]
mode_df = reduce(lambda left,right: pd.merge(left,right,on='mode'), df_list)

# add field specifying the data source
mode_df['source'] = dataset['name']
mode_df.to_csv(dataset['name'] + '_' + 'mode_df.csv')

## Results By District
- tours by destination district
- trips by destination district
- residents by district

In [424]:
# Builds district_df: expanded trips, tours and residents per district_name.
# The result is kept in memory only; this cell does not write a file.
# NOTE: `reduce` is used as a builtin here, which exists only in Python 2.

# Trips by Destination District
trip_district = pd.merge(trip[['dtaz','trexpfac']],districts,left_on='dtaz',right_on='taz')
trip_district_df = pd.DataFrame(trip_district.groupby('district_name')['trexpfac'].sum())
trip_district_df.rename(columns={'trexpfac':'trips'},inplace=True)
trip_district_df.reset_index(inplace=True)

# Tours by Destination District
tour_district = pd.merge(tour[['tdtaz','toexpfac']],districts,left_on='tdtaz',right_on='taz')
tour_district_df = pd.DataFrame(tour_district.groupby('district_name')['toexpfac'].sum())
tour_district_df.rename(columns={'toexpfac':'tours'},inplace=True)
tour_district_df.reset_index(inplace=True)

# Residents by District
hh_per_district = pd.merge(hh[['hhtaz','hhsize','hhexpfac','hhno']], districts, left_on='hhtaz', right_on='taz')

# Residents = household size weighted by the household expansion factor
hh_per_district['wt_hhsize'] = hh_per_district['hhsize']*hh_per_district['hhexpfac']
residents_df = pd.DataFrame(hh_per_district.groupby('district_name').sum()['wt_hhsize'])
residents_df.rename(columns={'wt_hhsize':'residents'},inplace=True)
residents_df.reset_index(inplace=True)

# Merge these all together
# Inner merges on 'district_name': a district missing from any frame is dropped
df_list = [trip_district_df, tour_district_df, residents_df]
district_df = reduce(lambda left,right: pd.merge(left,right,on='district_name'), df_list)

# add dataset name
district_df['source'] = dataset['name']

# Network Summary

In [425]:
# Scratch version of network_results() for one fixed workbook: stack the
# 'Network Summary' sheet into one row per (row label, column name).
# Unlike network_results(), it does not handle an empty first row, does not
# drop 'tod' rows, and does not add a 'source' column.
net_summary_df = pd.read_excel(r'J:\Projects\Soundcast\network_summary_detailed.xlsx', sheetname='Network Summary')
df = pd.DataFrame(net_summary_df.stack())
df['tod']= df.index.get_level_values(0)
df['fieldname'] = df.index.get_level_values(1)
df.rename(columns={0:'model_value'},inplace=True)
df.reset_index(inplace=True, drop=True)
# Drop the rows with TP_4k column headers
df.drop(df[df['fieldname'] == 'TP_4k'].index, inplace=True)
# Split the fields by vmt, vht, delay
df['facility_type'] = df.fieldname.apply(lambda row: row.split('_')[0])
df['metric'] = df.fieldname.apply(lambda row: row.split('_')[-1])

## Time of Day