In [2]:
import os
import pandas as pd

In [3]:
# Read every network_summary_detailed workbook (.xlsx) found in input_dir

In [4]:
# Input/output folders. Each can be overridden with an environment variable
# (NET_SUMMARY_INPUT_DIR / NET_SUMMARY_OUTPUT_DIR) so the notebook is not tied to
# one machine; when a variable is unset the original local folder is used.
_default_dir = r'C:\Users\Brice\surveys\surveys\net_summaries'
input_dir = os.environ.get('NET_SUMMARY_INPUT_DIR', _default_dir)
output_dir = os.environ.get('NET_SUMMARY_OUTPUT_DIR', _default_dir)

In [5]:
# PSRC time-of-day periods extracted from the model outputs.
# NOTE(review): '15to16' and '20to5' are produced by tod_lookup but are not in
# tod_list, so model results for those two periods are never extracted while the
# observed data does get rows for them - confirm this is intentional.
tod_list = ['5to6','6to7','7to8','8to9','9to10','10to14','14to15','16to17','17to18','18to20']

# Hour of day (0-23) -> PSRC time-of-day period.
# Every period 'AtoB' holds the hours A .. B-1 (e.g. 10-13 -> '10to14', 14 -> '14to15'),
# so hour 20 belongs to '20to5', not '18to20'.
tod_lookup = {  0:'20to5',
                1:'20to5',
                2:'20to5',
                3:'20to5',
                4:'20to5',
                5:'5to6',
                6:'6to7',
                7:'7to8',
                8:'8to9',
                9:'9to10',
                10:'10to14',
                11:'10to14',
                12:'10to14',
                13:'10to14',
                14:'14to15',
                15:'15to16',
                16:'16to17',
                17:'17to18',
                18:'18to20',
                19:'18to20',
                20:'20to5',
                21:'20to5',
                22:'20to5',
                23:'20to5' }

In [6]:
def main():
    '''
    Rebuild the summary CSVs from every .xlsx workbook in input_dir.

    Any existing transit_boardings.csv, traffic_counts.csv and net_summary.csv
    in output_dir are deleted first, then each workbook is passed to
    transit_summary, traffic_counts and net_summary in turn.
    '''
    output_csv_list = ['transit_boardings','traffic_counts','net_summary']

    # write_csv appends to files that already exist, so clear old output first
    # to keep a rerun from duplicating rows
    overwrite = True

    if overwrite:
        for fname in output_csv_list:
            csv_path = os.path.join(output_dir, fname + '.csv')
            if os.path.isfile(csv_path):
                os.remove(csv_path)

    for fname in os.listdir(input_dir):
        if fname.endswith('.xlsx'):
            net_file = os.path.join(input_dir,fname)

            # Parenthesized single-argument form prints the same text under
            # Python 2 and Python 3
            print('processing ' + fname)

            transit_summary(net_file, fname)
            traffic_counts(net_file, fname)
            net_summary(net_file, fname)

In [7]:
def write_csv(df, fname):
    '''
    Save a dataframe as CSV under output_dir.

    A file that does not exist yet is created with a header row; if the
    file is already there, the rows are appended and no header is written.
    The dataframe index is never written.
    '''

    target = os.path.join(output_dir, fname)

    if os.path.isfile(target):
        # file already present: add the rows only
        df.to_csv(target, mode='a', header=False, index=False)
        return

    df.to_csv(target, index=False)

In [8]:
def transit_summary(net_file, fname):
    '''
    Append route-level transit boardings by time of day to transit_boardings.csv.

    Model boardings come from the 'Transit Summaries' sheet of net_file and are
    tagged with the workbook name (fname without '.xlsx') as their source.
    If the output file holds no rows with source 'observed' yet, the boardings
    in data/transit_boardings_2014.csv are summed to the tod_lookup periods
    and appended as well.
    '''

    routes = pd.read_excel(net_file, sheetname='Transit Summaries')
    routes.index = routes['route_code']

    # Model results: one long-format frame per field
    long_frames = {}
    for field in ['board','time']:
        wide_cols = [tod + '_' + field for tod in tod_list]
        stacked = pd.DataFrame(routes[wide_cols].stack())
        stacked = stacked.rename(columns={0: field})
        # stacked column labels look like '<tod>_<field>'; keep the tod part
        stacked['tod'] = [label.split('_')[0] for label in stacked.index.get_level_values(1)]
        stacked['route_id'] = stacked.index.get_level_values(0)
        stacked = stacked.reset_index(drop=True)

        long_frames[field] = stacked

    # Boardings only for now - there is no observed time data at the route level
    model = long_frames['board'].groupby(['route_id','tod']).sum().reset_index()
    model['source'] = fname.split('.xlsx')[0]

    fname_out = 'transit_boardings.csv'
    write_csv(df=model, fname=fname_out)

    # Observed data is written once; nothing more to do if it is already there
    written_sources = pd.read_csv(os.path.join(output_dir, fname_out))['source'].values
    if 'observed' in written_sources:
        return

    observed = pd.read_csv('data/transit_boardings_2014.csv')
    observed.index = observed['PSRC_Rte_ID']
    observed = observed.drop([u'Unnamed: 0','PSRC_Rte_ID','SignRt'], axis=1)

    observed = pd.DataFrame(observed.stack()).reset_index()
    observed = observed.rename(columns={0:'board', 'level_1':'hour','PSRC_Rte_ID':'route_id'})

    # The hour is the last '_'-separated token of the original column label
    observed['hour'] = observed['hour'].apply(lambda label: label.split('_')[-1])
    tod_df = pd.DataFrame(data=tod_lookup.values(),index=tod_lookup.keys(), columns=['tod'])
    tod_df['hour'] = tod_df.index.astype('str')

    # Attach the time-of-day period to each hour, then discard the hour
    observed = pd.merge(observed, tod_df, on='hour').drop('hour', axis=1)

    # Total boardings per period and route
    observed = observed.groupby(['tod','route_id']).sum().reset_index()
    observed['source'] = 'observed'

    # Same column order as the model rows, since appended rows carry no header
    observed = observed[['route_id','tod','board','source']]

    write_csv(df=observed, fname=fname_out)

In [9]:
def traffic_counts(net_file, fname):
    '''
    Append traffic volumes by time of day and NewINode to traffic_counts.csv.

    Both model volumes ('vol<tod>' columns) and observed hourly volumes
    ('Vol_<hour>' columns) are read from the 'Counts Output' sheet of net_file.
    Model rows are tagged with the workbook name (fname without '.xlsx');
    observed rows are summed to the tod_lookup periods and written only if the
    output file has no rows with source 'observed' yet.
    '''
    counts_df = pd.read_excel(net_file, sheetname='Counts Output')

    unused_cols = [u'OBJECTID_1', u'Join_Count', u'TARGET_FID', u'OBJECTID', u'SR', u'RID',
                   u'MP', u'ARM', u'Type_', u'Lanes', u'Oneway', u'Dir', u'ID',
                   u'HOV_I', u'HOV_J']
    counts_df = counts_df.drop(unused_cols, axis=1)

    # Model results: wide 'vol<tod>' columns -> one row per node and period
    model_cols = ['vol' + str(tod) for tod in tod_list] + ['NewINode']
    model = counts_df[model_cols].set_index(keys='NewINode', drop=True)
    model = pd.DataFrame(model.stack())

    # Strip the 'vol' prefix from the stacked column labels to get the period
    model['tod'] = [label.split('vol')[-1] for label in model.index.get_level_values(1)]
    model['NewINode'] = model.index.get_level_values(0)
    model = model.rename(columns={0:'volume'}).reset_index(drop=True)

    model['source'] = fname.split('.xlsx')[0]

    fname_out = 'traffic_counts.csv'
    write_csv(df=model, fname=fname_out)

    # Observed results are written once; stop here if they are already in the file
    written_sources = pd.read_csv(os.path.join(output_dir, fname_out))['source'].values
    if 'observed' in written_sources:
        return

    # Accept both zero-padded ('Vol_05') and plain ('Vol_5') hour columns
    hour_names = {}
    for i in xrange(24):
        if i < 10:
            hour_names['Vol_0'+str(i)] = str(i)
        hour_names['Vol_'+str(i)] = str(i)
    observed = counts_df.rename(columns=hour_names)

    observed = observed[[str(i) for i in xrange(24)]+['NewINode']]
    observed = observed.set_index(keys='NewINode', drop=True)

    # Wide hourly columns -> one row per node and hour
    observed = pd.DataFrame(observed.stack()).reset_index()
    observed = observed.rename(columns={'level_1':'hour',0:'volume'})

    # Hour -> time-of-day period lookup table
    tod_df = pd.DataFrame(data=tod_lookup.values(),index=tod_lookup.keys(), columns=['tod'])
    tod_df['hour'] = tod_df.index.astype('str')

    observed = pd.merge(observed, tod_df, on='hour').drop('hour', axis=1)

    # Total volume per period and node; the group keys are copied back in as
    # trailing columns so the column order matches the model rows
    observed = observed.groupby(['tod','NewINode']).sum()
    observed['tod'] = observed.index.get_level_values(0)
    observed['NewINode'] = observed.index.get_level_values(1)
    observed = observed.reset_index(drop=True)

    observed['source'] = 'observed'

    write_csv(df=observed, fname=fname_out)

In [10]:
def net_summary(net_file, fname):
    '''
    Append the 'Network Summary' sheet of net_file, in long format, to net_summary.csv.

    Each output row holds one value for a tod / fieldname pair, together with
    the first and last '_'-separated parts of the field name (facility_type and
    metric) and the workbook name (fname without '.xlsx') as source.
    '''

    sheet = pd.read_excel(net_file, sheetname='Network Summary')
    sheet.index = sheet['tod']

    # One row per (tod, column) pair
    long_df = pd.DataFrame(sheet.stack()).reset_index()
    long_df = long_df.rename(columns={0:'value','level_1':'fieldname'})

    # Rows that came from the TP_4k and tod columns are not summary values
    unwanted = long_df[long_df['fieldname'].isin(['TP_4k', 'tod'])].index
    long_df.drop(unwanted, inplace=True)

    # Field names look like '<facility_type>_..._<metric>'
    fields = long_df['fieldname']
    long_df['facility_type'] = [name.split('_')[0] for name in fields]
    long_df['metric'] = [name.split('_')[-1] for name in fields]

    long_df['source'] = fname.split('.xlsx')[0]

    write_csv(df=long_df, fname='net_summary.csv')

In [11]:
# Run the export: rebuild the summary CSVs in output_dir from the .xlsx workbooks in input_dir
main()

processing latest_run_2014.xlsx
processing test_branch_14.xlsx
