In [1]:
import os

import numpy as np
import pandas as pd
import scipy.ndimage
# Show every column when a DataFrame is displayed. The full option key is used
# because abbreviated keys are resolved by pattern matching and can become
# ambiguous if pandas adds a similarly named option.
pd.set_option('display.max_columns', None)

In [2]:
# Load the NHTS trip table (a positional subset of its columns) and the full person table.
# HH_CBSA is pinned to str: it is later compared against string CBSA codes such as
# '12420', and leaving the dtype to chunked inference could yield a mixed int/str
# column in which numeric-looking codes silently fail that comparison.
trips_all = pd.read_csv('https://beam-outputs.s3.amazonaws.com/new_city/nhts/trippub.csv.gz',
                    usecols=[0, 1, 2, 3, 4, 5, 6, 7, 17, 26, 28, 58, 59, 60, 61, 64, 69, 70, 71, 72, 73, 74, 84, 89, 93,
                             102, 103],
                    dtype={'HH_CBSA': str})
persons_all = pd.read_csv('https://beam-outputs.s3.amazonaws.com/new_city/nhts/perpub.csv.gz')
# dictionaries for activity names and mode names
# actnames maps numeric trip-purpose codes to coarse activity labels; codes that are
# not listed here keep their numeric value when the mapping is applied.
actnames = {1:'Home',2:'Home',3:'Work',8:'Work',11:'Shopping',12:'Shopping',13:'Meal',15:'SocRec',16:'SocRec',17:'SocRec',19:'SocRec'}
# modenames maps numeric transport-mode codes to coarse mode labels.
modenames = {1:'Walk',2:'Bike',3:'Car',4:'Car',5:'Car',6:'Car',7:'Car',8:'Car',9:'Car',10:'Bus',11:'Bus',13:'Bus',14:'Bus',15:'Rail',16:'Rail',17:'Ridehail',18:'Rental Car'}

In [3]:
def label_commuter(row):
    """Classify one trip as a commute or not.

    Parameters
    ----------
    row : mapping-like
        Anything indexable by 'WHYFROM' and 'WHYTO' (e.g. a DataFrame row).

    Returns
    -------
    str
        'Commuter' when the trip goes directly between 'Home' and 'Work'
        (in either direction), otherwise 'Non-Commuter'.
    """
    commute_pairs = (('Work', 'Home'), ('Home', 'Work'))
    od_pair = (row['WHYFROM'], row['WHYTO'])
    return 'Commuter' if od_pair in commute_pairs else 'Non-Commuter'

In [4]:
def getActivities(trips):
    """Reduce a trip table to the columns used for the hourly profiles.

    Parameters
    ----------
    trips : pandas.DataFrame
        Must provide the columns 'commuter', 'startHour', 'endHour' and 'WTTRDFIN'.

    Returns
    -------
    pandas.DataFrame
        Freshly indexed frame with columns 'commuter', 'startTime', 'endTime',
        'duration' (endTime - startTime) and 'weight', one row per input row.
    """
    begin = trips['startHour'].values
    finish = trips['endHour'].values
    # Raw arrays (not Series) are used so the result gets a new 0..n-1 index
    # instead of inheriting the index of the input frame.
    return pd.DataFrame({
        'commuter': trips['commuter'].values,
        'startTime': begin,
        'endTime': finish,
        'duration': finish - begin,
        'weight': trips['WTTRDFIN'].values,
    })

In [5]:
def getCommuters(activities, nPeople=None):
    """Build the weighted hourly trip-start profile for each commuter class.

    For 'Non-Commuter' and 'Commuter' separately, the 'startTime' values are
    histogrammed into 25 one-hour bins (edges 0, 1, ..., 25) using 'weight' as
    the histogram weights. Each bin is divided by ``nPeople`` and bins whose
    normalised value is below 0.015 are set to 0.

    Parameters
    ----------
    activities : pandas.DataFrame
        Must provide the columns 'commuter', 'startTime' and 'weight'.
        The frame is not modified.
    nPeople : float, optional
        Normalisation total. When omitted, it is computed from the module-level
        ``trips`` frame as the sum of WTTRDFIN over the first row of each
        UniquePID, so that frame must exist before calling without this argument.

    Returns
    -------
    pandas.DataFrame
        25 rows (index named 'Hour') with columns 'Non-Commuter' and 'Commuter'.
    """
    if nPeople is None:
        # Backward-compatible fallback to the notebook-global trip table.
        nPeople = trips.drop_duplicates('UniquePID').WTTRDFIN.sum()

    commuters = ['Non-Commuter', 'Commuter']
    hour_edges = np.arange(26)
    intercepts = dict()
    for commuter in commuters:
        # Select once per class; the input frame itself is left untouched so the
        # function can be called repeatedly on the same data.
        selected = activities.loc[activities.commuter == commuter, ['startTime', 'weight']]
        counts, _ = np.histogram(selected['startTime'], bins=hour_edges, weights=selected['weight'])
        counts = counts / nPeople
        # Suppress hours that carry only a negligible share.
        counts[counts < 0.015] = 0.0
        intercepts[commuter] = counts
    df = pd.DataFrame(intercepts, columns=commuters)
    df.index.name = 'Hour'
    return df

In [8]:
# Build and export the hourly commuter / non-commuter start-time profile for each CBSA.
# to_csv does not create missing directories, so make sure the target exists first.
os.makedirs('outputs', exist_ok=True)
for cbsa in ['12420','41860']:
    # .copy() yields an independent frame, so the column assignments below never
    # act on a slice of trips_all.
    # The frame must stay bound to the module-level name `trips`: getCommuters()
    # reads that global when no explicit normalisation total is passed.
    trips = trips_all.loc[trips_all['HH_CBSA'] == cbsa, :].copy()

    # Purpose flags are derived from the raw numeric WHYTO/WHYFROM codes *before*
    # those codes are replaced by activity names; after the replacement the
    # comparisons against 1 and 3 could never match.
    trips['toWork'] = trips.WHYTO.isin([3, 4])
    trips['fromWork'] = trips.WHYFROM.isin([3, 4])
    trips['fromHome'] = (trips.WHYFROM == 1)
    trips['toHome'] = (trips.WHYTO == 1)

    # Map purpose codes to activity names; codes absent from actnames are kept as-is.
    trips = trips.replace({'WHYTO' : actnames, 'WHYFROM' : actnames})
    # TDWKND 2 means not on weekend
    # TRPTRANS is Transportation mode used on trip (as reported by respondent), 19 = Taxicab
    # NOTE(review): modenames labels 17 as Ridehail and 18 as Rental Car, which suggests a
    # coding scheme where 19 may not be Taxicab -- confirm against the survey codebook.
    # ENDTIME > STRTTIME also drops trips that end at or before their start clock time.
    valid = (trips.TRPMILES > 0) & (trips.TDWKND == 2) & (trips.TRPTRANS != 19) & (trips.ENDTIME > trips.STRTTIME)

    trips = trips.loc[valid, :].copy()
    trips['UniquePID'] = trips.HOUSEID * 100 + trips.PERSONID
    # Convert HHMM clock values into fractional hours.
    trips['startHour'] = np.floor(trips.STRTTIME / 100) + np.mod(trips.STRTTIME, 100) / 60
    trips['endHour'] = np.floor(trips.ENDTIME / 100) + np.mod(trips.ENDTIME, 100) / 60
    trips['commuter'] = trips.apply(label_commuter, axis=1)

    # One activity table per person, flattened back into a single frame.
    out = trips.groupby('UniquePID').apply(getActivities).reset_index()

    # Independent copy so downstream code may modify it without touching a slice.
    out = out[out['startTime'] >= 0].copy()
    intercepts = getCommuters(out)
    intercepts.to_csv('outputs/commuters_intercepts-' + cbsa + '.csv')