In [1]:
%matplotlib inline

In [2]:
import os
import sys

# Make the sibling ``lib`` folder importable. The entry is a relative path, so
# it is resolved against the current working directory at import time.
sys.path.append(os.path.join(os.pardir, 'lib'))

In [3]:
import os
import numpy as np
import pandas as pd
import dataset_importers as di

# Import files

In [15]:
# Home directory of the current user, with any 'C:' in the path swapped for
# 'D:'. NOTE(review): machine-specific -- presumably the data folder lives on
# the D: drive of the author's machine; confirm before running elsewhere.
userhome = os.path.expanduser('~').replace('C:', 'D:')

In [17]:
# Tables to retrieve from the NTS extract.
# NOTE(review): only 'Trip' is read in the cells shown in this notebook;
# 'Individual' is loaded but not obviously used -- confirm it is needed.
nts_tables = ['Trip', 'Individual']

In [18]:
# Root folder of the NTS extract, located under the user's home directory.
# NOTE(review): the raw strings use Windows '\' separators, so these paths
# presumably only resolve on Windows -- confirm before running elsewhere.
infolder = os.path.join(userhome, r'OneDrive - Transport Systems Catapult\Modelling\Data\NTS\UKDA-5340-tab')
# Metadata workbook (per its file name: lookup tables for banded variables).
infile = r'5340_nts_lookup_tables_banded_variables_2015_ukds.xls'
infilepath_metadata = os.path.join(infolder, r'mrdoc\excel', infile)

# The data tables are read from the same root folder as the metadata.
infolder_ntsdata = infolder

### Import the NTS data

In [19]:
# Load the requested tables through the project helper. The result is indexed
# by table name further down (``nts['Trip']``); its exact type is defined in
# ``dataset_importers`` and is not visible from this notebook.
nts = di.get_nts(nts_tables, infolder_ntsdata, infilepath_metadata)

## Analysis

In [20]:
#TODO improve performance (maybe dont use apply)
def assign_tripPurpose(x):
    """Label one trip as home-based ('HB') or non-home-based ('NHB').

    Parameters
    ----------
    x : row-like object
        Must expose ``TripPurpFrom_B01ID`` and ``TripPurpTo_B01ID``
        attributes holding purpose labels (strings).

    Returns
    -------
    str
        ``'HB'`` or ``'NHB'`` followed by the first character of the purpose
        at the non-home end, e.g. ``'HBW'`` or ``'NHBO'``. A trip with
        ``'Home'`` at both ends yields ``'HBH'``.
    """
    home_end, other_end = x.TripPurpFrom_B01ID, x.TripPurpTo_B01ID

    # Orient the trip so that a 'Home' destination is treated as the origin;
    # the direction of travel does not matter for the label.
    if other_end == 'Home':
        home_end, other_end = other_end, home_end

    prefix = 'HB' if home_end == 'Home' else 'NHB'
    return '{}{}'.format(prefix, other_end[0])
    

In [85]:
# Pull the trip-level table out of the loaded NTS data.
trips = nts['Trip']

In [86]:
# Fields used below: survey year and person id, main mode, purpose at each
# trip end, region at each trip end, and the two factors that are later
# multiplied into the trip weight.
trips_cols = [
    'SurveyYear', 'IndividualID',
    'MainMode_B04ID',
    'TripPurpFrom_B01ID', 'TripPurpTo_B01ID',
    'TripOrigGOR_B02ID', 'TripDestGOR_B02ID',
    'W5', 'JJXSC',
]
trips = trips[trips_cols]

In [87]:
# Preview the first two trip records after the column selection.
trips.head(2)

Unnamed: 0_level_0,SurveyYear,IndividualID,MainMode_B04ID,TripPurpFrom_B01ID,TripPurpTo_B01ID,TripOrigGOR_B02ID,TripDestGOR_B02ID,W5,JJXSC
TripID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2002000001,2002,2002000001,Car / van passenger,Home,Visit friends,London,London,0.989618,1
2002000002,2002,2002000001,Car / van passenger,Visit friends,Personal business other,London,London,1.002945,1


In [88]:
# Build the working frame from `trips` without touching it: add the combined
# trip weight (W5 x JJXSC) and discard the two factor columns.
df = (
    trips
    .assign(WeightTrip=trips['W5'] * trips['JJXSC'])
    .drop(['W5', 'JJXSC'], axis=1)
)
#df.head(2)

### Filter the London trips 

In [89]:
# Shorthand for label-based MultiIndex slicing.
# NOTE(review): not referenced by any cell visible in this notebook.
idxslc = pd.IndexSlice

In [90]:
# Keep only trips that both start and end in London, then discard the region
# columns, which are constant after the filter.
# The result is built as a new frame instead of dropping columns in place on
# the filtered selection: in-place edits of a filtered frame are what pandas
# flags with SettingWithCopyWarning (when copy-on-write is not enabled).
# NOTE: the cell still cannot be re-run on its own, because `df` no longer has
# the region columns afterwards; re-run from the cell that creates `df`.
in_london = (df.TripOrigGOR_B02ID == 'London') & (df.TripDestGOR_B02ID == 'London')
df = df.loc[in_london].drop(['TripOrigGOR_B02ID', 'TripDestGOR_B02ID'], axis=1)
#df.head(2)

In [91]:
# Inspect the columns of `trips` (the unfiltered table, not the working `df`).
trips.columns

Index(['SurveyYear', 'IndividualID', 'MainMode_B04ID', 'TripPurpFrom_B01ID',
       'TripPurpTo_B01ID', 'TripOrigGOR_B02ID', 'TripDestGOR_B02ID', 'W5',
       'JJXSC'],
      dtype='object')

In [92]:
# From and To trip purposes contain the same categories, so a single lookup
# built from the To column is used for both.
# NOTE(review): the group labels are paired with the categories purely by
# position, i.e. by the order in which `unique()` first meets each category.
# A change in the data (or its row order) silently changes the mapping; an
# explicit {category: group} dictionary would be safer.
grp_purpose_keys = list(df.TripPurpTo_B01ID.unique())
grp_purpose_vals = ['Other', 'Other', 'Home', 'Other', 'Other', 'Other', 'Other', 'Work', 'Other', 'Other', 'Other', 'Other', 'Work', 'Other', 'Work', 'Home', 'Other', 'Other', 'Other', 'Other', 'Other', 'Other', 'Work']

# zip() stops at the shorter input, which would leave categories unmapped (or
# labels unused) without any error, so fail loudly on a length mismatch.
if len(grp_purpose_keys) != len(grp_purpose_vals):
    raise ValueError(
        'Expected {} trip purpose categories but found {} in the data; '
        'the positional purpose mapping is out of date.'.format(
            len(grp_purpose_vals), len(grp_purpose_keys)))

dict_grp_purpose = dict(zip(grp_purpose_keys, grp_purpose_vals))
#dict_grp_purpose

In [93]:
# List the distinct main-mode labels present in the London trips.
list(df.MainMode_B04ID.unique())

['Car / van passenger',
 'Taxi / minicab',
 'Car / van driver',
 'Walk',
 'Bus in London',
 'Surface Rail',
 'Other local bus',
 'London Underground',
 'Bicycle',
 'Other private transport',
 'Motorcycle ',
 'Other public transport',
 'Non-local bus']

In [94]:
# Dictionary for aggregating modes.
# Written out explicitly instead of zipping group labels against `unique()`,
# whose order follows the order of rows in the data: a reordered or extended
# dataset would otherwise silently pair modes with the wrong group.
# The pairs reproduce the original positional mapping for the categories
# present in the London trips. Modes missing from this dictionary become NaN
# when a column is passed through `.map(dict_grp_mode)`.
dict_grp_mode = {
    'Car / van passenger': 'CarPass',
    'Taxi / minicab': 'Taxi',
    'Car / van driver': 'CarDriver',
    'Walk': 'Walk',
    'Bus in London': 'Bus',
    'Surface Rail': 'Rail',
    'Other local bus': 'Bus',
    'London Underground': 'Rail',
    'Bicycle': 'Other',
    'Other private transport': 'Other',
    'Motorcycle ': 'Other',  # trailing space is part of the source label
    'Other public transport': 'Other',
    'Non-local bus': 'Other',
}
# Parallel lists kept for any code that still expects them.
grp_mode_keys = list(dict_grp_mode.keys())
grp_mode_vals = list(dict_grp_mode.values())
#dict_grp_mode

In [95]:
# Replace the detailed mode labels with their aggregated groups.
df['MainMode_B04ID'] = df['MainMode_B04ID'].map(dict_grp_mode)
# Both trip ends share the same purpose lookup.
for purpose_col in ('TripPurpFrom_B01ID', 'TripPurpTo_B01ID'):
    df[purpose_col] = df[purpose_col].map(dict_grp_purpose)
df.head(2)

Unnamed: 0_level_0,SurveyYear,IndividualID,MainMode_B04ID,TripPurpFrom_B01ID,TripPurpTo_B01ID,WeightTrip
TripID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2002000001,2002,2002000001,CarPass,Home,Other,0.989618
2002000002,2002,2002000001,CarPass,Other,Other,1.002945


In [96]:
# Renumber the rows 0..n-1 so the per-row result lines up with `df` when it
# is inserted below.
df = df.reset_index(drop=True)
# assign_tripPurpose only reads the two purpose columns, so apply it to just
# those rather than to a second, index-augmented copy of the whole frame.
purps = df[['TripPurpFrom_B01ID', 'TripPurpTo_B01ID']].apply(assign_tripPurpose, axis=1)
# Position 4 places the derived purpose between the From and To columns.
df.insert(4, 'TripPurpose', purps)

In [97]:
# Preview the first two rows with the new TripPurpose column.
df.head(2)

Unnamed: 0,SurveyYear,IndividualID,MainMode_B04ID,TripPurpFrom_B01ID,TripPurpose,TripPurpTo_B01ID,WeightTrip
0,2002,2002000001,CarPass,Home,HBO,Other,0.989618
1,2002,2002000001,CarPass,Other,NHBO,Other,1.002945


In [98]:
# Total weighted trips after assigning purposes; serves as the reference value
# for the same check after the further purpose aggregation.
df.WeightTrip.sum()

538517.7870326295

In [99]:
# Further purpose aggregation: both non-home-based labels collapse to 'NHB',
# and home-to-home trips ('HBH') are folded into 'HBO'.
purps = ['HBO', 'NHBO', 'NHBW', 'HBW', 'HBH']
aggr_purps = ['HBO', 'NHB', 'NHB', 'HBW', 'HBO']
dict_grp_purps2 = {detailed: grouped for detailed, grouped in zip(purps, aggr_purps)}
dict_grp_purps2

{'HBH': 'HBO', 'HBO': 'HBO', 'HBW': 'HBW', 'NHBO': 'NHB', 'NHBW': 'NHB'}

In [100]:
# Collapse to the final purpose groups. Labels without an entry in the lookup
# are kept as they are, so re-running this cell does not turn the already
# aggregated 'NHB' label into NaN (a plain `.map(dict_grp_purps2)` would,
# because 'NHB' is not one of its keys).
df.TripPurpose = df.TripPurpose.map(lambda p: dict_grp_purps2.get(p, p))

In [101]:
# Total weight again after re-mapping the purposes; it should be unchanged,
# since the re-mapping only relabels rows.
df.WeightTrip.sum()

538517.7870326295

In [102]:
# Snapshot of the purpose-labelled London trips; later cells start from this
# copy so they can reuse the name `df` without losing this state.
trips_purpose = df.copy()

In [124]:
# Columns needed for the mode split.
cols = ['SurveyYear', 'MainMode_B04ID', 'TripPurpose', 'WeightTrip']
# Restrict to survey years 2012 onwards. `.loc` with a mask and a column list
# returns a new frame, so `trips_purpose` is left untouched.
df = trips_purpose.loc[trips_purpose.SurveyYear >= 2012, cols]
# Groupby Year, Mode, Purpose: weighted trips per year, mode and purpose.
dfg = df.groupby(['SurveyYear', 'MainMode_B04ID', 'TripPurpose']).WeightTrip.sum()
# Groupby Mode, Purpose: average the yearly totals. The grouping keys are
# index levels of the Series at this point, hence the explicit `level=`.
# NOTE(review): a mode/purpose pair with no trips in some year has no row for
# that year, so it is averaged over fewer years rather than counted as zero
# -- confirm this is the intended treatment.
dfg = dfg.groupby(level=['MainMode_B04ID', 'TripPurpose']).mean()
# Keep only the two modes whose split is needed.
df = dfg.loc[['CarPass', 'Taxi']]

# Share of each of the two modes within every purpose. `transform('sum')`
# returns the per-purpose total aligned to the original (mode, purpose) index,
# so the result keeps exactly two index levels on every pandas version;
# `groupby(...).apply` with a like-indexed result prepends the group key as an
# extra level on pandas >= 2.0, which breaks the reset_index/rename that
# follows.
carpass_taxi_split_bypurpose = df / df.groupby(level='TripPurpose').transform('sum')

In [125]:
# Flatten the (mode, purpose) index into ordinary columns and give the three
# columns their export names. 'Percentage' holds a share between 0 and 1 (see
# the displayed values), not a value out of 100.
# NOTE: this cell is not re-runnable on its own -- a second reset_index() adds
# an extra 'index' column, after which assigning three names fails.
carpass_taxi_split_bypurpose = carpass_taxi_split_bypurpose.reset_index()
carpass_taxi_split_bypurpose.columns = ['Mode', 'TripPurpose', 'Percentage']
carpass_taxi_split_bypurpose

Unnamed: 0,Mode,TripPurpose,Percentage
0,CarPass,HBO,0.946891
1,CarPass,HBW,0.83657
2,CarPass,NHB,0.879673
3,Taxi,HBO,0.053109
4,Taxi,HBW,0.16343
5,Taxi,NHB,0.120327


## Export

In [126]:
# Write the shares to CSV without the row index. The target path is relative,
# so it is resolved against the notebook's current working directory.
carpass_taxi_split_bypurpose.to_csv('../Building/Inputs/carpass_taxi_modesplit_byPurp.csv', index=False)