In [3]:
import pandas as pd
import config

In [4]:
# Show up to 100 rows before pandas truncates printed frames/series.
pd.options.display.max_rows = 100

### Load and process trips data

In [5]:
# Read the raw trips table from the path configured as config.TRIPS_1216_RAW.
# low_memory=False makes pandas parse the file in one pass instead of in
# chunks, which avoids mixed-dtype inference within a column.
trips = pd.read_csv(config.TRIPS_1216_RAW, low_memory=False)
# (rows, columns) of the loaded frame.
trips.shape

(128684, 90)

In [6]:
# Transpose the first five rows so that each column is shown as a row;
# easier to read for a wide frame.
trips.head(5).T

Unnamed: 0,0,1,2,3,4
TRIPID,Y12H0000101P01T01,Y12H0000101P01T02,Y12H0000101P02T01,Y12H0000101P02T02,Y12H0000101P02T03
PERSID,Y12H0000101P01,Y12H0000101P01,Y12H0000101P02,Y12H0000101P02,Y12H0000101P02
HHID,Y12H0000101,Y12H0000101,Y12H0000101,Y12H0000101,Y12H0000101
TRIPNO,1,2,1,2,3
TRAVDOW,Monday,Monday,Monday,Monday,Monday
STOPS,1,1,1,1,1
STARTHOUR,8,17,8,9,10
STARTIME,525,1050,500,540,600
ARRHOUR,9,17,8,9,10
ARRTIME,545,1075,520,550,610


In [7]:
# Frequency of each category in the Mode1 column.
trips.Mode1.value_counts()

Vehicle Driver       66976
Vehicle Passenger    30640
Walking              27545
Bicycle               2150
Taxi                   376
Other                  366
Motorcycle             292
School Bus             219
Train                   79
Public Bus              28
Tram                    13
Name: Mode1, dtype: int64

In [8]:
# Frequency of each category in LINKMODE, the column later used to derive
# the main travel mode.
trips.LINKMODE.value_counts()

Vehicle Driver       66751
Vehicle Passenger    30301
Walking              20277
Train                 5065
Bicycle               2080
Tram                  1375
Public Bus            1238
School Bus             592
Taxi                   378
Other                  335
Motorcycle             292
Name: LINKMODE, dtype: int64

In [9]:
# List all available column names before choosing the subset to keep.
trips.columns

Index(['TRIPID', 'PERSID', 'HHID', 'TRIPNO', 'TRAVDOW', 'STOPS', 'STARTHOUR',
       'STARTIME', 'ARRHOUR', 'ARRTIME', 'ORIGSTOP', 'DESTSTOP', 'ORIGSA1',
       'ORIGSA3', 'ORIGLGA', 'ORIGPLACE1', 'ORIGPLACE2', 'ORIGPURP1',
       'ORIGPURP2', 'DESTPLACE1', 'DESTPLACE2', 'DESTPURP1', 'DESTPURP2',
       'DESTSA1', 'DESTSA3', 'DESTLGA', 'CUMDIST', 'DEPHOUR', 'DEPTIME',
       'MODESTOPID', 'LINKMODE', 'TRIPTIME', 'TRAVTIME', 'WAITIME', 'DURATION',
       'Mode1', 'Mode2', 'Mode3', 'Mode4', 'Mode5', 'Mode6', 'Mode7', 'Mode8',
       'Mode9', 'Time1', 'Time2', 'Time3', 'Time4', 'Time5', 'Time6', 'Time7',
       'Time8', 'Time9', 'Dist1', 'Dist2', 'Dist3', 'Dist4', 'Dist5', 'Dist6',
       'Dist7', 'Dist8', 'Dist9', 'TRIPPURP', 'Mode_Group', 'DIST_GRP',
       'Time_Grp', 'STUDYING', 'HomeSA1', 'HomeSA2', 'HomeSA3', 'HomeSA4',
       'HOMELGA', 'HomeRegion_ASGS', 'HomeSubRegion_ASGS', 'CW_ADTRIPWGT_SA3',
       'CW_WDTRIPWGT_SA3', 'CW_WETRIPWGT_SA3', 'RP_ADTRIPWGT_SA3',
       'RP_WDTRIPWG

### Select relevant columns

In [10]:
# Subset of columns kept for the rest of the notebook, laid out one theme
# per row (grouping is by column name only).
columns = [
    # identifiers
    'TRIPID', 'PERSID', 'HHID', 'TRIPNO',
    # travel day, mode and purpose
    'TRAVDOW', 'LINKMODE', 'TRIPPURP', 'DESTPURP1',
    # time-related fields
    'STARTIME', 'ARRTIME', 'TRIPTIME', 'TRAVTIME', 'WAITIME',
    # SA1 fields and cumulative distance
    'ORIGSA1', 'DESTSA1', 'CUMDIST', 'HomeSA1',
]

In [11]:
# Keep only the selected columns. The explicit .copy() makes `trips` an
# independent DataFrame, so the later cells that add `Mode` and overwrite
# `TRIPPURP` do not trigger pandas' SettingWithCopyWarning.
trips = trips[columns].copy()

In [12]:
# Sanity check: LINKMODE counts after the column selection (same values as
# before, since only columns were dropped).
trips.LINKMODE.value_counts()

Vehicle Driver       66751
Vehicle Passenger    30301
Walking              20277
Train                 5065
Bicycle               2080
Tram                  1375
Public Bus            1238
School Bus             592
Taxi                   378
Other                  335
Motorcycle             292
Name: LINKMODE, dtype: int64

### Derive main travel mode from LINKMODE

In [14]:
# Raw LINKMODE categories that the mode mapping has to handle.
trips.LINKMODE.value_counts()

Vehicle Driver       66751
Vehicle Passenger    30301
Walking              20277
Train                 5065
Bicycle               2080
Tram                  1375
Public Bus            1238
School Bus             592
Taxi                   378
Other                  335
Motorcycle             292
Name: LINKMODE, dtype: int64

In [15]:
# Map the detailed LINKMODE categories to 4 main travel modes:
# Walk, Car, Train, Cycle.
# NOTE(review): 'Public Bus' is grouped under 'Car' here - confirm this is intended.
# LINKMODE values with no entry below ('School Bus', 'Other', 'Motorcycle')
# become NaN in `Mode`.
mode_mapping = {
    'Walking': 'Walk',
    'Vehicle Driver': 'Car',
    'Vehicle Passenger': 'Car',
    'Taxi': 'Car',
    'Bicycle': 'Cycle',
    'Train': 'Train',
    'Tram': 'Train',
    'Public Bus': 'Car',
}

# assign() returns a new DataFrame instead of writing a column into a frame
# that may be a slice of the raw table, so no SettingWithCopyWarning is
# raised and re-running the cell gives the same result.
trips = trips.assign(Mode=trips.LINKMODE.map(mode_mapping))

In [16]:
# Distribution of the derived main mode. value_counts() skips NaN, so trips
# whose LINKMODE has no entry in mode_mapping are not counted here.
trips['Mode'].value_counts()

Car      98668
Walk     20277
Train     6440
Cycle     2080
Name: Mode, dtype: int64

### Process trip purpose

In [18]:
# Frequency of each DESTPURP1 category, for comparison with TRIPPURP.
trips.DESTPURP1.value_counts()

At or Go Home                   50451
Work Related                    16476
Buy Something                   13870
Social                          13031
Pick-up or Drop-off Someone      9331
Personal Business                7163
Recreational                     6773
Education                        4962
Accompany Someone                4173
Pick-up or Deliver Something     1841
Other Purpose                     486
Change Mode                       125
Not Stated                          2
Name: DESTPURP1, dtype: int64

In [19]:
# Frequency of each raw TRIPPURP label; these are the labels the purpose
# mapping has to cover.
trips.TRIPPURP.value_counts()

Work Related                         26746
Buy Something                        23220
Social                               21006
Pick-up or Drop-off Someone          14558
Recreational                         11828
Personal Business                    11461
Education                             8774
Accompany Someone                     6873
Pick-up or Deliver Something          2784
Other Purpose                          752
Unknown purpose (at start of day)      446
Change Mode                            170
At or Go Home                           64
Not Stated                               2
Name: TRIPPURP, dtype: int64

In [20]:
# Collapse the raw TRIPPURP labels into a smaller set of purpose categories.
mapping = {
    "Work Related": "Work",
    "Buy Something": "Shopping",
    "Social": "Social",
    "Pick-up or Drop-off Someone": "Pick-up_Drop-off",
    "Recreational": "Recreational",
    "Personal Business": "Personal Business",
    "Education": "Education",
    "Accompany Someone": "Pick-up_Drop-off",
    "Pick-up or Deliver Something": "Pick-up_Drop-off",
    "Other Purpose": "Other",
    "Unknown purpose (at start of day)": "Other",
    "Change Mode": "Other",
    "At or Go Home": "Other",
    "Not Stated": "Other",
}

# Series.map() yields NaN for any label that is not a key of `mapping`.
# That includes the already-mapped labels ("Work", "Shopping", "Other", ...),
# so running the original cell twice wiped the column. Falling back to the
# existing label keeps the cell idempotent and preserves unexpected labels
# instead of silently turning them into NaN.
# assign() builds a new frame, which avoids SettingWithCopyWarning.
trips = trips.assign(TRIPPURP=trips.TRIPPURP.map(mapping).fillna(trips.TRIPPURP))

In [21]:
# Distribution of the collapsed trip-purpose categories.
trips.TRIPPURP.value_counts()

Work                 26746
Pick-up_Drop-off     24215
Shopping             23220
Social               21006
Recreational         11828
Personal Business    11461
Education             8774
Other                 1434
Name: TRIPPURP, dtype: int64

### Select people with at least one trip with active mode (Walking, Bicycle)

In [22]:
# Number of distinct persons with at least one active-mode
# (Walking or Bicycle) trip
trips.PERSID[trips.LINKMODE.isin(['Walking', 'Bicycle'])].nunique()

9679

In [23]:
# PERSIDs of everyone who made at least one Walking or Bicycle trip
active_persons = trips.loc[trips.LINKMODE.isin(['Walking', 'Bicycle']), 'PERSID'].unique()
# Container type and number of distinct persons
type(active_persons), len(active_persons)

(numpy.ndarray, 9679)

In [24]:
# Get trips of these active persons
trips_ap = trips[trips.PERSID.isin(active_persons)]
trips_ap.shape

(44592, 18)

In [25]:
# Number of missing values per column in the active-person trips.
trips_ap.isna().sum()

TRIPID         0
PERSID         0
HHID           0
TRIPNO         0
TRAVDOW        0
LINKMODE       0
TRIPPURP       0
DESTPURP1      0
STARTIME       0
ARRTIME        0
TRIPTIME       0
TRAVTIME       0
WAITIME        0
ORIGSA1        0
DESTSA1        0
CUMDIST        0
HomeSA1        0
Mode         241
dtype: int64

In [26]:
# Remove every row that has a missing value in at least one column
trips_ap = trips_ap.dropna(axis=0, how='any')

In [27]:
# (rows, columns) left after dropping rows with missing values.
trips_ap.shape

(44351, 18)

### Save to CSV file for further analysis

In [33]:
# Final check of the mode distribution in the data about to be saved.
trips_ap['Mode'].value_counts()

Walk     20277
Car      19823
Train     2171
Cycle     2080
Name: Mode, dtype: int64

In [34]:
# Write the processed trips to the path configured as config.TRIPS_1216_PRC;
# index=False leaves the DataFrame index out of the file.
trips_ap.to_csv(config.TRIPS_1216_PRC, index=False)

In [35]:
# Columns present in the saved table.
trips_ap.columns

Index(['TRIPID', 'PERSID', 'HHID', 'TRIPNO', 'TRAVDOW', 'LINKMODE', 'TRIPPURP',
       'DESTPURP1', 'STARTIME', 'ARRTIME', 'TRIPTIME', 'TRAVTIME', 'WAITIME',
       'ORIGSA1', 'DESTSA1', 'CUMDIST', 'HomeSA1', 'Mode'],
      dtype='object')

In [36]:
# Preview the first rows of the saved table.
trips_ap.head()

Unnamed: 0,TRIPID,PERSID,HHID,TRIPNO,TRAVDOW,LINKMODE,TRIPPURP,DESTPURP1,STARTIME,ARRTIME,TRIPTIME,TRAVTIME,WAITIME,ORIGSA1,DESTSA1,CUMDIST,HomeSA1,Mode
40,Y12H0000107P01T01,Y12H0000107P01,Y12H0000107,1,Sunday,Vehicle Driver,Social,Social,630,650,20,20,0,20904122028,20904122002,2.6,20904122028,Car
41,Y12H0000107P01T02,Y12H0000107P01,Y12H0000107,2,Sunday,Vehicle Driver,Social,At or Go Home,750,760,10,10,0,20904122002,20904122028,2.6,20904122028,Car
42,Y12H0000107P01T03,Y12H0000107P01,Y12H0000107,3,Sunday,Walking,Recreational,Recreational,810,840,30,30,0,20904122028,20904122031,3.23,20904122028,Walk
43,Y12H0000107P01T04,Y12H0000107P01,Y12H0000107,4,Sunday,Walking,Recreational,At or Go Home,855,885,30,30,0,20904122031,20904122028,3.23,20904122028,Walk
44,Y12H0000107P02T01,Y12H0000107P02,Y12H0000107,1,Sunday,Vehicle Passenger,Social,Social,630,650,20,20,0,20904122028,20904122002,2.6,20904122028,Car
