# JourneyPatternID is a combination of Variant and Direction

In [1]:
import pandas as pd
from datetime import datetime, timedelta

import matplotlib.pyplot as plt
%matplotlib inline

### Constants

In [2]:
lines = ['1', '102', '104', '11', '111', '114', '116', '118', '120', '122', '123', '13', '130', '14', '140', '142', '145', '14C', '15', '150', '151', '15A', '15B', '16', '161', '16C', '17', '17A', '18', '184', '185', '220', '236', '238', '239', '25', '25A', '25B', '25X', '26', '27', '270', '27A', '27B', '27X', '29A', '31', '31A', '31B', '32', '32X', '33', '332', '33A', '33B', '33X', '37', '38', '38A', '38B', '39', '39A', '4', '40', '40B', '40D', '41', '41A', '41B', '41C', '41X', '42', '43', '44', '44B', '45A', '46A', '46E', '47', '49', '51D', '51X', '53', '54A', '56A', '59', '61', '63', '65', '65B', '66', '66A', '66B', '66X', '67', '67X', '68', '68A', '69', '69X', '7', '70', '747', '75', '76', '76A', '77A', '79', '79A', '7B', '7D', '8', '83', '83A', '84', '84A', '84X', '86', '9']

timeframes = ["2012-11-23", "2012-11-27", "2012-11-09", "2012-11-22", "2012-11-16", "2012-11-29", "2012-11-15", 
            "2012-11-08", "2013-01-24", "2013-01-09", "2013-01-17", "2012-11-07", "2013-01-11", "2012-11-06", "2012-11-14", "2013-01-29",
            "2012-11-19", "2012-11-13", "2013-01-23", "2013-01-22", "2013-01-08", "2012-11-20", "2013-01-15", "2013-01-25","2012-11-21",
            "2013-01-14", "2013-01-28", "2013-01-16", "2013-01-04", "2013-01-10", "2013-01-18", "2012-11-26", "2012-11-28", "2013-01-07", 
            "2013-01-03", "2013-01-21", "2012-11-12", "2013-01-02", "2012-11-30", "2013-01-12", "2012-11-10", "2012-11-17", "2013-01-26",
            "2013-01-19", "2013-01-05", "2012-11-24", "2013-01-30", "2012-11-11", "2012-11-18", "2012-11-25", "2013-01-20", "2013-01-13", 
            "2013-01-06", "2013-01-27", "2013-01-01", "2013-01-31",]

#journeypatterns dependent upon line data therefore not defined here
service_types = ["weekday","saturday","sunday"]


##1 Static Variables

In [3]:
line = lines[1]
date = timeframes[2]
service_type = service_types[2]

## Import Data For given Line

In [4]:
linedata = pd.read_csv("bus_data/line_data/"+line+".csv")
linedata.columns = ["Timestamp", "LineID", "JourneyPatternID", "TimeFrame", 
              "VehicleJourneyID", "Lon", "Lat", "VehicleID", "StopID", 
              "AtStop", "HumanTime", "Day", "Hour", "Runtime"]

In [5]:
linedata['HumanTime'] = pd.to_datetime(linedata['HumanTime'])

#### number of journey pattern ids should equal number of directions + variations on each direction

In [6]:
#Monday is 0, Sunday is 6
def get_day(x):
    if x.weekday() == 5:
        return "saturday"
    elif x.weekday() == 6:
        return "sunday"
    else:
        return "weekday"
    
linedata["day"] = linedata['HumanTime'].apply(get_day)  

In [7]:
# #service_type defined above @ static variables
# #df of all busses of that line with that service type
# service = linedata.loc[linedata['day'] == service_type]

# print("service_type JourneyPatternIDs:"+service_type)
# service.JourneyPatternID.value_counts()

### Extract Trips on a Single Day as day_trips

In [8]:
#staticvariable
# for given_day in linedata.loc[(linedata['TimeFrame'] == timeframe]
given_day = linedata.loc[(linedata['TimeFrame'] == date)]

In [9]:
day_trips = given_day.drop_duplicates("VehicleJourneyID", keep='first', inplace=False)
print("total number of trips on "+line+" on the "+date+" is:")
day_trips.JourneyPatternID.value_counts().sum()

total number of trips on 102 on the 2012-11-09 is:


61

In [10]:
print("Division of pattens:")
day_trips.JourneyPatternID.value_counts()

Division of pattens:


1001    44
1       11
1002     6
Name: JourneyPatternID, dtype: int64

# Does The most frequent journey pattern ID actually contain trips in both directions.?
## Create Dataframe for 1 JourneyPattern ID
### Intermediate Static Varaibles!!

In [11]:
journeypatterns = list(linedata.JourneyPatternID.unique())
vehiclejourneys = day_trips['VehicleJourneyID'].unique()

In [12]:
# #journeypatterns list defined above
# for journeypattern in journeypatterns:
#     journey_line_data = line_data[line_data.JourneyPatternID == jp]

In [13]:
directions = {}

directions = {}
for journey in vehiclejourneys:
    start = linedata[linedata.VehicleJourneyID == journey].iloc[0]
    end = linedata[linedata.VehicleJourneyID == journey].iloc[-1]
    direction = (start.Lat - end.Lat)
    directions[journey] = 1 if (direction > 0) else 0

In [14]:
#create dataframe mapping of VehicleJourneyID and its Direction
journeydirections_df = pd.DataFrame.from_dict(directions, orient='index')
journeydirections_df.reset_index(inplace=True)
journeydirections_df.columns = ["VehicleJourneyID","Southbound"]

In [15]:
print("DataFrames to merge are of same size: " + str(journeydirections_df.shape[0] == day_trips.VehicleJourneyID.nunique()))

DataFrames to merge are of same size: True


### merge data frames

In [16]:
merged_df = journeydirections_df.merge(day_trips)

In [17]:
merged_df['JourneyPatternID'].value_counts()

1001    44
1       11
1002     6
Name: JourneyPatternID, dtype: int64

In [18]:
#JourneyPatternIDs of Southbound Trips
merged_df.loc[(merged_df['Southbound'] == 1)].JourneyPatternID.value_counts()

1001    20
1        9
1002     2
Name: JourneyPatternID, dtype: int64

In [19]:
#JourneyPatternIDs of Northbound Trips
merged_df.loc[(merged_df['Southbound'] == 0)].JourneyPatternID.value_counts()

1001    24
1002     4
1        2
Name: JourneyPatternID, dtype: int64

In [20]:
#Number of Northbound Journeys
num_nbound = merged_df.loc[(merged_df['Southbound'] == 0)].shape[0]

In [21]:
#Number of Southbound Journeys
num_sbound = merged_df.loc[(merged_df['Southbound'] == 1)].shape[0]

## On a given day on a given line we can tell which journey pattern ids are in which direction

## Import Timetable

In [22]:
timetable = pd.read_csv("bus_data/static_data/master_timetable.csv")

In [23]:
#reassign columns to remove leading whitespace from variant column and name departure index
timetable.columns = ['departure_index', 'variant', 'service', 'departure', 'direction', 'line']

In [24]:
# schedule = timetable.loc[(((timetable['line'] == line)&(timetable['direction'] == directions[0]))&(timetable['service'] == service))]
# schedule.reset_index(inplace=True)

schedule = timetable.loc[(timetable['line'] == line)&(timetable['service'] == service_type)]
schedule.reset_index(inplace=True)

In [25]:
print(schedule.direction.value_counts()[0])
print(schedule.direction.value_counts()[1])
print("number of northbound trips: "+str(num_nbound))
print("number of southound trips: "+str(num_sbound))

13
13
number of northbound trips: 30
number of southound trips: 31


## Is There a Match?