# JourneyPatternID is a combination of Variant and Direction

In [47]:
import pandas as pd
from datetime import datetime, timedelta

import matplotlib.pyplot as plt
%matplotlib inline

## Import 15 Timetable

In [137]:
# Load the master timetable CSV (path is relative to the notebook's working directory);
# later cells filter it down to line "15".
timetable = pd.read_csv("bus_data/static_data/master_timetable.csv")

In [94]:
# Reassign the column names to remove the leading whitespace from the variant column's
# name and to give the departure index column a name.
# The assignment is positional: pandas raises if the file does not have exactly six
# columns, and the names only make sense if the columns arrive in this order.
timetable.columns = ['departure_index', 'variant', 'day', 'departure', 'direction', 'line']

In [95]:
# Weekday departures of line 15 only
weekdayfifteentimetable = timetable[
    timetable['line'].eq("15") & timetable['day'].eq("weekday")
]
# Number of departures per variant (missing variants are not counted)
weekdayfifteentimetable['variant'].value_counts()

 c    3
Name: variant, dtype: int64

In [96]:
# Saturday departures of line 15 only
saturdayfifteentimetable = timetable[
    timetable['line'].eq("15") & timetable['day'].eq("saturday")
]
# Number of departures per variant (missing variants are not counted)
saturdayfifteentimetable['variant'].value_counts()

 c    3
Name: variant, dtype: int64

In [97]:
# Sunday departures of line 15 only
sundayfifteentimetable = timetable[
    timetable['line'].eq("15") & timetable['day'].eq("sunday")
]
# Number of departures per variant (missing variants are not counted)
sundayfifteentimetable['variant'].value_counts()

 c    2
Name: variant, dtype: int64

#### Refine by Direction

In [103]:
# Distinct direction labels that appear in the timetable for line 15
directions = timetable.loc[timetable['line'] == "15", 'direction'].unique()
directions

array(['From Ballycullen Towards Clongriffin',
       'From Clongriffin Towards Ballycullen'], dtype=object)

In [108]:
# Northbound = directions[0] ('From Ballycullen Towards Clongriffin' in the recorded output).
# reset_index() keeps the old row labels in an 'index' column and renumbers rows from 0.
northbound_weekdayfifteentimetable = (
    weekdayfifteentimetable[weekdayfifteentimetable['direction'] == directions[0]]
    .reset_index()
)

In [128]:
# Show the northbound weekday timetable; the 'index' column holds the row labels
# the rows had in the full timetable before reset_index.
northbound_weekdayfifteentimetable

Unnamed: 0,index,departure_index,variant,day,departure,direction,line
0,4422,0,,weekday,6:00,From Ballycullen Towards Clongriffin,15
1,4423,1,,weekday,6:10,From Ballycullen Towards Clongriffin,15
2,4424,2,,weekday,6:20,From Ballycullen Towards Clongriffin,15
3,4425,3,,weekday,6:30,From Ballycullen Towards Clongriffin,15
4,4426,4,,weekday,6:40,From Ballycullen Towards Clongriffin,15
5,4427,5,,weekday,6:50,From Ballycullen Towards Clongriffin,15
6,4428,6,,weekday,7:00,From Ballycullen Towards Clongriffin,15
7,4429,7,,weekday,7:12,From Ballycullen Towards Clongriffin,15
8,4430,8,,weekday,7:25,From Ballycullen Towards Clongriffin,15
9,4431,9,,weekday,7:35,From Ballycullen Towards Clongriffin,15


In [129]:
# Southbound = directions[1] ('From Clongriffin Towards Ballycullen' in the recorded
# output of `directions`). The previous version filtered on directions[0], which
# silently produced a second copy of the northbound timetable.
# NOTE(review): this relies on the order returned by unique(); confirm `directions`
# still lists the Clongriffin -> Ballycullen direction second if the data changes.
# reset_index() keeps the old row labels in an 'index' column and renumbers rows from 0.
southbound_weekdayfifteentimetable = (
    weekdayfifteentimetable
    .loc[weekdayfifteentimetable['direction'] == directions[1]]
    .reset_index()
)

In [131]:
# (rows, columns) of the southbound weekday timetable
southbound_weekdayfifteentimetable.shape

(92, 7)

## Import 15 Data

In [48]:
# Load the recorded data for line 15 (path is relative to the notebook's working directory).
fifteen = pd.read_csv("bus_data/line_data/15.csv")
# Rename the columns positionally: pandas raises unless the file has exactly these
# 14 columns, and the names only make sense if the columns arrive in this order.
fifteen.columns = ["Timestamp", "LineID", "JourneyPatternID", "TimeFrame", 
              "VehicleJourneyID", "Lon", "Lat", "VehicleID", "StopID", 
              "AtStop", "HumanTime", "Day", "Hour", "Runtime"]

In [77]:
# Parse HumanTime into pandas datetimes so that per-value date methods
# (e.g. .weekday(), used by get_day below) are available.
fifteen['HumanTime'] = pd.to_datetime(fifteen['HumanTime'])

#### The number of JourneyPatternIDs should equal the number of directions plus the number of variations on each direction

In [78]:
# Distinct JourneyPatternID values present in the line-15 data
journeypatterns = list(fifteen.JourneyPatternID.unique())

In [79]:
# Show the distinct JourneyPatternIDs
journeypatterns

[1, 1001, 1002, 2]

In [80]:
def get_day(x):
    """Classify a date-like value as "saturday", "sunday" or "weekday".

    `x` must provide a ``weekday()`` method that follows the Monday=0 ... Sunday=6
    convention (for example ``datetime.datetime`` or ``pandas.Timestamp``).
    Anything that is not a Saturday (5) or Sunday (6) is reported as "weekday".
    """
    weekend_labels = {5: "saturday", 6: "sunday"}
    return weekend_labels.get(x.weekday(), "weekday")
    
# Label every record with its day type ("weekday" / "saturday" / "sunday"),
# derived from the parsed HumanTime column.
fifteen["day"] = fifteen['HumanTime'].apply(get_day)  

In [83]:
# Split the line-15 records into one frame per day type
sunday, saturday, weekday = (
    fifteen[fifteen['day'] == day_type]
    for day_type in ("sunday", "saturday", "weekday")
)

In [84]:
# Number of weekday records per JourneyPatternID
weekday.JourneyPatternID.value_counts()

1       110972
1001    102739
1002       526
2          158
Name: JourneyPatternID, dtype: int64

Extract Single Day and JourneyPatternID

In [86]:
# Number of Saturday records per JourneyPatternID
saturday.JourneyPatternID.value_counts()

1       12157
1001    11304
1002      166
2          42
Name: JourneyPatternID, dtype: int64

In [87]:
# Number of Sunday records per JourneyPatternID
sunday.JourneyPatternID.value_counts()

1       7332
1001    6838
1002      31
2         22
Name: JourneyPatternID, dtype: int64

Therefore there are 2 directions and 2 variations every day

In [115]:
# All records for a single service day.
# NOTE: 2012-11-07 was a Wednesday, not a Tuesday; the `tuesday*` names are kept
# only because later cells refer to them.
tuesday = fifteen.loc[(fifteen['TimeFrame'] == "2012-11-07")]
# Records per JourneyPatternID on that day (the expression behind the recorded output)
tuesday.JourneyPatternID.value_counts()

1       2944
1001    2540
1002      13
2          2
Name: JourneyPatternID, dtype: int64

In [118]:
# Keep only the first record of each VehicleJourneyID, i.e. one row per trip
tuesday_trips = tuesday.drop_duplicates(subset="VehicleJourneyID")
print("total number of trips on a single tuesday is:")
# Summing the per-pattern counts gives the total number of trips
tuesday_trips['JourneyPatternID'].value_counts().sum()

total number of trips on a single tuesday is:


183

In [126]:
print("which is almost the name as the number there ought to be on a single day:")
# Number of scheduled weekday departures for line 15 (both directions)
len(weekdayfifteentimetable)

which is almost the name as the number there ought to be on a single day:


184

In [120]:
# One row per trip: the first record seen for each VehicleJourneyID
tuesday_trips = tuesday.drop_duplicates(subset=["VehicleJourneyID"])
print("But the JourneyPattern ID counts of those trips on a single tuesday are not evely divided:")
# Trips per JourneyPatternID
tuesday_trips['JourneyPatternID'].value_counts()

But the JourneyPattern ID counts of those trips on a single tuesday are not evely divided:


1       132
1001     49
1002      1
2         1
Name: JourneyPatternID, dtype: int64

##### The JourneyPatternID "1" has too high a count to describe a single direction, so it must describe 2.

In [132]:
# Total number of de-duplicated trips, summed over all JourneyPatternIDs
tuesday_trips.JourneyPatternID.value_counts().sum()

183

In [136]:
# Trips whose first record carries JourneyPatternID 1
tuesday_trips_oneway = tuesday_trips[tuesday_trips['JourneyPatternID'].eq(1)]
tuesday_trips_oneway

Unnamed: 0,Timestamp,LineID,JourneyPatternID,TimeFrame,VehicleJourneyID,Lon,Lat,VehicleID,StopID,AtStop,HumanTime,Day,Hour,Runtime,day
5482,1352268002000000,15,1,2012-11-07,5891,-6.150383,53.402901,33621,6318,1,2012-11-07 06:00:02,2,6,0,weekday
5494,1352268883000000,15,1,2012-11-07,5899,-6.173066,53.402138,33498,4595,1,2012-11-07 06:14:43,2,6,341,weekday
5511,1352269408000000,15,1,2012-11-07,5902,-6.162333,53.401539,33499,6320,1,2012-11-07 06:23:28,2,6,281,weekday
5532,1352270072000000,15,1,2012-11-07,5912,-6.173066,53.402138,33622,4595,1,2012-11-07 06:34:32,2,6,361,weekday
5556,1352270428000000,15,1,2012-11-07,5920,-6.153783,53.402687,33502,6319,1,2012-11-07 06:40:28,2,6,59,weekday
5593,1352271050000000,15,1,2012-11-07,5930,-6.153783,53.402687,33515,6319,1,2012-11-07 06:50:50,2,6,0,weekday
5631,1352271645000000,15,1,2012-11-07,5937,-6.153783,53.402687,33340,6319,1,2012-11-07 07:00:45,2,7,260,weekday
5657,1352272012000000,15,1,2012-11-07,5945,-6.151516,53.402699,33546,6318,1,2012-11-07 07:06:52,2,7,0,weekday
5742,1352273144000000,15,1,2012-11-07,5952,-6.153783,53.402687,33547,6319,1,2012-11-07 07:25:44,2,7,121,weekday
5807,1352273810000000,15,1,2012-11-07,5892,-6.323367,53.271568,33621,6282,1,2012-11-07 07:36:50,2,7,0,weekday


### It is very difficult to infer which of these is in which direction

Was there originally a direction column that was dropped and that could be reused?