# JourneyPatternID is a combination of Variant and Direction

In [57]:
import pandas as pd
import functools

from datetime import datetime, timedelta

import matplotlib.pyplot as plt
%matplotlib inline

## Import 15 Timetable

In [2]:
timetable = pd.read_csv("bus_data/static_data/master_timetable.csv")

In [3]:
#reassign columns to remove leading whitespace from variant column and name departure index
timetable.columns = ['departure_index', 'variant', 'day', 'departure', 'direction', 'line']

In [4]:
weekdayfifteentimetable = timetable.loc[(timetable['line'] == "15") & (timetable['day'] == "weekday")]
weekdayfifteentimetable.variant.value_counts()

 c    3
Name: variant, dtype: int64

In [5]:
saturdayfifteentimetable = timetable.loc[(timetable['line'] == "15") & (timetable['day'] == "saturday")]
saturdayfifteentimetable.variant.value_counts()

 c    3
Name: variant, dtype: int64

In [6]:
sundayfifteentimetable = timetable.loc[(timetable['line'] == "15") & (timetable['day'] == "sunday")]
sundayfifteentimetable.variant.value_counts()

 c    2
Name: variant, dtype: int64

#### Refine by Direction

In [7]:
directions = (timetable.loc[(timetable['line'] == "15")]).direction.unique()
directions

array(['From Ballycullen Towards Clongriffin',
       'From Clongriffin Towards Ballycullen'], dtype=object)

In [8]:
northbound_weekdayfifteentimetable = weekdayfifteentimetable.loc[(weekdayfifteentimetable['direction'] == directions[0])]
northbound_weekdayfifteentimetable.reset_index(inplace=True)

In [9]:
northbound_weekdayfifteentimetable

Unnamed: 0,index,departure_index,variant,day,departure,direction,line
0,4422,0,,weekday,6:00,From Ballycullen Towards Clongriffin,15
1,4423,1,,weekday,6:10,From Ballycullen Towards Clongriffin,15
2,4424,2,,weekday,6:20,From Ballycullen Towards Clongriffin,15
3,4425,3,,weekday,6:30,From Ballycullen Towards Clongriffin,15
4,4426,4,,weekday,6:40,From Ballycullen Towards Clongriffin,15
5,4427,5,,weekday,6:50,From Ballycullen Towards Clongriffin,15
6,4428,6,,weekday,7:00,From Ballycullen Towards Clongriffin,15
7,4429,7,,weekday,7:12,From Ballycullen Towards Clongriffin,15
8,4430,8,,weekday,7:25,From Ballycullen Towards Clongriffin,15
9,4431,9,,weekday,7:35,From Ballycullen Towards Clongriffin,15


In [10]:
southbound_weekdayfifteentimetable = weekdayfifteentimetable.loc[(weekdayfifteentimetable['direction'] == directions[0])]
southbound_weekdayfifteentimetable.reset_index(inplace=True)

In [11]:
southbound_weekdayfifteentimetable.shape

(92, 7)

## Import 15 Data

In [12]:
fifteen = pd.read_csv("bus_data/line_data/15.csv")
fifteen.columns = ["Timestamp", "LineID", "JourneyPatternID", "TimeFrame", 
              "VehicleJourneyID", "Lon", "Lat", "VehicleID", "StopID", 
              "AtStop", "HumanTime", "Day", "Hour", "Runtime"]

In [13]:
fifteen['HumanTime'] = pd.to_datetime(fifteen['HumanTime'])

#### number of journey pattern ids should equal number of directions + variations on each direction

In [14]:
journeypatterns = list(fifteen.JourneyPatternID.unique())

In [15]:
journeypatterns

[1, 1001, 1002, 2]

In [16]:
#Monday is 0, Sunday is 6
def get_day(x):
    if x.weekday() == 5:
        return "saturday"
    elif x.weekday() == 6:
        return "sunday"
    else:
        return "weekday"
    
fifteen["day"] = fifteen['HumanTime'].apply(get_day)  

In [17]:
sunday = fifteen.loc[fifteen['day'] == "sunday"]
saturday = fifteen.loc[fifteen['day'] == "saturday"]
weekday = fifteen.loc[fifteen['day'] == "weekday"]

In [18]:
weekday.JourneyPatternID.value_counts()

1       110972
1001    102739
1002       526
2          158
Name: JourneyPatternID, dtype: int64

Extract Single Day and JourneyPatternID

In [19]:
saturday.JourneyPatternID.value_counts()

1       12157
1001    11304
1002      166
2          42
Name: JourneyPatternID, dtype: int64

In [20]:
sunday.JourneyPatternID.value_counts()

1       7332
1001    6838
1002      31
2         22
Name: JourneyPatternID, dtype: int64

Therefore there are 2 directions and 2 variations everyday

In [21]:
tuesday = fifteen.loc[(fifteen['TimeFrame'] == "2012-11-14")]


In [22]:
tuesday_trips = tuesday.drop_duplicates("VehicleJourneyID", keep='first', inplace=False)
print("total number of trips on a single tuesday is:")
tuesday_trips.JourneyPatternID.value_counts().sum()

total number of trips on a single tuesday is:


181

In [23]:
print("which is almost the name as the number there ought to be on a single day:")
weekdayfifteentimetable.shape[0]

which is almost the name as the number there ought to be on a single day:


184

In [24]:
tuesday_trips = tuesday.drop_duplicates("VehicleJourneyID", keep='first', inplace=False)
print("But the JourneyPattern ID counts of those trips on a single tuesday are not evely divided:")
tuesday_trips.JourneyPatternID.value_counts()

But the JourneyPattern ID counts of those trips on a single tuesday are not evely divided:


1       140
1001     40
2         1
Name: JourneyPatternID, dtype: int64

##### The JourneyPatternID "1" has too high a count to describe a single direction, it must describe 2.

In [25]:
tuesday_trips.JourneyPatternID.value_counts().sum()

181

In [26]:
tuesday_trips_oneway = tuesday_trips.loc[(tuesday_trips['JourneyPatternID'] == 1)]
tuesday_trips_oneway

Unnamed: 0,Timestamp,LineID,JourneyPatternID,TimeFrame,VehicleJourneyID,Lon,Lat,VehicleID,StopID,AtStop,HumanTime,Day,Hour,Runtime,day
38440,1352872804000000,15,1,2012-11-14,5891,-6.150500,53.402950,33553,6318,1,2012-11-14 06:00:04,2,6,0,weekday
38459,1352873786000000,15,1,2012-11-14,5899,-6.181091,53.401699,33498,4563,1,2012-11-14 06:16:26,2,6,699,weekday
38476,1352874268000000,15,1,2012-11-14,5902,-6.173066,53.402138,33499,4595,1,2012-11-14 06:24:28,2,6,474,weekday
38504,1352874843000000,15,1,2012-11-14,5912,-6.163521,53.399288,33500,4594,1,2012-11-14 06:34:03,2,6,0,weekday
38544,1352875562000000,15,1,2012-11-14,5920,-6.177130,53.402370,33502,4596,1,2012-11-14 06:46:02,2,6,373,weekday
38575,1352876052000000,15,1,2012-11-14,5930,-6.163521,53.399288,33282,4594,1,2012-11-14 06:54:12,2,6,578,weekday
38614,1352876669000000,15,1,2012-11-14,5937,-6.173066,53.402138,35006,4595,1,2012-11-14 07:04:29,2,7,338,weekday
38658,1352877289000000,15,1,2012-11-14,5945,-6.157236,53.402843,33501,7246,1,2012-11-14 07:14:49,2,7,419,weekday
38701,1352877962000000,15,1,2012-11-14,5952,-6.157236,53.402843,33222,7246,1,2012-11-14 07:26:02,2,7,381,weekday
38758,1352878712000000,15,1,2012-11-14,5844,-6.162333,53.401539,33016,6320,1,2012-11-14 07:38:32,2,7,534,weekday


### It is very difficult to infer which of these is in which direction

was there originally a direction collumn which was dropped which could be resused?

Does The most frequent journey pattern ID actually contain trips in both directions.

### Finding direction of journeys

In [39]:
df = pd.read_csv("bus_data/line_data/15.csv")
df.columns = ["Timestamp", "LineID", "JourneyPatternID", "TimeFrame", 
              "VehicleJourneyID", "Lon", "Lat", "VehicleID", "StopID", 
              "AtStop", "HumanTime", "Day", "Hour", "Runtime"]

In [44]:
df = df[df.JourneyPatternID == 1]

In [59]:
journey_df = df.drop_duplicates('VehicleJourneyID')

In [52]:
journeys = df['VehicleJourneyID'].unique()

In [67]:
directions = {}
for journey in journeys:
    start = df[df.VehicleJourneyID == journey].iloc[0]
    end = df[df.VehicleJourneyID == journey].iloc[-1]
    direction = (start.Lat - end.Lat)
    directions[journey] = 1 if (direction > 0) else 0

In [73]:
#  new_df = pd.DataFrame([directions], columns=['VehiclejourneyID', 'Direction'])
new_df = pd.DataFrame.from_dict(directions, orient='index')

In [75]:
new_df.head(100)

Unnamed: 0,0
5891,1
5899,1
5902,1
5920,1
5912,1
5930,1
5937,1
5945,1
5952,1
5844,1


In [55]:
def add_direction(row, dictionary):
    direction = dictionary[row.VehicleJourneyID]
    return direction

In [58]:
df['Direction'] = df.apply(add_direction, args=(directions), axis=1)


# df['Direction'] = df.apply(add_direction, args=(directions), axis=1)

# my_series.apply((lambda x: your_func(a,b,c,d,...,x)))

TypeError: ('add_direction() takes 2 positional arguments but 1189 were given', 'occurred at index 0')