# Week 9
Let's continue the analysis of the NYC Bus data. You should already have downloaded the file `mta_1706.csv`.

In [1]:
# Read the bus records from the CSV file into a DataFrame and preview them.
import pandas as pd

# The file is large, so this read may take some time.
# Lines that cannot be parsed are skipped instead of raising an error.
data = pd.read_csv(
    "mta_1706.csv",
    on_bad_lines="skip",
)
data.head()

Unnamed: 0,RecordedAtTime,DirectionRef,PublishedLineName,OriginName,OriginLat,OriginLong,DestinationName,DestinationLat,DestinationLong,VehicleRef,VehicleLocation.Latitude,VehicleLocation.Longitude,NextStopPointName,ArrivalProximityText,DistanceFromStop,ExpectedArrivalTime,ScheduledArrivalTime
0,2017-06-01 00:03:34,0,B8,4 AV/95 ST,40.616104,-74.031143,BROWNSVILLE ROCKAWAY AV,40.656048,-73.907379,NYCT_430,40.63517,-73.960803,FOSTER AV/E 18 ST,approaching,76.0,2017-06-01 00:03:59,24:06:14
1,2017-06-01 00:03:43,1,S61,ST GEORGE FERRY/S61 & S91,40.643169,-74.073494,S I MALL YUKON AV,40.575935,-74.167686,NYCT_8263,40.590802,-74.15834,MERRYMOUNT ST/TRAVIS AV,approaching,62.0,2017-06-01 00:03:56,23:58:02
2,2017-06-01 00:03:49,0,Bx10,E 206 ST/BAINBRIDGE AV,40.875008,-73.880142,RIVERDALE 263 ST,40.912376,-73.902534,NYCT_4223,40.88601,-73.912647,HENRY HUDSON PKY E/W 235 ST,at stop,5.0,2017-06-01 00:03:56,24:00:53
3,2017-06-01 00:03:31,0,Q5,TEARDROP/LAYOVER,40.701748,-73.802399,ROSEDALE LIRR STA via MERRICK,40.666012,-73.735939,NYCT_8422,40.668002,-73.729348,HOOK CREEK BL/SUNRISE HY,< 1 stop away,267.0,2017-06-01 00:04:03,24:03:00
4,2017-06-01 00:03:22,1,Bx1,RIVERDALE AV/W 231 ST,40.881187,-73.90934,MOTT HAVEN 136 ST via CONCOURSE,40.809654,-73.92836,NYCT_4710,40.868134,-73.893032,GRAND CONCOURSE/E 196 ST,at stop,11.0,2017-06-01 00:03:56,23:59:38


In [2]:
# The full dataset is too large. Let's only keep data on June 1st, 2017.
data['RecordedAtTime'] = pd.to_datetime(data['RecordedAtTime'])

# Compare the whole calendar date rather than only the day of the month, so
# that a record from the 1st of any other month or year cannot slip through.
# normalize() resets the time of day to midnight, leaving just the date.
target_day = pd.Timestamp("2017-06-01")
data = data[data['RecordedAtTime'].dt.normalize() == target_day]

# Remove rows that are exact duplicates of an earlier row.
data = data.drop_duplicates()
print("Shape:", data.shape)
data.head()

Shape: (248877, 17)


Unnamed: 0,RecordedAtTime,DirectionRef,PublishedLineName,OriginName,OriginLat,OriginLong,DestinationName,DestinationLat,DestinationLong,VehicleRef,VehicleLocation.Latitude,VehicleLocation.Longitude,NextStopPointName,ArrivalProximityText,DistanceFromStop,ExpectedArrivalTime,ScheduledArrivalTime
0,2017-06-01 00:03:34,0,B8,4 AV/95 ST,40.616104,-74.031143,BROWNSVILLE ROCKAWAY AV,40.656048,-73.907379,NYCT_430,40.63517,-73.960803,FOSTER AV/E 18 ST,approaching,76.0,2017-06-01 00:03:59,24:06:14
1,2017-06-01 00:03:43,1,S61,ST GEORGE FERRY/S61 & S91,40.643169,-74.073494,S I MALL YUKON AV,40.575935,-74.167686,NYCT_8263,40.590802,-74.15834,MERRYMOUNT ST/TRAVIS AV,approaching,62.0,2017-06-01 00:03:56,23:58:02
2,2017-06-01 00:03:49,0,Bx10,E 206 ST/BAINBRIDGE AV,40.875008,-73.880142,RIVERDALE 263 ST,40.912376,-73.902534,NYCT_4223,40.88601,-73.912647,HENRY HUDSON PKY E/W 235 ST,at stop,5.0,2017-06-01 00:03:56,24:00:53
3,2017-06-01 00:03:31,0,Q5,TEARDROP/LAYOVER,40.701748,-73.802399,ROSEDALE LIRR STA via MERRICK,40.666012,-73.735939,NYCT_8422,40.668002,-73.729348,HOOK CREEK BL/SUNRISE HY,< 1 stop away,267.0,2017-06-01 00:04:03,24:03:00
4,2017-06-01 00:03:22,1,Bx1,RIVERDALE AV/W 231 ST,40.881187,-73.90934,MOTT HAVEN 136 ST via CONCOURSE,40.809654,-73.92836,NYCT_4710,40.868134,-73.893032,GRAND CONCOURSE/E 196 ST,at stop,11.0,2017-06-01 00:03:56,23:59:38


In [5]:
# Narrow the records down to the Bx26 line and preview them.
on_bx26 = data['PublishedLineName'].eq('Bx26')
Bx26 = data.loc[on_bx26]
print("Shape:", Bx26.shape)
Bx26.head()

Shape: (972, 17)


Unnamed: 0,RecordedAtTime,DirectionRef,PublishedLineName,OriginName,OriginLat,OriginLong,DestinationName,DestinationLat,DestinationLong,VehicleRef,VehicleLocation.Latitude,VehicleLocation.Longitude,NextStopPointName,ArrivalProximityText,DistanceFromStop,ExpectedArrivalTime,ScheduledArrivalTime
12311,2017-06-01 05:33:52,1,Bx26,EARHART LA/ERSKINE PL,40.861115,-73.822548,BEDFORD PK LEHMAN COLL via ALLERTN,40.877071,-73.890068,NYCT_4415,40.86136,-73.822817,HUNTER AV/EARHART LA,< 1 stop away,216.0,2017-06-01 05:34:10,05:35:37
13282,2017-06-01 05:43:36,1,Bx26,EARHART LA/ERSKINE PL,40.861115,-73.822548,BEDFORD PK LEHMAN COLL via ALLERTN,40.877071,-73.890068,NYCT_4415,40.867572,-73.835474,BARTOW AV/BRUNNER AV,at stop,29.0,2017-06-01 05:43:54,05:43:36
14370,2017-06-01 05:53:29,1,Bx26,EARHART LA/ERSKINE PL,40.861115,-73.822548,BEDFORD PK LEHMAN COLL via ALLERTN,40.877071,-73.890068,NYCT_4415,40.865545,-73.861927,ALLERTON AV/BOSTON RD,at stop,0.0,2017-06-01 05:53:52,05:52:53
14529,2017-06-01 05:53:45,1,Bx26,EARHART LA/ERSKINE PL,40.861115,-73.822548,BEDFORD PK LEHMAN COLL via ALLERTN,40.877071,-73.890068,NYCT_4416,40.86951,-73.825654,BARTOW AV/CO-OP CITY BL,at stop,19.0,2017-06-01 05:53:52,05:54:04
15652,2017-06-01 06:03:23,1,Bx26,EARHART LA/ERSKINE PL,40.861115,-73.822548,BEDFORD PK LEHMAN COLL via ALLERTN,40.877071,-73.890068,NYCT_4415,40.867914,-73.884166,BEDFORD PK BL/MARION AV,approaching,140.0,2017-06-01 06:04:26,06:00:42


In [17]:
# Let's visualize the route
import folium

# Center of the map: 40.87101179741048, -73.86352516801689
bus_map = folium.Map(location=[40.87101179741048, -73.86352516801689],
                     zoom_start=13)

# First attempt, kept for reference: one marker per record.
# Visualizing the route with markers is not very beautiful, so a different
# approach (small circles) is used below.
# for row in Bx26.index:
#     bus_location = [Bx26.loc[row, "VehicleLocation.Latitude"],
#                     Bx26.loc[row, "VehicleLocation.Longitude"]]
#     folium.Marker(bus_location, popup=row).add_to(bus_map)

# Draw every recorded bus position as a small circle whose popup shows when
# the position was recorded.
# The loop variable is deliberately NOT called `time`: loop variables stay
# alive in the notebook namespace, and `time` is the name a later cell uses for
# the target time. Re-running this cell must not overwrite that value.
for lat, lon, recorded_at in zip(Bx26["VehicleLocation.Latitude"],
                                 Bx26["VehicleLocation.Longitude"],
                                 Bx26["RecordedAtTime"]):
    folium.Circle([lat, lon], radius=0.1, popup=recorded_at).add_to(bus_map)

bus_map

In [25]:
# Calculate the waiting time
# Start by fixing the stop location and the target time.
from datetime import datetime

location = [40.874634210385175, -73.89178270047667]  # Bx26 stop at LC
time = datetime(year=2017, month=6, day=1, hour=12, minute=14, second=0)
print(time)

2017-06-01 12:14:00


In [29]:
# Find the rows around the target time.
# TimeDiff = target time - time of the record, so a positive value means the
# record was made before the target time.
# assign() returns a new DataFrame, so the frame Bx26 was sliced from is not
# modified.
Bx26 = Bx26.assign(TimeDiff=time - Bx26['RecordedAtTime'])
Bx26.head()

Unnamed: 0,RecordedAtTime,DirectionRef,PublishedLineName,OriginName,OriginLat,OriginLong,DestinationName,DestinationLat,DestinationLong,VehicleRef,VehicleLocation.Latitude,VehicleLocation.Longitude,NextStopPointName,ArrivalProximityText,DistanceFromStop,ExpectedArrivalTime,ScheduledArrivalTime,TimeDiff
12311,2017-06-01 05:33:52,1,Bx26,EARHART LA/ERSKINE PL,40.861115,-73.822548,BEDFORD PK LEHMAN COLL via ALLERTN,40.877071,-73.890068,NYCT_4415,40.86136,-73.822817,HUNTER AV/EARHART LA,< 1 stop away,216.0,2017-06-01 05:34:10,05:35:37,0 days 06:40:08
13282,2017-06-01 05:43:36,1,Bx26,EARHART LA/ERSKINE PL,40.861115,-73.822548,BEDFORD PK LEHMAN COLL via ALLERTN,40.877071,-73.890068,NYCT_4415,40.867572,-73.835474,BARTOW AV/BRUNNER AV,at stop,29.0,2017-06-01 05:43:54,05:43:36,0 days 06:30:24
14370,2017-06-01 05:53:29,1,Bx26,EARHART LA/ERSKINE PL,40.861115,-73.822548,BEDFORD PK LEHMAN COLL via ALLERTN,40.877071,-73.890068,NYCT_4415,40.865545,-73.861927,ALLERTON AV/BOSTON RD,at stop,0.0,2017-06-01 05:53:52,05:52:53,0 days 06:20:31
14529,2017-06-01 05:53:45,1,Bx26,EARHART LA/ERSKINE PL,40.861115,-73.822548,BEDFORD PK LEHMAN COLL via ALLERTN,40.877071,-73.890068,NYCT_4416,40.86951,-73.825654,BARTOW AV/CO-OP CITY BL,at stop,19.0,2017-06-01 05:53:52,05:54:04,0 days 06:20:15
15652,2017-06-01 06:03:23,1,Bx26,EARHART LA/ERSKINE PL,40.861115,-73.822548,BEDFORD PK LEHMAN COLL via ALLERTN,40.877071,-73.890068,NYCT_4415,40.867914,-73.884166,BEDFORD PK BL/MARION AV,approaching,140.0,2017-06-01 06:04:26,06:00:42,0 days 06:10:37


In [32]:
# Show the three records made most recently before the target time
# (positive TimeDiff only, smallest difference first).
from datetime import timedelta

recorded_before = Bx26['TimeDiff'] > timedelta(0)
Bx26.loc[recorded_before].sort_values(by='TimeDiff').head(3)

Unnamed: 0,RecordedAtTime,DirectionRef,PublishedLineName,OriginName,OriginLat,OriginLong,DestinationName,DestinationLat,DestinationLong,VehicleRef,VehicleLocation.Latitude,VehicleLocation.Longitude,NextStopPointName,ArrivalProximityText,DistanceFromStop,ExpectedArrivalTime,ScheduledArrivalTime,TimeDiff
100096,2017-06-01 12:03:54,0,Bx26,W 205 ST/PAUL AV,40.876999,-73.889854,CO-OP CITY EARHART LA via ALLERTN,40.860828,-73.82267,NYCT_4416,40.865517,-73.859921,ALLERTON AV/WILLIAMSBRIDGE RD,approaching,71.0,2017-06-01 12:04:31,12:01:00,0 days 00:10:06
99896,2017-06-01 12:03:53,0,Bx26,W 205 ST/PAUL AV,40.876999,-73.889854,CO-OP CITY EARHART LA via ALLERTN,40.860828,-73.82267,NYCT_4415,40.873974,-73.890515,BEDFORD PK/JEROME AV,approaching,100.0,2017-06-01 12:04:28,11:57:21,0 days 00:10:07
100112,2017-06-01 12:03:51,0,Bx26,W 205 ST/PAUL AV,40.876999,-73.889854,CO-OP CITY EARHART LA via ALLERTN,40.860828,-73.82267,NYCT_4106,40.865849,-73.82327,HUTCHINSON RVR PKY/EINSTEIN LOOP N,approaching,60.0,2017-06-01 12:04:03,12:03:28,0 days 00:10:09


In [38]:
# Keep the eight records made at or after the target time.
# TimeDiff is zero or negative for these rows, so sorting in descending order
# puts the records nearest to the target time first.
recorded_after = Bx26['TimeDiff'] <= timedelta(0)
df = (
    Bx26.loc[recorded_after]
    .sort_values(by='TimeDiff', ascending=False)
    .head(8)
)

In [41]:
# Pin every record in `df` on the existing map. The popup names the vehicle as
# well as its direction, so individual buses can be told apart when a marker is
# clicked (the vehicle reference used to be read but never shown).
# NOTE: markers accumulate on `bus_map`; re-running this cell adds duplicates.
for lat, lon, bus_ref, direction in zip(df["VehicleLocation.Latitude"],
                                        df["VehicleLocation.Longitude"],
                                        df["VehicleRef"],
                                        df["DirectionRef"]):
    folium.Marker([lat, lon],
                  popup=f"{bus_ref} (direction {direction})").add_to(bus_map)

In [42]:
# Display the map again, including anything added to it since it was last shown.
bus_map

In [50]:
# How do we find the time a Bx26 comes to the LC stop?
# Keep the records where a bus travelling in the chosen direction is reported
# "at stop" with the LC stop as its next stop.
direction = 1  # in this data, DirectionRef 1 rows head to BEDFORD PK LEHMAN COLL
stop_name = "BEDFORD PK/PAUL AV"

# `direction` was previously assigned but never applied; filtering on it keeps
# a same-named stop served in the opposite direction out of the result.
at_lc_stop = (
    (Bx26["DirectionRef"] == direction)
    & (Bx26["NextStopPointName"] == stop_name)
    & (Bx26["ArrivalProximityText"] == 'at stop')
)
df = Bx26[at_lc_stop]
df.head()

Unnamed: 0,RecordedAtTime,DirectionRef,PublishedLineName,OriginName,OriginLat,OriginLong,DestinationName,DestinationLat,DestinationLong,VehicleRef,VehicleLocation.Latitude,VehicleLocation.Longitude,NextStopPointName,ArrivalProximityText,DistanceFromStop,ExpectedArrivalTime,ScheduledArrivalTime,TimeDiff
50205,2017-06-01 08:23:54,1,Bx26,EARHART LA/ERSKINE PL,40.861115,-73.822548,BEDFORD PK LEHMAN COLL via ALLERTN,40.877071,-73.890068,NYCT_8329,40.874348,-73.891349,BEDFORD PK/PAUL AV,at stop,17.0,2017-06-01 08:24:01,08:14:39,0 days 03:50:06
55450,2017-06-01 08:44:07,1,Bx26,EARHART LA/ERSKINE PL,40.861115,-73.822548,BEDFORD PK LEHMAN COLL via ALLERTN,40.877071,-73.890068,NYCT_4411,40.874364,-73.891384,BEDFORD PK/PAUL AV,at stop,13.0,2017-06-01 08:44:33,08:29:39,0 days 03:29:53


In [48]:
# List the distinct arrival-proximity labels that occur for Bx26.
# (Use 'NextStopPointName' instead to list the distinct stop names.)
Bx26.loc[:, 'ArrivalProximityText'].unique()

array(['< 1 stop away', 'at stop', 'approaching', '0.5 miles away',
       '0.6 miles away', '0.7 miles away'], dtype=object)