Visualizing Bus Traffic in New York City

In [None]:
from folium.plugins import TimeSliderChoropleth

Problem Statement: Visualize how bus traffic in New York City varies over the course of a day.

In [None]:
import pandas as pd
import numpy as np

In [None]:
# Load the raw bus-feed extract into a DataFrame and preview the first rows.
# `error_bad_lines` was deprecated in pandas 1.3 and removed in 2.0;
# `on_bad_lines='warn'` keeps the same behaviour on current pandas:
# malformed rows are skipped and a warning is emitted for each.
df = pd.read_csv("/content/mta_1706.csv", on_bad_lines='warn')
df.head(5)

Unnamed: 0,RecordedAtTime,DirectionRef,PublishedLineName,OriginName,OriginLat,OriginLong,DestinationName,DestinationLat,DestinationLong,VehicleRef,VehicleLocation.Latitude,VehicleLocation.Longitude,NextStopPointName,ArrivalProximityText,DistanceFromStop,ExpectedArrivalTime,ScheduledArrivalTime
0,2017-06-01 00:03:34,0,B8,4 AV/95 ST,40.616104,-74.031143,BROWNSVILLE ROCKAWAY AV,40.656048,-73.907379,NYCT_430,40.63517,-73.960803,FOSTER AV/E 18 ST,approaching,76.0,2017-06-01 00:03:59,24:06:14
1,2017-06-01 00:03:43,1,S61,ST GEORGE FERRY/S61 & S91,40.643169,-74.073494,S I MALL YUKON AV,40.575935,-74.167686,NYCT_8263,40.590802,-74.15834,MERRYMOUNT ST/TRAVIS AV,approaching,62.0,2017-06-01 00:03:56,23:58:02
2,2017-06-01 00:03:49,0,Bx10,E 206 ST/BAINBRIDGE AV,40.875008,-73.880142,RIVERDALE 263 ST,40.912376,-73.902534,NYCT_4223,40.88601,-73.912647,HENRY HUDSON PKY E/W 235 ST,at stop,5.0,2017-06-01 00:03:56,24:00:53
3,2017-06-01 00:03:31,0,Q5,TEARDROP/LAYOVER,40.701748,-73.802399,ROSEDALE LIRR STA via MERRICK,40.666012,-73.735939,NYCT_8422,40.668002,-73.729348,HOOK CREEK BL/SUNRISE HY,< 1 stop away,267.0,2017-06-01 00:04:03,24:03:00
4,2017-06-01 00:03:22,1,Bx1,RIVERDALE AV/W 231 ST,40.881187,-73.90934,MOTT HAVEN 136 ST via CONCOURSE,40.809654,-73.92836,NYCT_4710,40.868134,-73.893032,GRAND CONCOURSE/E 196 ST,at stop,11.0,2017-06-01 00:03:56,23:59:38


In [None]:
# Keep only the records whose timestamp string starts with the date
# 2017-06-01 (the text before the first space), then report the new shape.
# The vectorised .str accessor replaces a per-row lambda that indexed into the
# split result: a missing timestamp is now treated as "no match" instead of
# raising TypeError when the lambda tries to subscript NaN.
recorded_date = df['RecordedAtTime'].str.split(' ').str[0]
df = df[recorded_date == '2017-06-01']
df.shape

(49609, 17)

In [None]:
# Narrow the frame to the four columns used downstream: record timestamp,
# vehicle identifier, and the vehicle's latitude / longitude.
position_columns = [
    'RecordedAtTime',
    'VehicleRef',
    'VehicleLocation.Latitude',
    'VehicleLocation.Longitude',
]
df = df.loc[:, position_columns]
df.head()

Unnamed: 0,RecordedAtTime,VehicleRef,VehicleLocation.Latitude,VehicleLocation.Longitude
0,2017-06-01 00:03:34,NYCT_430,40.63517,-73.960803
1,2017-06-01 00:03:43,NYCT_8263,40.590802,-74.15834
2,2017-06-01 00:03:49,NYCT_4223,40.88601,-73.912647
3,2017-06-01 00:03:31,NYCT_8422,40.668002,-73.729348
4,2017-06-01 00:03:22,NYCT_4710,40.868134,-73.893032


In [None]:
# Tally rows that exactly repeat an earlier row (True) against rows that are
# first occurrences (False).
is_repeat = df.duplicated()
is_repeat.value_counts()

False    46858
True      2751
dtype: int64

In [None]:
# Discard every row that exactly repeats an earlier row, keeping the first
# occurrence of each.
df = df[~df.duplicated()]

In [None]:
# Per-column count of missing values.
df.isna().sum()

RecordedAtTime               0
VehicleRef                   1
VehicleLocation.Latitude     1
VehicleLocation.Longitude    1
dtype: int64

In [None]:
# Parse the timestamp strings into datetimes and derive a 1-based hour bucket
# (00:xx -> 1, ..., 23:xx -> 24).
# assign() builds a new frame instead of writing columns into one produced by
# earlier filtering, which can trigger SettingWithCopyWarning, and the
# vectorised .dt accessor replaces a per-row lambda.
recorded_at = pd.to_datetime(df['RecordedAtTime'], format='%Y-%m-%d %H:%M:%S')
df = df.assign(RecordedAtTime=recorded_at, hour=recorded_at.dt.hour + 1)
df.head()

Unnamed: 0,RecordedAtTime,VehicleRef,VehicleLocation.Latitude,VehicleLocation.Longitude,hour
0,2017-06-01 00:03:34,NYCT_430,40.63517,-73.960803,1
1,2017-06-01 00:03:43,NYCT_8263,40.590802,-74.15834,1
2,2017-06-01 00:03:49,NYCT_4223,40.88601,-73.912647,1
3,2017-06-01 00:03:31,NYCT_8422,40.668002,-73.729348,1
4,2017-06-01 00:03:22,NYCT_4710,40.868134,-73.893032,1


In [None]:
# For every (hour, vehicle) pair, find the timestamp of the latest record in
# that hour, and flatten the group keys back into ordinary columns.
df2 = (
    df.groupby(['hour', 'VehicleRef'])['RecordedAtTime']
    .max()
    .reset_index()
)
df2.head()

Unnamed: 0,hour,VehicleRef,RecordedAtTime
0,1,NYCT_1206,2017-06-01 00:52:59
1,1,NYCT_1208,2017-06-01 00:03:49
2,1,NYCT_1212,2017-06-01 00:53:10
3,1,NYCT_1213,2017-06-01 00:53:18
4,1,NYCT_1218,2017-06-01 00:53:18


In [None]:
# Inner-join the per-hour latest timestamps back onto the full records to
# recover the latitude / longitude reported at those timestamps.
key_columns = ['hour', 'VehicleRef', 'RecordedAtTime']
df3 = df2.merge(df, on=key_columns)
df3.head()

Unnamed: 0,hour,VehicleRef,RecordedAtTime,VehicleLocation.Latitude,VehicleLocation.Longitude
0,1,NYCT_1206,2017-06-01 00:52:59,40.813268,-73.930409
1,1,NYCT_1208,2017-06-01 00:03:49,40.895555,-73.896672
2,1,NYCT_1212,2017-06-01 00:53:10,40.876857,-73.902408
3,1,NYCT_1213,2017-06-01 00:53:18,40.888707,-73.859971
4,1,NYCT_1218,2017-06-01 00:53:18,40.862368,-73.913939


In [None]:
# Build one list of [latitude, longitude] pairs per hour bucket, in order from
# bucket 1 to bucket 24; an hour with no records yields an empty list.
# A vectorised selection per hour replaces the nested iterrows() loop, which
# constructs a Series for every row, and no scratch loop variables are left
# behind in the notebook namespace.
coordinate_columns = ['VehicleLocation.Latitude', 'VehicleLocation.Longitude']
lat_long_list = [
    df3.loc[df3['hour'] == hour, coordinate_columns].values.tolist()
    for hour in range(1, 25)
]

In [None]:
import matplotlib
import matplotlib.pyplot as plt

ModuleNotFoundError: ignored

In [None]:
# Create a fixed-size figure holding a base map centred on the hard-coded
# coordinates below.
# `folium` itself is imported here because the cell uses `folium.Map` and the
# notebook otherwise only imports names from `folium.plugins`; `Figure` is
# taken from the folium namespace rather than used as an undefined bare name.
# Both previously raised NameError.
import folium
from folium.plugins import HeatMapWithTime

fig7 = folium.Figure(width=850, height=550)
m7 = folium.Map(location=[40.712776, -74.005974], zoom_start=10)
fig7.add_child(m7)

TypeError: ignored

NameError: ignored

In [None]:
# Create the time-stepped heat-map layer from the per-hour coordinate lists,
# attach it to the map, and display the map as the cell output.
heat_layer = HeatMapWithTime(
    lat_long_list,
    radius=5,
    auto_play=True,
    position='bottomright',
)
heat_layer.add_to(m7)
m7

NameError: ignored

In [None]:
# Write the map to an HTML file in the current working directory.
output_html = 'm7.html'
m7.save(output_html)