In [1]:
# Load the ride records used by every later cell. The frame printed further
# down has Date/Time, Lat, Lon and Base columns with July 2014 timestamps.
import modules.data as data
uber_data = data.getJul14Data()

In [12]:
# folium draws geographic data on an interactive map. Start from a bare base
# map positioned by a [latitude, longitude] pair and an initial zoom level.
import folium
uber_map = folium.Map(location=[40.7128, -74.006], zoom_start=12)
uber_map

In [13]:
# Sanity check: pin the first five rides of the frame on a fresh base map.
latitudes = uber_data['Lat'].values[:5]
longitudes = uber_data['Lon'].values[:5]

marker_map = folium.Map(location=[40.7128, -74.006], zoom_start=12)
# Popups are numbered from 1 ("Rider 1" .. "Rider 5").
for rider_no, ride_position in enumerate(zip(latitudes, longitudes), start=1):
    folium.Marker(ride_position, popup='Rider {}'.format(rider_no)).add_to(marker_map)

marker_map

In [19]:
from folium.plugins import HeatMap

# Density view of the first 10,000 rides: each row handed to HeatMap is one
# [Lat, Lon] pair taken straight from the frame.
heat_map = folium.Map(location=[40.7128, -74.006], zoom_start=12)
lat_lon = uber_data[['Lat', 'Lon']].values[:10000]
heat_layer = HeatMap(lat_lon, radius=13)
heat_layer.add_to(heat_map)
heat_map

In [22]:
import modules.data_kit as data_kit
# Smoke test of the helper only: nyc_map is not used by any other cell shown
# in this part of the notebook.
nyc_map = data_kit.get_centered_nyc_map()
# Turn on autoreload in mode 2 so edits to imported modules (data_kit is being
# developed alongside this notebook) take effect without a kernel restart.
# NOTE(review): re-running this cell prints the "already loaded" notice recorded
# below; %reload_ext autoreload avoids that, and these magics would normally sit
# in the first cell of the notebook.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [28]:
# Points of interest as (latitude, longitude) pairs.
metro_art_coords = (40.7794, -73.9632)
empire_state_coords = (40.7484, -73.9857)

# Add one distance column per landmark, in the order listed.
# NOTE(review): the printed values look like miles (about 1.49 for a ride
# roughly 0.02 degrees of latitude from the Met) - confirm in data_kit.
landmark_columns = (
    ('Distance to the Met', metro_art_coords),
    ('Distance to Empire State', empire_state_coords),
)
for column_name, landmark_coords in landmark_columns:
    uber_data = data_kit.add_distance_to_poi_col(uber_data, landmark_coords, column_name)

uber_data.head(5)

Unnamed: 0,Date/Time,Lat,Lon,Base,Distance to the Met,Distance to Empire State
0,2014-07-01 00:03:00,40.7586,-73.9706,B02512,1.487358,1.058178
1,2014-07-01 00:05:00,40.7605,-73.9994,B02512,2.29914,1.100642
2,2014-07-01 00:06:00,40.732,-73.9999,B02512,3.794105,1.354266
3,2014-07-01 00:09:00,40.7635,-73.9793,B02512,1.38345,1.094999
4,2014-07-01 00:20:00,40.7204,-74.0047,B02512,4.615925,2.173858


In [29]:
# Boolean frame, one column per landmark: True where the ride's distance to
# that landmark is strictly below 0.25 (same units as the distance columns).
poi_distance_cols = ['Distance to the Met', 'Distance to Empire State']
close_to_poi = uber_data[poi_distance_cols].lt(.25)
close_to_poi

Unnamed: 0,Distance to the Met,Distance to Empire State
0,False,False
1,False,False
2,False,False
3,False,False
4,False,False
...,...,...
796116,False,False
796117,False,False
796118,False,False
796119,False,False


In [32]:
# Collapse to one flag per ride: True when it is close to at least one landmark.
index_close_to = close_to_poi.any(axis='columns')
index_close_to.head(6)

0    False
1    False
2    False
3    False
4    False
5     True
dtype: bool

In [33]:
# Keep only the flagged rides; copy so later edits never touch uber_data.
map_data = uber_data.loc[index_close_to].copy()
map_data

Unnamed: 0,Date/Time,Lat,Lon,Base,Distance to the Met,Distance to Empire State
5,2014-07-01 00:35:00,40.7487,-73.9869,B02512,2.455439,0.066098
13,2014-07-01 02:07:00,40.7471,-73.9872,B02512,2.559080,0.119218
17,2014-07-01 03:20:00,40.7498,-73.9813,B02512,2.252287,0.249626
31,2014-07-01 05:22:00,40.7489,-73.9874,B02512,2.456893,0.095389
104,2014-07-01 07:39:00,40.7782,-73.9590,B02512,0.234702,2.486594
...,...,...,...,...,...,...
795863,2014-07-31 13:02:00,40.7463,-73.9888,B02764,2.648655,0.217527
795910,2014-07-31 15:35:00,40.7460,-73.9864,B02764,2.605797,0.169706
795925,2014-07-31 16:05:00,40.7449,-73.9866,B02764,2.678014,0.246201
795940,2014-07-31 16:32:00,40.7501,-73.9825,B02764,2.260816,0.204436


In [46]:
# Heat map of the rides that fall close to either landmark.
# HeatMap is given the whole array of [Lat, Lon] rows in a single call; an
# earlier attempt that pushed the frame through DataFrame.apply failed with a
# "not subscriptable" error.
landmarks_map = data_kit.get_centered_nyc_map()
lat_lon = map_data[['Lat', 'Lon']].values
landmark_layer = HeatMap(lat_lon, radius=4)
landmark_layer.add_to(landmarks_map)

landmarks_map

In [None]:
# Inspect the row labels currently held by map_data (this cell has no recorded
# execution count or output).
map_data.index

In [49]:
# Truncate every timestamp to the start of its hour (date and hour kept,
# minutes and below zeroed) so rides can be grouped per date/hour.
# The vectorised .dt.floor replaces a per-row Python lambda over ~796k rows.
# Assumes "Date/Time" has a datetime64 dtype, as the printed output suggests.
uber_data["Date_Hour"] = uber_data["Date/Time"].dt.floor("h")
uber_data["Date_Hour"]

0        2014-07-01 00:00:00
1        2014-07-01 00:00:00
2        2014-07-01 00:00:00
3        2014-07-01 00:00:00
4        2014-07-01 00:00:00
                 ...        
796116   2014-07-31 23:00:00
796117   2014-07-31 23:00:00
796118   2014-07-31 23:00:00
796119   2014-07-31 23:00:00
796120   2014-07-31 23:00:00
Name: Date_Hour, Length: 796121, dtype: datetime64[ns]

In [50]:
from datetime import datetime

# Fixed seed so the per-hour subsample, and therefore the animated heat map
# built from it, is identical on every run of the notebook.
SAMPLE_SEED = 42

# For the moment it is enough to examine one week of data: everything before
# 8 July 2014. Every ride gets the same heat-map weight.
first_week = uber_data[uber_data["Date/Time"] < datetime(2014, 7, 8)].assign(Weight=.5)

# Randomly keep a third of the rides in each date/hour group, stored as a list
# of [lat, lon, weight] rows. Selecting the three columns before apply keeps the
# grouping key out of the function's input. Requires the "Date_Hour" column
# created in the previous cell.
map_data = (
    first_week
    .groupby("Date_Hour")[["Lat", "Lon", "Weight"]]
    .apply(lambda hour_rides: hour_rides
           .sample(len(hour_rides) // 3, random_state=SAMPLE_SEED)
           .values.tolist())
)

# Frame labels for the animation, one per date/hour group.
date_hour_index = [stamp.strftime("%m/%d/%Y, %H:%M:%S") for stamp in map_data.index]
# Outer list: one element per date/hour; each element is that hour's list of
# [lat, lon, weight] rows.
date_hour_data = map_data.tolist()

In [51]:
from folium.plugins import HeatMapWithTime

# A heat map with time can now be put together: one animation frame per
# date/hour, labelled with the matching entry of date_hour_index.
time_heat_map = data_kit.get_centered_nyc_map()
hm = HeatMapWithTime(date_hour_data, index=date_hour_index)
hm.add_to(time_heat_map)

time_heat_map