In [1]:
import pandas as pd
import folium
from folium.plugins import HeatMapWithTime

In [2]:
# Columns needed downstream, in the order the frame should present them.
pickup_cols = ['Pickup_longitude', 'Pickup_latitude', 'DateTimeID', 'Pickup_DateTime']

# read csv
# usecols skips every column that would be discarded anyway; low_memory=False
# makes pandas infer each column's dtype from the whole file at once instead
# of chunk by chunk, which avoids the mixed-type DtypeWarning.
df = pd.read_csv(
    "fhv_wav_201901-06_run-2019_08_16_.csv",
    usecols=pickup_cols,
    low_memory=False,
)

# drop nulls in any required column, then put the columns in a fixed order
# (read_csv returns usecols in file order, not in list order)
df = df.dropna(subset=pickup_cols).reindex(columns=pickup_cols)

df.info()

  interactivity=interactivity, compiler=compiler, result=result)


<class 'pandas.core.frame.DataFrame'>
Int64Index: 763668 entries, 1 to 795205
Data columns (total 4 columns):
Pickup_longitude    763668 non-null object
Pickup_latitude     763668 non-null object
DateTimeID          763668 non-null int64
Pickup_DateTime     763668 non-null object
dtypes: int64(1), object(3)
memory usage: 29.1+ MB


In [3]:
# convert latitude and longitude to numeric
# (errors='coerce' turns values that cannot be parsed into NaN instead of raising)
for coord_col in ('Pickup_longitude', 'Pickup_latitude'):
    df[coord_col] = pd.to_numeric(df[coord_col], errors='coerce')

# reformat pickup_datetime
df['Pickup_DateTime'] = pd.to_datetime(df['Pickup_DateTime'], format='%Y-%m-%d %H:%M:%S')

# create columns for time series
# month/day/hour use the vectorized .dt accessor rather than calling a Python
# lambda once per row.
pickup_dt = df['Pickup_DateTime'].dt
df['month'] = pickup_dt.month
# The vectorized week accessor is not stable across pandas versions
# (.dt.week was removed in 2.0, .dt.isocalendar() only exists from 1.1),
# so week keeps the per-Timestamp attribute, which works on both.
df['week'] = df['Pickup_DateTime'].map(lambda ts: ts.week)
df['day'] = pickup_dt.day
df['hour'] = pickup_dt.hour

In [4]:
# Check for nulls: number of missing values in each column
df.isnull().sum()

Pickup_longitude    466
Pickup_latitude     466
DateTimeID            0
Pickup_DateTime       0
month                 0
week                  0
day                   0
hour                  0
dtype: int64

In [5]:
# Keep only rows whose pickup month is later than May, and give every row a
# weight of 1 so that summing 'count' yields the number of pickups.
after_may = df['month'] > 5
df_copy = df.loc[after_may].assign(count=1)

In [6]:
# Ten coordinate pairs with the highest summed 'count', largest first.
coord_keys = ['Pickup_latitude', 'Pickup_longitude']
(
    df_copy
    .groupby(coord_keys)[['count']]
    .sum()
    .sort_values('count', ascending=False)
    .head(10)
)

Unnamed: 0_level_0,Unnamed: 1_level_0,count
Pickup_latitude,Pickup_longitude,Unnamed: 2_level_1
40.7696,-73.8628,33
40.7696,-73.8634,31
40.7697,-73.8632,25
40.7698,-73.8631,24
40.7521,-73.9355,23
40.6465,-73.7772,22
40.7696,-73.8629,22
40.6779,-73.9036,21
40.7697,-73.863,20
40.573856,-73.99879,20


In [7]:
# One entry per distinct pickup hour (ascending). Each entry is a list of
# [latitude, longitude, summed count] rows for that hour.
point_cols = ['Pickup_latitude', 'Pickup_longitude']
hours_present = df_copy.hour.sort_values().unique()

df_hour_list = [
    df_copy.loc[df_copy.hour == hr, point_cols + ['count']]
    .groupby(point_cols)
    .sum()
    .reset_index()
    .values
    .tolist()
    for hr in hours_present
]

In [8]:
def generateBaseMap(default_location=(40.693943, -73.985880), default_zoom_start=11):
    """Create an empty folium map to add layers to.

    Parameters
    ----------
    default_location : sequence of two floats, optional
        Centre of the initial view, forwarded to ``folium.Map`` as
        ``location``. The default is a tuple rather than a list so the shared
        default value cannot be modified between calls.
    default_zoom_start : int, optional
        Initial zoom level, forwarded to ``folium.Map`` as ``zoom_start``.

    Returns
    -------
    folium.Map
        A new map with the scale control enabled (``control_scale=True``).
    """
    return folium.Map(
        location=default_location,
        control_scale=True,
        zoom_start=default_zoom_start,
    )

In [9]:
# Blank map at generateBaseMap's default centre; the zoom level is passed explicitly.
map_zoom = 11
base_map = generateBaseMap(default_zoom_start=map_zoom)

In [10]:
# One slider label per frame, built from the same sorted set of hours that
# df_hour_list is generated from, so label i always describes frame i.
# When no index is given, folium falls back to a simple 1-based count, which
# reads as one hour later than the hour actually being shown.
hour_labels = ['{:02d}:00'.format(int(h)) for h in df_copy.hour.sort_values().unique()]

HeatMapWithTime(
    df_hour_list,
    index=hour_labels,
    radius=5,
    gradient={0.2: 'blue', 0.4: 'lime', 0.6: 'orange', 1: 'red'},
    min_opacity=0.5,
    max_opacity=0.8,
    auto_play=True,
    use_local_extrema=True,
).add_to(base_map)

<folium.plugins.heat_map_withtime.HeatMapWithTime at 0x2067826d4e0>

In [11]:
# Write the map, including the heat-map layer added above, to an HTML file.
output_html = 'Time Series Map.html'
base_map.save(output_html)