# Geospatial Data Analysis

# Pickup, Dropoff Visualization on DC Map using Folium

**We draw a random 0.1% sample of the dataset (roughly 2,000 trips) for the convenience of plotting and visualization. Note that green markers with a play icon denote pick-up locations and red markers with a stop icon denote drop-off locations.**

In [110]:
import pandas as pd             #pandas for using dataframe and reading csv file(s)
import numpy as np              #numpy for vector operations and basic maths
import matplotlib.pyplot as plt #for plotting
%matplotlib inline              
import seaborn as sns           #for making plots
from haversine import haversine #for working with latitudinal and longitudinal data points
import math                     #for basic math operations
import warnings
from pandas.plotting import parallel_coordinates #for multivariate plots
warnings.filterwarnings('ignore') #ignore deprecation warnings
import folium
from folium.plugins import HeatMap, MarkerCluster

In [111]:
# Load the combined trip table; the path is relative to the notebook's working directory.
data = pd.read_csv('../Data/Combined_df_teg.csv')

In [112]:
# Remove the two unnamed columns (presumably index columns left over from
# earlier CSV exports -- confirm) and lower-case the rider type labels.
data = (
    data
    .drop(columns=['Unnamed: 0', 'Unnamed: 1'])
    .assign(**{'Member.type': lambda frame: frame['Member.type'].str.lower()})
)

# First Map: Start and End Locations of 2000 random observations 2016-2022

In [113]:
def show_fmaps(data, path=1):
    """Build a folium map with clustered pick-up and drop-off markers.

    A random 0.1% sample of ``data`` is drawn (no seed is set, so every call
    plots different trips). Each sampled trip contributes a green "play"
    marker at its start coordinates and a red "stop" marker at its end
    coordinates; both carry a popup with rider type, start/end station and
    rounded duration. All markers share a single ``MarkerCluster`` layer.

    Parameters
    ----------
    data : pandas.DataFrame
        Trip records. The columns ``start_lat``, ``start_lng``, ``end_lat``,
        ``end_lng``, ``Member.type``, ``Start.station``, ``End.station`` and
        ``Duration`` are read.
    path : int, optional
        Currently unused; kept so existing calls keep working. No line is
        drawn between pick-up and drop-off points.

    Returns
    -------
    folium.Map
        The map with the marker cluster, extra tile layers and a layer
        control added.
    """
    map_1 = folium.Map(location=[39.13, -77.4], zoom_start=9)  # manually chosen centre
    data_reduced = data.sample(frac=0.001).reset_index(drop=True)

    # One shared cluster for every marker. Creating a cluster per trip (as the
    # previous version did inside the loop) prevents markers from different
    # trips from grouping and adds one "clusters" entry per trip to the
    # layer control.
    marker_cluster = MarkerCluster(name="clusters").add_to(map_1)

    # Walk the needed columns in lockstep instead of re-filtering the whole
    # frame for every field of every row.
    trips = zip(
        data_reduced['start_lat'],
        data_reduced['start_lng'],
        data_reduced['end_lat'],
        data_reduced['end_lng'],
        data_reduced['Member.type'],
        data_reduced['Start.station'],
        data_reduced['End.station'],
        data_reduced['Duration'].round(),
    )
    for (pick_lat, pick_long, dest_lat, dest_long,
         member_popup, start_station, end_station, duration) in trips:
        info = 'rider: {} start: {} \n\n end: {} \n\n duration (mins): {}'.format(
            member_popup, start_station, end_station, duration)
        folium.Marker(
            [pick_lat, pick_long],
            icon=folium.Icon(color='green', icon='play'),
            popup=info,
        ).add_to(marker_cluster)
        folium.Marker(
            [dest_lat, dest_long],
            icon=folium.Icon(color='red', icon='stop'),
            popup=info,
        ).add_to(marker_cluster)

    # Alternative base maps selectable through the layer control.
    for tiles in ('Stamen Terrain', 'Stamen Toner', 'Stamen Water Color',
                  'cartodbpositron', 'cartodbdark_matter'):
        folium.TileLayer(tiles).add_to(map_1)
    folium.LayerControl().add_to(map_1)
    return map_1
#left = -77.4, bottom = 38.7, right = -76.9, top = 39.13

In [114]:
# Build the start/end marker map from a fresh random sample of `data`
# and write it out as a standalone HTML file.
start_end_clusters_all = show_fmaps(data, path=1)
start_end_clusters_all.save("2k_start_end_clusters.html")

In [100]:
# Build another marker map; show_fmaps samples without a seed, so the
# plotted trips differ from one call to the next.
start_end_points_all = show_fmaps(data, path=1)

In [101]:
# Write the map to an HTML file in the current working directory.
start_end_points_all.save("2k_start_end.html")

# Second Map: Heatmap of End Locations Given Start Station of Georgetown O St

In [None]:
# Store the label-like columns as pandas categoricals.
for _cat_col in ('Member.type', 'rideable_type',
                 'Start.station.number', 'End.station.number'):
    data[_cat_col] = data[_cat_col].astype('category')

# Parse the trip start/end timestamps into datetime64 values.
for _date_col in ('Start.date', 'End.date'):
    data[_date_col] = pd.to_datetime(data[_date_col])

In [106]:
# Row counts per Member.type value (the cell output is shown below).
data['Member.type'].value_counts()

member    1537543
casual     569641
Name: Member.type, dtype: int64

In [109]:
import folium
from folium.plugins import HeatMap

def end_locations_per_start(data, start_location):
    """Save a heat map of the most common end points for one start station.

    Trips whose ``Start.station`` contains ``start_location`` are grouped by
    end coordinates; the 50 coordinate pairs with the highest trip counts are
    drawn as a weighted heat map, which is written to
    ``enterstartloc_heatmap.html``.

    Parameters
    ----------
    data : pandas.DataFrame
        Trip records. The columns ``Start.station``, ``end_lat``,
        ``end_lng``, ``End.date``, ``start_lat`` and ``start_lng`` are read.
    start_location : str
        Station name, or part of one, matched as a literal substring.

    Raises
    ------
    ValueError
        If no trip starts at a station matching ``start_location``.
    """
    # Literal match: station names may contain regex metacharacters such as
    # "(" or "+". Rows with a missing station name count as non-matching so
    # the mask stays purely boolean.
    from_station = data['Start.station'].str.contains(
        start_location, regex=False, na=False)
    trips = data.loc[from_station]
    if trips.empty:
        raise ValueError(
            'no trips found with a start station containing {!r}'.format(start_location))

    # Trips per distinct end coordinate, busiest first, top 50 only.
    end = (
        trips.groupby(['end_lat', 'end_lng'])['End.date']
        .count()
        .reset_index()
        .sort_values(by='End.date', ascending=False)
        .head(50)
    )
    end.columns = ['End_Lat', 'End_Lng', 'Total Trips']

    # The map is centred on the mean start coordinate of the whole data set.
    loc_ends_map = folium.Map(
        location=[data.start_lat.mean(), data.start_lng.mean()],
        zoom_start=14,
        control_scale=True,
    )
    # The third column (trip count) is used as the heat-map weight.
    HeatMap(end).add_to(loc_ends_map)

    # Alternative base maps selectable through the layer control.
    for tiles in ('Stamen Terrain', 'Stamen Toner', 'Stamen Water Color',
                  'cartodbpositron', 'cartodbdark_matter'):
        folium.TileLayer(tiles).add_to(loc_ends_map)
    folium.LayerControl().add_to(loc_ends_map)
    loc_ends_map.save("enterstartloc_heatmap.html")
# Generate the end-location heat map for trips starting at the
# Georgetown University station (37th & O St NW).
end_locations_per_start(data, "37th & O St NW / Georgetown University")

In [198]:
# map = folium.Map(location=[data.start_lat.mean(), data.start_lng.mean()], zoom_start=14, control_scale=True)
# data_reduced = data.sample(n = 1000) #combined_data.iloc[1:10]
# for index, location_info in data_reduced.iterrows():
#     folium.Marker([location_info["start_lat"], location_info["start_lng"]], icon=folium.Icon(color='green',icon='play'), popup=location_info["Member.type"]).add_to(map)
#     folium.Marker([location_info["end_lat"], location_info["end_lng"]], icon=folium.Icon(color='red',icon='stop'), popup=location_info["Member.type"]).add_to(map)
#     folium.PolyLine([(location_info["start_lat"], location_info["start_lng"]),
#        (location_info["end_lat"], location_info["end_lng"])],
#                 color='red',
#                 weight=2.5,
#                 opacity=0.8).add_to(map)
# map.save("map_first10_paths.html")

In [None]:
#combined_data.loc[combined_data['start_lat'] > 38.9 & combined_data['end_lat'] < 38.91]

#combined_data.query('38.9 < start_lat < 38.91')

# Heat maps of trip starts and trip ends inside a fixed downtown bounding box
# (latitude 38.86..38.902, longitude -77.056..-77.0193, bounds inclusive).
# This cell works on `data`, the frame loaded from Combined_df_teg.csv; the
# name `combined_data` used here before is not defined in the visible notebook.
from folium.plugins import HeatMap

# --- Trip starts ---
tmp = data.loc[data['start_lat'].between(38.86, 38.902)]
tmp = tmp.loc[tmp['start_lng'].between(-77.056, -77.0193)]
# Trips per distinct start coordinate; the count is the heat-map weight.
downtown_start = tmp.groupby(['start_lat', 'start_lng'])['Start.date'].count().reset_index()
downtown_start.columns = ['Lat', 'Long', 'Num_trips']
dtown_starts_map = folium.Map(
    location=[data.start_lat.mean(), data.start_lng.mean()],
    zoom_start=14,
    control_scale=True,
)
HeatMap(downtown_start).add_to(dtown_starts_map)
dtown_starts_map.save("dtown_start_heatmap.html")

# --- Trip ends ---
end = data.loc[data['end_lat'].between(38.86, 38.902)]
# Filter on `end` itself: the earlier version built half of this mask from
# `tmp` (the start-filtered frame), whose index does not match `end`.
end = end.loc[end['end_lng'].between(-77.056, -77.0193)]
# Trips per distinct end coordinate; the count is the heat-map weight.
downtown_end = end.groupby(['end_lat', 'end_lng'])['End.date'].count().reset_index()
downtown_end.columns = ['Lat', 'Long', 'Num_trips']
dtown_ends_map = folium.Map(
    location=[data.end_lat.mean(), data.end_lng.mean()],
    zoom_start=14,
    control_scale=True,
)
HeatMap(downtown_end).add_to(dtown_ends_map)
dtown_ends_map.save("dtown_end_heatmap.html")