In [5]:
import pandas as pd
import re
import numpy as np
import geopandas as gp
from utils import misc_utils as ct
#%matplotlib inline
import matplotlib.pyplot as plt
import osmnx as ox
import folium
from folium.plugins import MarkerCluster
from folium import IFrame
import branca.colormap as cm


In [6]:
#############
# Config: values a reader is expected to tune before running the notebook
############
# Inclusive time window used to slice both the prediction scores and the
# accident (ongevallen) records by their timestamp index.
start_time = '2017-02-23 6:00:00'
end_time = '2017-02-23 23:00:00'
# Together these three ids select the single rws_experiment.raw_y_score row to visualise.
experiment_id = 14
result_id = 93
val_set_id = 0
# File the finished folium map is saved to.
map_name = 'Accident_Prediction_Map.html'



In [None]:
# generates array with the coordinate values for all hectopoints in the segment
def get_seg_coords(df):
    """Return the ``[long, lat]`` pair of every row in ``df``, in row order.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``long`` and ``lat`` columns; any other columns are ignored.

    Returns
    -------
    list
        One two-element ``[long, lat]`` list per row (longitude first);
        an empty list when ``df`` has no rows.
    """
    # Column-wise extraction avoids constructing one Series per row via iterrows().
    return df[['long', 'lat']].values.tolist()

In [7]:
# get all of the relevant data from SQL
conn = ct.connect_rds()

# Run every query inside try/finally so the connection is closed even when a query raises.
try:
    # get experimental results and save as dataframe
    # NOTE: the ids interpolated here are the integer constants from the config cell,
    # not user-supplied input.
    sql = """select * from rws_experiment.raw_y_score where experiment_id = {0} and result_id = {1} and val_set_id = {2} limit 1;""".format(experiment_id, result_id, val_set_id)
    df_read_in = pd.read_sql(sql, con=conn)

    # read in each hectopoint, along with its assigned cluster
    # (adjacent literals concatenate into one single-line SQL statement)
    sql = (
        "select s.hectokey, s.hectokey_merged, h.lat, h.long "
        "from segmentation.ten_km as s "
        "left join rws_clean.unique_hectopunten as h "
        "on s.hectokey = h.hectokey "
        "where hectokey_merged  like '%.Li.Li' or hectokey_merged like '%.Re.Re';"
    )
    df_hect = pd.read_sql(sql, con=conn)

    # extract the road segment value so that we can sort based on that value
    # (second dot-separated field of hectokey, converted to int)
    df_hect['hecto'] = df_hect['hectokey'].str.extract(r'^\d+\.(\d+)\.\w+\.\w+')[0].astype(int)

    # get number of unique segments
    sql = (
        "select count(distinct(hectokey_merged)) from segmentation.ten_km "
        "where hectokey_merged  like '%.Li.Li' or hectokey_merged like '%.Re.Re';"
    )
    num_seg = pd.read_sql(sql, con=conn).iloc[0]['count']

    # get ongevallen dataset
    sql = "select ongekey, datetime, lat, long from rws_clean.ongevallen;"
    df_ong = pd.read_sql(sql, con=conn)
    # keep datetime as text; a parsed copy is created later for time indexing
    df_ong['datetime'] = df_ong['datetime'].astype(str)
finally:
    conn.close()

# clean data
# Fail with a clear message instead of an opaque KeyError when the query matched no row.
if df_read_in.empty:
    raise ValueError(
        "No raw_y_score row found for experiment_id={0}, result_id={1}, val_set_id={2}".format(
            experiment_id, result_id, val_set_id))

y_scores = df_read_in.loc[0]["y_scores"]
y_true = df_read_in.loc[0]["y_true"]
space_time = df_read_in.loc[0]["space_time"]


# one row per (segment, timestamp) prediction
df_scores = pd.DataFrame({'space': space_time['space'], 'time': space_time['time'], 'y_scores': y_scores, 'y_true': y_true})
# keep the raw 'time' value for layer names and add a parsed copy for time-based slicing
df_scores['time_as_timestamp'] = pd.to_datetime(df_scores['time'])
df_scores['y_scores'] = df_scores['y_scores'].round(4)

# join with df_hect: every hectopoint gets the scores of the segment it belongs to.
# Only the key columns are passed: combining left_on with left_index=True is rejected
# by current pandas (MergeError), and the resulting index is replaced below anyway.
df_scores = pd.merge(df_hect, df_scores, how='left',
                     left_on="hectokey_merged", right_on="space", sort=True)

# Hectopoints without any score come out of the left join with a NaT timestamp.
# They can never fall inside the requested window, and dropping them keeps the
# index monotonic so the label slice below is well defined.
df_scores = df_scores.dropna(subset=['time_as_timestamp'])
df_scores = df_scores.sort_values(by='time_as_timestamp', ascending=True)

# get subset of the data
df_scores = df_scores.set_index(['time_as_timestamp'])
df_scores = df_scores.loc[start_time:end_time]


# distinct raw time values, in chronological order because df_scores is sorted by time
timestamps = df_scores['time'].unique()

# index ongevallen by time and filter out any entries not within the start and end time
df_ong['time_index'] = pd.to_datetime(df_ong['datetime'])
# Label slicing needs a sorted index; rows whose datetime could not be parsed to a
# timestamp (NaT) are outside any window and are dropped first.
df_ong = df_ong.dropna(subset=['time_index']).set_index(['time_index']).sort_index()
df_ong = df_ong.loc[start_time:end_time]




In [None]:
# Add the background for the map; three tile layers are offered
# (OpenStreetMap, Stamen Toner, CartoDB Positron).
# boundary of the image on the map
min_lon = 4.77322
max_lon = 5.57805
min_lat = 51.8252
max_lat = 52.37998994

map_rws = folium.Map(location=[52.2275,5.45671],tiles='openstreetmap')
# NOTE(review): built-in 'Stamen Toner' tiles may be unavailable in recent folium
# releases -- confirm against the installed version.
folium.TileLayer('Stamen Toner').add_to(map_rws)
folium.TileLayer('cartodbpositron').add_to(map_rws)



# To color each hectopunt, we need to pull in the output of the prediction model
# NOTE: this wouldn't work for a heatmap at this time because heatmap doesn't support color adjustments :(
colormap = cm.LinearColormap(colors=['blue','green','yellow','orange','red'], vmin=0,vmax=df_scores['y_scores'].max())
map_rws.add_child(colormap)
#http://nbviewer.jupyter.org/github/python-visualization/folium/blob/v0.2.0/examples/Colormaps.ipynb


# NOTE: This code will need to change if segments can consist of more than one road
# generate the coordinate list for each segment, ordered by the 'hecto' value.
# df_scores holds one row per hectopoint *per timestamp*, so identical points are
# removed first; otherwise every coordinate is repeated once per timestamp in the line.
df_seg_coords = (
    df_scores
    .drop_duplicates(subset=['hectokey_merged', 'hectokey', 'lat', 'long'])
    .sort_values(['hecto'])
    .groupby(['hectokey_merged'])[['long', 'lat']]
    .apply(get_seg_coords)
    .reset_index(name='seg_coords')
)

# collapse to one row per (segment, timestamp); all hectopoints of a segment share the same score
df_scores = df_scores[['hectokey_merged', 'time','y_scores', 'y_true']].groupby(['hectokey_merged', 'time']).first().reset_index()

In [None]:
# for each timestamp, add a layer to the map
# NOTE: the final timestamp is deliberately left out (same as the original loop): the
# disabled accident overlay below needs timestamps[time_idx + 1] as the window end.
for time_idx, time_string in enumerate(timestamps[:-1]):
    # get the data for each segment at that time
    df_onetime = df_scores[df_scores['time'] == time_string]
    # attach the coordinate list of each segment.
    # Only the key column is passed: combining on= with left_index=True is rejected
    # by current pandas (MergeError).
    df_onetime = pd.merge(df_onetime, df_seg_coords, how='left',
                          on="hectokey_merged", sort=True)

    # one toggleable layer per timestamp
    fg = folium.FeatureGroup(name=time_string).add_to(map_rws)

    # write each segment as its own GeoJSON structure so it can carry its own popup
    for _, row in df_onetime.iterrows():
        feature = {'type':'Feature',
            'properties':{'name':row['hectokey_merged'], 'strokeColor': colormap(row['y_scores']),
                 "popupContent": row['hectokey_merged']},
            'geometry':{'type':'LineString',
            'coordinates': row['seg_coords']}}
        geojson_data = {'type':'FeatureCollection', 'features':[feature]}

        # add it to the map; the line colour is read back from the feature's own properties
        g = folium.GeoJson(geojson_data,
                  name = row['hectokey_merged'],
                   style_function=lambda feature: {
                       'color': feature['properties']['strokeColor'],
                       'weight': 3
                   })
        g.add_child(folium.Popup(str(row['hectokey_merged']) + ", Prediction:" + str(row['y_scores'])))
        fg.add_child(g)

    # add in accident data (currently disabled)
    #mask = (df_ong.index > time_string) & (df_ong.index <= timestamps[time_idx + 1])
    #df_ong_curr = df_ong.loc[mask]
    #coords, popups = [], []
    #for idx, row in df_ong_curr.iterrows():

        #Append lat and long coordinates to "coords" list
        #coords.append([row['lat'], row['long']])
        #label = '<br>'.join([row[field] for field in ['ongekey','datetime']])
        #popups.append(IFrame(label, width = 300, height = 100))
        #fg.add_child(MarkerCluster(locations = coords, popups = popups))


In [None]:
# Attach the control that toggles every data layer and background tile set,
# then write the finished map to the HTML file named in the config cell.
layer_control = folium.LayerControl()
map_rws.add_child(layer_control)
map_rws.save(map_name)