In [2]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from mapping_helper_functions import convert_latitude_to_webmercator, convert_longitude_to_webmercator
from rossmo_et_ridgway import Rossmo

In [3]:
# Relative paths to the three Ridgway CSV inputs.
body_locations_path = '../resources/Ridgway/body_locations.csv'
ridgway_locations_path = '../resources/Ridgway/ridgway_locations.csv'
disappearances_path = '../resources/Ridgway/disappearances.csv'

# Read each CSV into its own dataframe; the unpacking order matches the tuple of
# paths, so every frame is loaded from the same file as before.
df_victims, df_ridgway, df_disappearances = (
    pd.read_csv(csv_path)
    for csv_path in (body_locations_path, ridgway_locations_path, disappearances_path)
)

In [4]:
# Build a Rossmo instance from the victim and disappearance dataframes.
# NOTE(review): `R` is rebound by the explicit `Rossmo(...)` construction in a later
# cell and is not read in between, so this instance appears to be unused — confirm
# whether this cell is still needed.
R = Rossmo.from_dataframe([df_victims, df_disappearances])

In [6]:
# Stack the victim and disappearance records into one frame of crime locations.
# ignore_index=True gives the combined frame a fresh 0..n-1 index; without it the
# row labels of the two source frames are kept as-is and can repeat, which makes
# label-based selection and index-aligned assignment on df_main ambiguous.
df_main = pd.concat([df_victims, df_disappearances], ignore_index=True)

# (latitude, longitude) tuples: the form later passed to Rossmo(...).
df_main['coordinates'] = list(zip(df_main['latitude'], df_main['longitude']))

# Web-mercator versions of the coordinates, used for map plotting.
df_main['latitude_webmercator'] = convert_latitude_to_webmercator(df_main['latitude'])
df_main['longitude_webmercator'] = convert_longitude_to_webmercator(df_main['longitude'])

## TODO

- Have the Rossmo class do the following:
  - accept a CSV and look for the latitude and longitude columns (named `Y` or `Lat`, and `X` or `Lon`, respectively)
    - convert those to a `coordinates` column if it doesn't already exist, and add that column to the resulting dataframe
  - convert the `coordinates` column of the dataframe to a list, so the user doesn't have to
  - return the dataframe
  - add the `score_normalized` column
  - set `score_normalized` to 0 for anything below Q1 by default
- Decide whether the class should handle plotting as well

In [8]:
def add_coordinate_columns(df):
    """Add the derived coordinate columns used for scoring and plotting, in place.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with numeric 'latitude' and 'longitude' columns.

    Returns
    -------
    pandas.DataFrame
        The same frame (mutated), now also carrying:
        - 'coordinates': (latitude, longitude) tuples, one per row
        - 'latitude_webmercator' / 'longitude_webmercator': the outputs of the
          project's web-mercator conversion helpers for the respective column

    Raises
    ------
    KeyError
        If 'latitude' or 'longitude' is missing from ``df``.
    """
    df['coordinates'] = list(zip(df['latitude'], df['longitude']))
    df['latitude_webmercator'] = convert_latitude_to_webmercator(df['latitude'])
    df['longitude_webmercator'] = convert_longitude_to_webmercator(df['longitude'])
    return df


# Apply the identical preparation to every location frame instead of repeating the
# same three statements per frame; the frames are updated in place, so the names
# df_ridgway / df_victims / df_disappearances keep referring to the enriched data.
for location_frame in (df_ridgway, df_victims, df_disappearances):
    add_coordinate_columns(location_frame)

In [9]:
# Preview five randomly chosen rows of the combined frame (unseeded, so the rows
# shown differ from run to run).
df_main.sample(n=5)

Unnamed: 0,longitude,latitude,name,description,coordinates,latitude_webmercator,longitude_webmercator
1,-122.304973,47.612925,Andrea Marion Childers,,"(47.6129248, -122.304973)",6042699.0,-13614930.0
21,-122.345525,47.716933,Linda Jane Rule,,"(47.7169332, -122.345525)",6059891.0,-13619440.0
29,-121.812542,47.166295,Marta Reeves,,"(47.1662953, -121.812542)",5969260.0,-13560110.0
28,-122.285058,47.55785,Mary Exzetta West,,"(47.5578503, -122.2850579)",6033610.0,-13612710.0
19,-122.283171,47.476839,Kimberly L. Nelson,,"(47.4768391, -122.2831713)",6020257.0,-13612500.0


In [10]:
# Build the Rossmo model from the plain list of (latitude, longitude) tuples.
# `accuracy` is read back later as R.accuracy to reshape the scores into an
# (accuracy x accuracy) grid; with 100 the score table has 10000 rows.
R = Rossmo(df_main['coordinates'].tolist(), accuracy=100)

In [11]:
# Keep a handle on the results held by the Rossmo instance; the next cell reads
# its .keys() as coordinates and its .values() as scores.
rossmo_results = R.rossmo_results

In [12]:
# One row per entry of the results mapping: its key goes into 'coordinates' and
# its value into 'score' (keys and values iterate in matching order).
df_rossmo_results = pd.DataFrame(
    {
        'coordinates': list(rossmo_results.keys()),
        'score': list(rossmo_results.values()),
    }
)

In [13]:
# Min-max scale the raw score into [0, 1]: the lowest score maps to 0 and the
# highest to 1.
df_rossmo_results['score_normalized'] = df_rossmo_results['score'].pipe(
    lambda score: (score - score.min()) / (score.max() - score.min())
)

# Spot-check five random rows.
df_rossmo_results.sample(n=5)

Unnamed: 0,coordinates,score,score_normalized
658,"(43.51009862719382, -123.13278051429779)",42.731948,0.04497
9205,"(49.55214600791408, -118.85251137933304)",39.576805,0.031862
5055,"(46.60137868244605, -122.89050112929979)",76.291275,0.184389
339,"(43.29932953251753, -121.59834440931043)",42.848466,0.045454
6241,"(47.4444550611512, -121.7598639993091)",128.234512,0.400182


In [14]:
# Summary statistics of the raw score. The quartiles are requested explicitly
# (these are also describe()'s defaults) because a later cell reads stats['25%'].
stats = df_rossmo_results['score'].describe(percentiles=[0.25, 0.5, 0.75])
stats

count    10000.000000
mean        52.410373
std         18.394951
min         31.907393
25%         41.548119
50%         47.409362
75%         57.104297
max        272.615382
Name: score, dtype: float64

In [15]:
# Zero out the normalized score wherever the raw score is at or below the first
# quartile (Q1); rows above Q1 keep their min-max scaled value.
q1 = stats['25%']
df_rossmo_results['score_normalized'] = df_rossmo_results['score_normalized'].mask(
    df_rossmo_results['score'] <= q1,
    0.0,
)

In [16]:
# Five highest-scoring rows among those that kept a non-zero normalized score.
(
    df_rossmo_results
    .loc[lambda scored: scored['score_normalized'] > 0]
    .sort_values(by='score_normalized', ascending=False)
    .head(5)
)

Unnamed: 0,coordinates,score,score_normalized
6248,"(47.4444550611512, -122.32518256430444)",272.615382,1.0
6147,"(47.3741986962591, -122.24442276930512)",264.763943,0.967382
6247,"(47.4444550611512, -122.24442276930512)",250.230192,0.907003
6148,"(47.3741986962591, -122.32518256430444)",247.161891,0.894256
6348,"(47.5147114260433, -122.32518256430444)",241.660345,0.8714


In [17]:
# Split each (latitude, longitude) tuple into two columns; the original index is
# reused so the new columns line up row-for-row with df_rossmo_results.
df_rossmo_results[['latitude', 'longitude']] = pd.DataFrame(
    list(df_rossmo_results['coordinates']),
    index=df_rossmo_results.index,
)

# Web-mercator versions of both columns, needed for plotting on the tile map.
df_rossmo_results['latitude_webmercator'] = (
    df_rossmo_results['latitude'].pipe(convert_latitude_to_webmercator)
)
df_rossmo_results['longitude_webmercator'] = (
    df_rossmo_results['longitude'].pipe(convert_longitude_to_webmercator)
)

In [28]:
from bokeh.plotting import figure, output_notebook, show
from bokeh.tile_providers import CARTODBPOSITRON_RETINA, get_provider
from bokeh.models import HoverTool, ColumnDataSource
from bokeh.transform import jitter

# Draws the normalized Rossmo score as a heatmap over a tile map and overlays the
# Ridgway, victim and disappearance locations as hoverable points.
# The "(planned: ... method)" notes mark which future Rossmo-class method each
# step is intended to move into.

# show map in notebook (planned: show method)
output_notebook()

# base map tiles (planned: plot method)
tile_provider = get_provider(CARTODBPOSITRON_RETINA)

# Plot extents = extents of the scored grid, in web-mercator coordinates.
x_range = df_rossmo_results['longitude_webmercator'].min(), df_rossmo_results['longitude_webmercator'].max()  # lon
y_range = df_rossmo_results['latitude_webmercator'].min(), df_rossmo_results['latitude_webmercator'].max()  # lat

# range bounds supplied in web mercator coordinates (planned: plot method)
# ideal coordinates top to bottom: 47.8 - 47.25, left to right: -122.65 - -121.95
p = figure(
    x_range=x_range,
    y_range=y_range,
    width=1000,
    height=1000,
    x_axis_type='mercator',
    y_axis_type='mercator',
    # the hover tool is added further down, once the point layers it inspects exist
    tools=['pan', 'wheel_zoom', 'save', 'reset'],
    lod_threshold=None
)
p.add_tile(tile_provider)

# Scores as an (accuracy x accuracy) image (planned: plot method).
# In the results shown earlier, longitude decreases as the flat row index increases
# within one latitude, so the columns are flipped left-to-right to put the smallest
# longitude at the left edge of the image.
d = np.fliplr(
    df_rossmo_results['score_normalized']
    .to_numpy()
    .reshape((R.accuracy, R.accuracy))
)

# plot the heatmap, stretched over the full plot extents (planned: plot method)
p.image(
    image=[d],
    x=x_range[0],
    y=y_range[0],
    dw=abs(x_range[1] - x_range[0]),
    dh=abs(y_range[1] - y_range[0]),
    palette='Spectral10',
    alpha=0.65
)

# The tooltip below reads the person's name from a single field, '@name'. The
# victim/disappearance frames carry a lowercase 'name' column while the Ridgway
# frame has been seen with 'Name'; renaming here gives every source the same
# field (rename is a no-op for frames that have no 'Name' column).
name_column_fix = {'Name': 'name'}

source_ridgway = ColumnDataSource(df_ridgway.rename(columns=name_column_fix))
ridgway_points = p.circle(
    x='longitude_webmercator',
    y='latitude_webmercator',
    radius=100,
    legend_label='ridgway',
    color='black',
    source=source_ridgway,
    name='ridgway'
)  # planned: plot method

# jitter(field_name, width, mean=0, distribution="uniform", range=None):
#   field_name (str) – a field name to configure DataSpec with
#   width (float) – the width of the random distribution to apply
#   mean (float, optional) – an offset to apply (default: 0)
#   distribution (str, optional) – "uniform" or "normal" (default: "uniform")
#   range (Range, optional) – a range to use for computing synthetic coordinates
#       when necessary, e.g. a FactorRange when the column data is categorical
# NOTE(review): the jitter width (0.1) is tiny next to the glyph radius (100-150);
# if both are in the same data units the displacement is not visible — confirm the
# intended width.
source_victims = ColumnDataSource(df_victims.rename(columns=name_column_fix))
victim_points = p.circle(
    x=jitter('longitude_webmercator', 0.1),
    y=jitter('latitude_webmercator', 0.1),
    radius=150,
    legend_label='victims',
    color='blue',
    source=source_victims,
    name='victims'
)  # planned: plot method

source_disappearances = ColumnDataSource(df_disappearances.rename(columns=name_column_fix))
disappearance_points = p.circle(
    x=jitter('longitude_webmercator', 0.1),
    y=jitter('latitude_webmercator', 0.1),
    fill_color='green',
    line_color='black',
    hatch_color='black',
    hatch_pattern='.',
    hatch_weight=0.1,
    radius=100,
    legend_label='disappearances',
    source=source_disappearances,
    name='disappearances'
)  # planned: plot method

# Hover only over the three point layers (not the heatmap or the tiles); the tool
# is bound to the renderer objects themselves rather than to their glyph names.
hover = HoverTool(
    renderers=[ridgway_points, victim_points, disappearance_points],
    tooltips=[
        ('Name', '@name'),
        ('Coordinates', '@coordinates'),
        ('description', '@description')
    ]
)  # planned: plot method
p.add_tools(hover)

# Legend properties can only be set after a glyph with a legend label exists,
# which is why this comes after the p.circle calls.
# Clicking a legend entry toggles that layer (planned: plot method).
p.legend.click_policy = "hide"

show(p)  # planned: show method

In [None]:
# Ten highest-scoring grid points that kept a non-zero normalized score.
# reset_index() keeps the original row label as an 'index' column.
top_10 = (
    df_rossmo_results
    .loc[lambda scored: scored['score_normalized'].gt(0)]
    .sort_values('score_normalized', ascending=False)
    .head(10)
    .reset_index()
)
top_10

https://stackoverflow.com/a/43759646/13959910

In [19]:
# List the columns available on the Ridgway frame (e.g. for the hover tooltips).
# NOTE(review): the saved output lists 'X', 'Y' and 'Name', while the preparation
# cell reads df_ridgway['latitude'] / df_ridgway['longitude'] — the output looks
# stale; re-run on a fresh kernel to confirm the actual column names.
df_ridgway.columns

Index(['X', 'Y', 'Name', 'description', 'coordinates', 'latitude_webmercator',
       'longitude_webmercator'],
      dtype='object')

In [53]:
# ideal coordinates top to bottom: 47.8 - 47.25, left to right: -122.65 - -121.95
# Midpoint of the plot's vertical extent: average of the first and last bound.
(y_range[0] + y_range[-1]) / 2

46.566250499999995

In [56]:
# Same midpoint via numpy: the mean of the two y_range bounds.
np.mean(y_range)

46.566250499999995

Example of using a custom tooltip to display images in the HoverTool:
https://docs.bokeh.org/en/0.11.1/docs/user_guide/tools.html#custom-tooltip