In [1]:
import pandas as pd
import numpy as np
from sklearn.cluster import KMeans
from collections import Counter
import os


# Google Maps Geo-plot API
from bokeh.models import (
   GMapPlot, GMapOptions, ColumnDataSource, Circle, Range1d, PanTool, WheelZoomTool, Slider, 
   HoverTool, BoxSelectTool
 )
from bokeh.io import output_file, show, output_notebook, curdoc
from bokeh.plotting import figure, output_file
from bokeh.models.callbacks import CustomJS


import warnings
warnings.filterwarnings('ignore')

import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
# Load the merged dataset; the path is relative to the notebook's working directory.
df = pd.read_csv('data/updated_merged_df')

In [None]:
# df[df['FIPS_geoID'] == 48453001118]
#df.groupby('FIPS_geoID')['priority'].count().reset_index(name = 'count').sort_values('count', ascending = False)

In [None]:
#df.columns

In [None]:
#df[df['Day_of_week (name)']=='Saturday'].count()

## Create a function that takes a dataframe and adds additional columns (the count, normalized count, and plot size for each row's FIPS_geoID). This function will be used within the filter_df function (below) to ensure the count represents the time of day and day of week.

In [3]:
def create_size_column(data, threshold=.11, min_size=2, scale=28):
    """Attach per-FIPS incident counts and a plotting size to every row.

       Inputs:
       - data: dataframe with at least 'FIPS_geoID' and 'priority' columns
       - threshold: normalized counts at or below this value get `min_size`
       - min_size: marker size used for the low-count rows
       - scale: multiplier applied to the normalized count of all other rows

       Output: A new dataframe (the input is not modified) that is `data`
       left-merged with its own per-FIPS counts. Because of the merge the
       original 'priority' column comes back as 'priority_x', the index is
       reset, and three columns are added:
       - Count_per_FIPS: non-null 'priority' values in the row's FIPS_geoID
       - Normalized_count: Count_per_FIPS divided by the largest Count_per_FIPS
       - size: `min_size` if Normalized_count <= threshold, else
               Normalized_count * scale
       """
    counts_per_fips = data.groupby('FIPS_geoID')[['priority']].count().reset_index()

    # Both frames carry a 'priority' column, so the merge suffixes them
    # '_x' (original values) and '_y' (the per-FIPS count).
    merged = pd.merge(data, counts_per_fips, how='left', on='FIPS_geoID')
    merged = merged.rename(columns={'priority_y': 'Count_per_FIPS'})

    # float() accepts both a NumPy scalar and the plain-float NaN that pandas
    # returns for an empty column, so an empty frame no longer raises here.
    largest_count = float(merged['Count_per_FIPS'].max())
    merged['Normalized_count'] = merged['Count_per_FIPS'] / largest_count

    # Vectorised form of the row-by-row size rule; NaN compares False and
    # therefore stays NaN, exactly as in the loop it replaces.
    normalized = merged['Normalized_count']
    merged['size'] = np.where(normalized <= threshold, min_size, normalized * scale)
    return merged

In [None]:
#create_size_column(df)

## Create a function that takes a DF and returns a new DF, filtered to one day of week and hour of day, with five fields (FIPS_geoID, Lat, Long, Count_per_FIPS, and size column)

## Arguments will be day (string) and hour (int)

In [4]:
def filter_df(data,day,hour):
    """Select the incidents for one day/hour slot and size them for plotting.

       Inputs:
       - data: dataframe of incidents
       - day: day of week as a string, compared to 'Day_of_week (name)'
       - hour: hour of day as an int, compared to 'Hour'

       Output: A dataframe with one row per matching incident and the columns
       - FIPS_geoID
       - Lat
       - Long
       - Count_per_FIPS (incident count for the row's FIPS_geoID in this slot)
       - size (marker size computed by create_size_column)
       """
    # Keep only the columns needed downstream, in the order they appear in `data`.
    wanted = ('Day_of_week (name)','Hour','Lat','Long','FIPS_geoID','priority')
    kept_columns = [name for name in data.columns if name in wanted]
    subset = data[kept_columns]

    # Rows that match both the requested day and the requested hour.
    in_slot = subset['Day_of_week (name)'].eq(day) & subset['Hour'].eq(hour)

    # Discard rows with any missing value before counting.
    complete_rows = subset[in_slot].dropna()

    # Counts are computed on the filtered rows so they reflect this slot only.
    sized = create_size_column(complete_rows)

    return sized[['FIPS_geoID','Lat','Long','Count_per_FIPS','size']]

In [None]:
#np.array(df[['Lat','Long']])

In [None]:
# Smoke-test filter_df on the Monday / hour-5 slice and show the returned type.
df_test = filter_df(df,'Monday',5)
type(df_test)

In [None]:
# create a dataframe with only unique FIPS_geoID and a count
#df_group = df_test.groupby('FIPS_geoID')[['Lat','Long','Count_per_FIPS','size']]

In [None]:
#df_grouped = df_test.groupby(['FIPS_geoID','Lat','Long','size']
# One row per (FIPS_geoID, Lat, Long, size) combination with its row count,
# sorted from least to most frequent.
df_grouped = (
    df_test
    .groupby(['FIPS_geoID','Lat','Long','size'])['Count_per_FIPS']
    .count()
    .reset_index(name = 'count')
    .sort_values('count')
)
# df_test[(df_test.Lat == 30.426141) & (df_test.Long == -97.769773)]

In [None]:
# (number of grouped rows, number of columns) of the grouped frame.
df_grouped.shape

## Create a function that outputs a Geomap with all the centroids and the incident lats, longs.

## I need to figure out a way to adjust centroid size depending on how many incident points fall within a certain radius of it. 

In [19]:
def plot_df(data,day,hour,num_centroids,random_state=None):
    """Plot incident locations and their KMeans centroids on a Google map of Austin.

       Inputs:
       - data: dataframe accepted by filter_df
       - day: day of week in string format
       - hour: hour of day in int format
       - num_centroids: number of KMeans clusters to fit
       - random_state: optional seed forwarded to KMeans so a plot can be
                       reproduced; the default (None) keeps the unseeded behaviour

       Output: None. The plot of lats and longs, overlaid on a map of Austin and
               filtered by the given day and hour, is displayed with bokeh's show().

       Raises:
       - ValueError if fewer incidents than num_centroids match the day/hour
       - KeyError if the GOOGLE_API_KEY environment variable is not set
    """

    # Restrict the data to the requested day/hour and attach the size column.
    new_df = filter_df(data,day,hour)

    # KMeans cannot fit more clusters than samples; fail early with a message
    # that names the slot instead of surfacing the error from inside sklearn.
    if len(new_df) < num_centroids:
        raise ValueError(
            "Only {} incident(s) for day={!r}, hour={!r}; cannot fit {} centroid(s)"
            .format(len(new_df), day, hour, num_centroids))

    # One row per distinct (FIPS_geoID, Lat, Long, size) with its row count.
    # Plotting from this frame means each location is drawn once and the hover
    # tool shows a single total for it.
    df_count = new_df.groupby(['FIPS_geoID','Lat','Long','size'])['Count_per_FIPS'].count() \
                     .reset_index(name = 'count').sort_values('count')

    # Model the data with KMeans. The fit uses every incident row (not the
    # de-duplicated frame) so repeated locations weigh more.
    X = np.array(new_df[['Lat','Long']])
    model = KMeans(n_init=100,n_clusters=num_centroids,max_iter=400,tol=1e-8,
                   random_state=random_state)
    model.fit(X)
    centroids = model.cluster_centers_

    # Centroid coordinates: column 0 is Lat, column 1 is Long (same order as X).
    cent_lats = list(centroids[:,0])
    cent_longs = list(centroids[:,1])

    # Actual Lat,Long of incidents
    incident_lats = list(df_count['Lat'])
    incident_longs = list(df_count['Long'])

    # Geoplotting
    map_options = GMapOptions(lat=30.2672, lng=-97.7431, map_type="roadmap", zoom=11)

    plot = GMapPlot(
        x_range=Range1d(), y_range=Range1d(), map_options=map_options)
    plot.title.text = "Austin (Day: {}, Hour: {})".format(day,hour)

    # For GMaps to function, Google requires you obtain and enable an API key:
    #
    # https://developers.google.com/maps/documentation/javascript/get-api-key
    #
    # The key is read from the environment so it is never stored in the notebook.
    plot.api_key = os.environ['GOOGLE_API_KEY']

    # Centroids: large translucent blue circles.
    centroid_source = ColumnDataSource( data=dict(
        lat=cent_lats,
        lon=cent_longs,))
    centroid_dots = Circle(x="lon", y="lat", size=55, fill_color="blue", fill_alpha=0.2, line_color=None)
    plot.add_glyph(centroid_source, centroid_dots)

    # Incidents: red circles whose size comes from the 'size' column
    # (computed by create_size_column from the normalized per-FIPS count).
    incident_source = ColumnDataSource( data=dict(
        lat=incident_lats,
        lon=incident_longs,
        size=df_count['size'].values,
        count=df_count['count'].values))
    incident_dots = Circle(x="lon", y="lat", size="size", fill_color="red", fill_alpha=0.8, line_color=None)
    incident_renderer = plot.add_glyph(incident_source, incident_dots)

    # Hover tool: limited to the incident renderer because the centroid source
    # has no 'count' column and would otherwise show an unresolved "@count".
    hover = HoverTool(renderers=[incident_renderer],
                      tooltips=[
                     ("Incident Count", "@count")])

    plot.add_tools(PanTool(), WheelZoomTool(), BoxSelectTool(),hover)
    show(plot)
    

## Plot below

In [20]:
# Render the map for Friday, hour 22, with a single centroid.
plot_df(df,'Friday',22,1)

## Slider tool implementation

In [None]:
def final_plot(data):
    """Work in progress: show the default map, then try to add Day/Hour/Centroid sliders.

    NOTE(review): as written this function cannot run to completion.
    - `completed_source`, `p`, `slider`, `callback`, `column` and `plot` are
      not assigned in this function and are not defined or imported at module
      level anywhere in the visible notebook (`completed_source` and `plot`
      appear only as locals of `plot_df`), so the first such reference raises
      NameError unless they are defined elsewhere.
    - `callback1`, `callback2` and `callback3` are passed to the Slider
      constructors before they are assigned further down in this same
      function, which raises UnboundLocalError.
    - The three callbacks only edit entries of `data['x']` / `data['y']` on the
      source in the browser; nothing here calls `plot_df` again with the
      slider values.
    """
    
    # The first thing this function does is call plot_df with the default
    # values below ('Monday', hour 5, 10 centroids), which shows the plot.
    plot_df(data,'Monday',5,10)
    
    
    
    # Trying to create three sliders that allow a user to modify Day, Hour, and Centroid count from browser
    # This interface must then update the plot accordingly using callback

    # NOTE(review): `completed_source` and `p` are not defined in this scope - see docstring.
    sourceupper = completed_source
    p.line('x', 'y', source = sourceupper, line_color = "navy", line_width = 4)
    

    # NOTE(review): callback1..3 are referenced here but only assigned below.
    # NOTE(review): slider_day yields a float in 0.1..4, while plot_df compares
    # `day` against day-name strings - the mapping is still to be designed.
    slider_day = Slider(start=0.1, end=4, value=1, step=.1, title="Day of Week",callback=callback1)
    slider_hour = Slider(start=0, end=23, value=1, step=1, title="Hour of Day",callback=callback2)
    slider_centroid = Slider(start=2, end=19, value=10, step=1, title="Number of Centroids",callback=callback3)
    
    # NOTE(review): the JS below uses `source.get('data')` / `source.trigger('change')`;
    # confirm these accessors exist in the installed Bokeh version.
    # Writes slider value * 10 into y[0] and y[1] of the source data.
    callback1 = CustomJS(args=dict(source=sourceupper), code="""
                var data = source.get('data');
                var f = cb_obj.get('value');
                x = data['x'];
                y = data['y'];
                y[0] = f *10;
                y[1] = f *10;
                source.trigger('change');
            """)
    # Writes the slider value into x[1] of the source data.
    callback2 = CustomJS(args=dict(source=sourceupper), code="""
                var data = source.get('data');
                var f = cb_obj.get('value');
                x = data['x'];
                y = data['y'];
                x[1] = f

                source.trigger('change');
            """)
    # Writes parseInt(slider value) + 10 into x[2] and logs it to the browser console.
    callback3 = CustomJS(args=dict(source=sourceupper), code="""
                var data = source.get('data');
                var f = cb_obj.get('value');
                x = data['x'];
                y = data['y'];
                x[2] = parseInt(f) + 10
                console.log(x[2])
                source.trigger('change');
            """)
    
    
    # Sketch of a Python-side callback alternative (not active):
#     def data_changed(attr, old, new):
        
#      # update sliders here
#         slider1.value = # whatever
#         source.on_change('data', data_changed)
#     def slider1_changed(attr, old, new):
#         source.data = # set new data or modify data
#         slider1.on_change('value', slider1_changed)
    
    
    
    
    
    # NOTE(review): `slider`, `callback`, `column` and `plot` are not defined
    # in this scope - see docstring.
    slider.js_on_change('value', callback)

    layout = column(slider, plot)

    show(layout)
    
    

## Bunch of testing below nothing to see here

In [None]:
#plt.scatter(np.array(df['Lat']),np.array(df['Long']))

In [None]:
#type(df["Year"].values[0])

In [None]:
#df[df['Year']==2012 & df['Day_of_week (name)']=='Monday' & df['Hour']==2]

In [None]:
# List the columns of the filtered test frame.
df_test.columns

In [None]:
#df_test['normalize_count']=(df['Count_per_FIPS']-df['Count_per_FIPS'].mean())/df['Count_per_FIPS'].std()

In [None]:
#df_test['normalize_count'].max()

In [None]:
# Normalise by the filtered frame's own maximum, matching create_size_column.
# 'Count_per_FIPS' is created by create_size_column on the filtered rows, so
# the maximum has to come from df_test rather than the raw `df`.
df_test['Normalized_count'] = df_test['Count_per_FIPS']/float(df_test['Count_per_FIPS'].max())

In [None]:
# Number of distinct normalized-count values in the test frame.
df_test['Normalized_count'].nunique()