In [0]:
# Initial Map plot of KC Crime data(not cleaned) from years 2009 - 2017, randomly sampled from a shuffled pandas dataframe. Colored by the Area column. 6-10-2018

In [1]:
!pip install bokeh



In [0]:
import pandas as pd
import numpy as np
from bokeh.io import output_file, output_notebook, show
from bokeh.models import (
  GMapPlot, GMapOptions, ColumnDataSource, Circle, LogColorMapper,CategoricalColorMapper, BasicTicker, ColorBar,
    DataRange1d,Range1d, PanTool, WheelZoomTool, BoxSelectTool
)
from bokeh.models.mappers import ColorMapper, LinearColorMapper
from bokeh.palettes import Viridis5, Category20b

In [0]:
# Bring in the data

# Google drive connection using pydrive
!pip install -U -q PyDrive
 
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials

# 1. Authenticate and create the PyDrive client.
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)

In [5]:
# Looks through the folder provided by Hou and downloads the concatenated KCPD crime data 
# and loads it into a pandas dataframe name 'df'

folder_id = 'Hou folder key to google drive provided at top of slack channel' 

df = ""

file_list = drive.ListFile({'q': "'{}' in parents and trashed=false".format(folder_id)}).GetList()
for file in file_list:
  if file['title'] == 'kcpd_crime_2009-2017_concat.csv':
    print("Found: {}".format(file['title']))
    tmp_file_connection = drive.CreateFile({'id':'{}'.format(file['id'])})
    tmp_file_connection.GetContentFile(file['title'])
    df = pd.read_csv(file['title'])

Found: kcpd_crime_2009-2017_concat.csv


  interactivity=interactivity, compiler=compiler, result=result)


In [0]:
# Shuffles the dataframe then samples n samples
n = 100000

import numpy as np
df_shuffled = df.reindex(np.random.permutation(df.index))
df_sampled = df_shuffled.sample(n=n)


In [8]:
# Modified example from this website: http://www.bigendiandata.com/2017-06-27-Mapping_in_Jupyter/

map_options = GMapOptions(lat=39.125212, lng=-94.551136, map_type="roadmap", zoom=10)

plot = GMapPlot(
    x_range=Range1d(), y_range=Range1d(), map_options=map_options
)
plot.title.text = "KC Crime Data"

# Replace text in string with api key
plot.api_key = "Google maps api key pinned in slack channel"

source = ColumnDataSource(
    data=dict(
        lat=df_sampled.Latitude.tolist(),
        lon=df_sampled.Longitude.tolist(),             
        color=df_sampled.Area.tolist()
    )
)


color_mapper = CategoricalColorMapper(factors=['CP', 'CPD', 'EP', 'EPD', 'MPD', 'NP', 'NPD', 'OSPD', 'PD1', 'SC', 'SCP', 'SP', 'SPD'], palette=Category20b[20])


circle = Circle(x="lon", y="lat", fill_color={'field': 'color', 'transform': color_mapper}, fill_alpha=0.2, line_color=None)
plot.add_glyph(source, circle)

plot.add_tools(PanTool(), WheelZoomTool(), BoxSelectTool())

output_notebook()

show(plot)