In [9]:
# Preliminaries
import numpy as np
import pandas as pd
import geopandas as gpd
from geopandas import GeoDataFrame
from geopandas.tools import sjoin
from shapely.geometry import Point
from random import randint
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
# Import SPD Incidents dataset from data.seattle.gov, drop any duplicates
spd = pd.read_csv("Seattle_Police_Department_Police_Report_Incident.csv").drop_duplicates(keep="first")

# Import Seattle Neighborhoods dataset from github.com/seattleio (original .geojson file has been converted to .json)
nbh = gpd.read_file("neighborhoods.json")

In [3]:
# Get points within the University District boundaries
geometry = [Point(xy) for xy in zip(spd.Longitude, spd.Latitude)]
crs = {'init': 'epsg:4326'}
spd = GeoDataFrame(spd, crs=crs, geometry=geometry)
udist_geo = nbh[nbh["name"] == "University District"]
pointInPolys = sjoin(spd, udist_geo, how='left')
df = pointInPolys[pointInPolys['index_right'] == 5.0]



In [4]:
df.head()

Unnamed: 0,RMS CDW ID,General Offense Number,Offense Code,Offense Code Extension,Offense Type,Summary Offense Code,Summarized Offense Description,Date Reported,Occurred Date or Date Range Start,Occurred Date Range End,...,Year,geometry,index_right,id,city,county,nhood,nested,area,name
18,1412044,2010427618,2308,0.0,THEFT-BUILDING,2300,OTHER PROPERTY,12/12/2010 04:04:00 PM,12/11/2010 11:00:00 PM,12/12/2010 12:30:00 AM,...,2010,POINT (-122.313034058 47.667491913),5.0,Seattle:University District:,Seattle,King,University District,,49348390.0,University District
62,1032298,2016265287,2404,8.0,VEH-THEFT-TRUCK,2400,VEHICLE THEFT,07/24/2016 03:44:00 PM,07/21/2016 08:00:00 PM,07/24/2016 03:44:00 PM,...,2016,POINT (-122.308311462 47.665634155),5.0,Seattle:University District:,Seattle,King,University District,,49348390.0,University District
110,1412085,2010428396,5050,1.0,HARASSMENT,5000,THREATS,12/13/2010 08:56:00 AM,06/01/2010 12:01:00 AM,12/13/2010 07:00:00 AM,...,2010,POINT (-122.317359924 47.669715881),5.0,Seattle:University District:,Seattle,King,University District,,49348390.0,University District
118,1412088,2010428464,1313,0.0,ASSLT-NONAGG,1300,ASSAULT,12/13/2010 01:57:00 PM,12/13/2010 10:16:00 AM,,...,2010,POINT (-122.313117981 47.662181854),5.0,Seattle:University District:,Seattle,King,University District,,49348390.0,University District
125,1032361,2016320567,2308,0.0,THEFT-BUILDING,2300,OTHER PROPERTY,09/04/2016 08:41:00 AM,09/04/2016 12:01:00 AM,09/04/2016 08:30:00 AM,...,2016,POINT (-122.306495667 47.667217255),5.0,Seattle:University District:,Seattle,King,University District,,49348390.0,University District


In [5]:
# Extract datetime
time = df['Occurred Date or Date Range Start']
datetime = df["datetime"] = pd.to_datetime(time)
df['day'] = datetime.apply(lambda x: x.weekday())
df['hour'] = datetime.dt.hour
df['week'] = datetime.dt.week

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  This is separate from the ipykernel package so we can avoid doing imports until
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  after removing the cwd from sys.path.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the ca

In [6]:
# Renaming
change = {'BURGLARY-SECURE PARKING-RES': 'BURGLARY'}
df = df.rename(columns={'Summarized Offense Description': 'otype'}).replace({'otype': change})

# Create text from column 6 (Summarized Offense Description) and 8 (Occured Date or Date Range Start)
df['text'] = df.iloc[:,[6, 8]].apply(lambda x: ' '.join(x), axis=1)

# Select data to be exported
df = df[df['Year'] > 2007].reset_index()
df_udist = df.iloc[:,[7,18,19,20,31,33]]

In [50]:
# Generate a unique color for each type of incident
colors = []
for i in range(48):
    colors.append('#%06X' % randint(0, 0xFFFFFF))

In [51]:
# Build a series of colors and add to the dataframe
s_colors = []
for i in range(len(df_udist)):
    s_colors.append(colors[list(df_udist.otype.unique()).index(df_udist.otype[i])])
    
df_udist['color'] = s_colors

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  import sys


In [52]:
# Export dataframe as .geojson
output_filename = 'df_udist.geojson'
with open(output_filename, 'w') as f:
    f.write(df_udist.to_json())