# Kaggle: San Francisco Crime Classification
## Improvement as part of sdap17 exercise 3

In [3]:
import datetime
import pprint

import gmaps
import gmaps.datasets
import numpy as np
import pandas as pd
import requests

## Exploration of the training data set

In [37]:
# Read the training set and parse the 'Dates' column into datetimes while loading.
data = pd.read_csv(
    "../../data/raw/henrik/train.csv",
    parse_dates=['Dates'],
)

In [13]:
# Peek at the first five rows of the training data.
data.head(n=5)

Unnamed: 0,Dates,Category,Descript,DayOfWeek,PdDistrict,Resolution,Address,X,Y
0,2015-05-13 23:53:00,WARRANTS,WARRANT ARREST,Wednesday,NORTHERN,"ARREST, BOOKED",OAK ST / LAGUNA ST,-122.425892,37.774599
1,2015-05-13 23:53:00,OTHER OFFENSES,TRAFFIC VIOLATION ARREST,Wednesday,NORTHERN,"ARREST, BOOKED",OAK ST / LAGUNA ST,-122.425892,37.774599
2,2015-05-13 23:33:00,OTHER OFFENSES,TRAFFIC VIOLATION ARREST,Wednesday,NORTHERN,"ARREST, BOOKED",VANNESS AV / GREENWICH ST,-122.424363,37.800414
3,2015-05-13 23:30:00,LARCENY/THEFT,GRAND THEFT FROM LOCKED AUTO,Wednesday,NORTHERN,NONE,1500 Block of LOMBARD ST,-122.426995,37.800873
4,2015-05-13 23:30:00,LARCENY/THEFT,GRAND THEFT FROM LOCKED AUTO,Wednesday,PARK,NONE,100 Block of BRODERICK ST,-122.438738,37.771541


In [14]:
# Peek at the last five rows of the training data.
data.tail(n=5)

Unnamed: 0,Dates,Category,Descript,DayOfWeek,PdDistrict,Resolution,Address,X,Y
878044,2003-01-06 00:15:00,ROBBERY,ROBBERY ON THE STREET WITH A GUN,Monday,TARAVAL,NONE,FARALLONES ST / CAPITOL AV,-122.459033,37.714056
878045,2003-01-06 00:01:00,LARCENY/THEFT,GRAND THEFT FROM LOCKED AUTO,Monday,INGLESIDE,NONE,600 Block of EDNA ST,-122.447364,37.731948
878046,2003-01-06 00:01:00,LARCENY/THEFT,GRAND THEFT FROM LOCKED AUTO,Monday,SOUTHERN,NONE,5TH ST / FOLSOM ST,-122.40339,37.780266
878047,2003-01-06 00:01:00,VANDALISM,"MALICIOUS MISCHIEF, VANDALISM OF VEHICLES",Monday,SOUTHERN,NONE,TOWNSEND ST / 2ND ST,-122.390531,37.780607
878048,2003-01-06 00:01:00,FORGERY/COUNTERFEITING,"CHECKS, FORGERY (FELONY)",Monday,BAYVIEW,NONE,1800 Block of NEWCOMB AV,-122.394926,37.738212


In [15]:
# Number of rows in the training data.
data.shape[0]

878049

In [16]:
# Distinct values of the 'Category' column, followed by how many there are.
crimes = data['Category'].unique()
crimes_report = "Crimes: {}, #{}".format(crimes, len(crimes))
pprint.pprint(crimes_report, indent=2)

("Crimes: ['WARRANTS' 'OTHER OFFENSES' 'LARCENY/THEFT' 'VEHICLE THEFT' "
 "'VANDALISM'\n"
 " 'NON-CRIMINAL' 'ROBBERY' 'ASSAULT' 'WEAPON LAWS' 'BURGLARY'\n"
 " 'SUSPICIOUS OCC' 'DRUNKENNESS' 'FORGERY/COUNTERFEITING' 'DRUG/NARCOTIC'\n"
 " 'STOLEN PROPERTY' 'SECONDARY CODES' 'TRESPASS' 'MISSING PERSON' 'FRAUD'\n"
 " 'KIDNAPPING' 'RUNAWAY' 'DRIVING UNDER THE INFLUENCE'\n"
 " 'SEX OFFENSES FORCIBLE' 'PROSTITUTION' 'DISORDERLY CONDUCT' 'ARSON'\n"
 " 'FAMILY OFFENSES' 'LIQUOR LAWS' 'BRIBERY' 'EMBEZZLEMENT' 'SUICIDE'\n"
 " 'LOITERING' 'SEX OFFENSES NON FORCIBLE' 'EXTORTION' 'GAMBLING'\n"
 " 'BAD CHECKS' 'TREA' 'RECOVERED VEHICLE' 'PORNOGRAPHY/OBSCENE MAT'], #39")


In [17]:
# Distinct values of the 'Descript' column, followed by how many there are.
descriptions = data['Descript'].unique()
descriptions_report = "Descriptions: {}, #{}".format(descriptions, len(descriptions))
pprint.pprint(descriptions_report, indent=2)

("Descriptions: ['WARRANT ARREST' 'TRAFFIC VIOLATION ARREST' 'GRAND THEFT FROM "
 "LOCKED AUTO'\n"
 " 'GRAND THEFT FROM UNLOCKED AUTO' 'STOLEN AUTOMOBILE'\n"
 " 'PETTY THEFT FROM LOCKED AUTO' 'MISCELLANEOUS INVESTIGATION'\n"
 " 'MALICIOUS MISCHIEF, VANDALISM OF VEHICLES' 'FOUND PROPERTY'\n"
 " 'ROBBERY, ARMED WITH A KNIFE' 'AGGRAVATED ASSAULT WITH BODILY FORCE'\n"
 " 'TRAFFIC VIOLATION' 'ROBBERY, BODILY FORCE'\n"
 " 'STAY AWAY OR COURT ORDER, NON-DV RELATED' 'LOST PROPERTY'\n"
 " 'ATTEMPTED THEFT FROM LOCKED VEHICLE' 'CIVIL SIDEWALKS, CITATION'\n"
 " 'MALICIOUS MISCHIEF, VANDALISM' 'SUSPICIOUS PACKAGE'\n"
 " 'AIDED CASE, MENTAL DISTURBED' 'PETTY THEFT SHOPLIFTING'\n"
 " 'PROBATION VIOLATION' 'STAY AWAY ORDER VIOLATION, DV RELATED'\n"
 " 'DRIVERS LICENSE, SUSPENDED OR REVOKED' 'STOLEN MOTORCYCLE'\n"
 " 'GRAND THEFT FROM PERSON' 'BURGLARY, VEHICLE (ARREST MADE)'\n"
 " 'ATTEMPTED ROBBERY ON THE STREET WITH BODILY FORCE'\n"
 " 'PETTY THEFT FROM A BUILDING' 'INVESTIGATIVE DETENTION'\n"
 " '

In [18]:
# Distinct values of the 'Address' column, followed by how many there are.
adresses = data['Address'].unique()
adresses_report = "Adresses: {} #{}".format(adresses, len(adresses))
pprint.pprint(adresses_report, indent=2)

("Adresses: ['OAK ST / LAGUNA ST' 'VANNESS AV / GREENWICH ST'\n"
 " '1500 Block of LOMBARD ST' ..., '300 Block of JOHN F KENNEDY DR'\n"
 " 'FOLSOM ST / ZENO PL' '1000 Block of 22ND AV'] #23228")


In [None]:
# Number of rows per crime category, most frequent first.
data['Category'].value_counts(sort=True, ascending=False)

In [7]:
# Load the Google API key from a local key file and hand it to gmaps.
# The context manager closes the file even if reading fails, and strip()
# removes surrounding whitespace (e.g. a trailing newline) that would
# otherwise become part of the key.
with open("./assets/gapi.key", 'r') as key_file:
    key = key_file.read().strip()
gmaps.configure(api_key=key)

In [48]:
# Build a location subset from the most recent crimes in the data set:
# rows dated after 2015-01-01 00:00 up to and including 2016-01-01 00:00.
# The bounds are full datetimes rather than datetime.date objects, because
# recent pandas versions raise a TypeError when a datetime64 column is
# order-compared against a plain date.
start_date = datetime.datetime(2015, 1, 1)
end_date = datetime.datetime(2016, 1, 1)
date_mask = (data['Dates'] > start_date) & (data['Dates'] <= end_date)
# A single .loc call selects rows and columns together (no chained indexing).
# Column order is (Y, X) so that each tuple below is a (Y, X) pair.
location_subset = data.loc[date_mask, ['Y', 'X']]
locations = [tuple(x) for x in location_subset.values]
len(locations)

27584

In [49]:
# Create the gmaps figure; a heatmap layer is added to it in a later cell.
fig = gmaps.figure()

In [50]:
# Build a heatmap layer from the (Y, X) crime locations and attach it to the figure.
crime_heatmap = gmaps.heatmap_layer(locations)
fig.add_layer(crime_heatmap)

In [51]:
# Show the figure as this cell's output.
fig