In [1]:
# import modules

import pandas as pd
import numpy as np
import json
from pandas.io.json import json_normalize
from geopy.distance import vincenty
import re
import statsmodels.formula.api as sm
from scipy.stats import gaussian_kde
%pylab inline

Populating the interactive namespace from numpy and matplotlib


In [2]:
# Load the felony incident CSV; index_col=0 uses the file's first column as the row index.
crimedf = pd.read_csv('NYPD_7_Major_Felony_Incident_Map.csv', index_col=0)

In [3]:
# Keep only incidents whose 'Occurrence Year' is 2015. The explicit .copy()
# makes crimedf2015 an independent DataFrame rather than a slice of crimedf,
# so columns can be added to it later without pandas emitting a
# SettingWithCopyWarning.
crimedf2015 = crimedf[crimedf['Occurrence Year'] == 2015].copy()

In [4]:
# convert crime dataframe text lat/long coordinates to float tuples. 
# need to split the string into the lat and long numbers, convert into floats and then combine them into tuples.

def tupler(x):
    """Parse a coordinate string such as '(40.7, -73.9)' into a tuple of floats.

    The first and last characters (the enclosing brackets) are dropped, the
    remainder is split on commas, and each piece is converted with float().
    """
    inner = x[1:-1]
    return tuple(float(piece) for piece in inner.split(','))

# Add a 'lat_long' column holding the parsed float tuple for every 'Location 1' string.
crimedf2015['lat_long']=crimedf2015['Location 1'].apply(tupler)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


In [5]:
def dist_calc(pointA, pointB):
    """Return the distance in miles between two points using geopy's vincenty.

    Both arguments are passed straight to `vincenty`; elsewhere in this
    notebook they are (latitude, longitude) tuples.

    NOTE(review): `vincenty` is deprecated in recent geopy releases -- confirm
    the installed geopy version still provides it.
    """
    separation = vincenty(pointA, pointB)
    return separation.miles

In [115]:
def crime_list(pointA, crimetable=crimedf2015, threshold=0.25):
    """Return the incidents in `crimetable` that lie closer than `threshold` to `pointA`.

    Parameters
    ----------
    pointA : point handed to dist_calc as the reference location.
    crimetable : DataFrame providing `lat_long`, `Identifier` and `Offense`
        columns. The default is bound to crimedf2015 when this cell runs.
    threshold : distance cut-off, in the unit dist_calc returns (miles).

    Returns
    -------
    DataFrame with 'Identifier', 'Offense' and 'Distance' columns, restricted
    to rows whose Distance is strictly below `threshold`.
    """
    # One distance per incident, in the same order as the rows of crimetable.
    distances = [dist_calc(pointA, incident_loc) for incident_loc in crimetable.lat_long]
    incidents = pd.DataFrame({
        'Identifier': crimetable.Identifier,
        'Offense': crimetable.Offense,
        'Distance': distances,
    })
    is_near = incidents.Distance < threshold
    return incidents[is_near]

In [143]:
# crime counter: for any given point, count the number of each type of felony and the total number

# Distinct offense names found in the 2015 data. np.unique is called
# explicitly rather than relying on the bare `unique` name that
# `%pylab inline` injects into the namespace.
crime_type_list = list(np.unique(crimedf2015.Offense))

def crime_counter(pointA, crimetable=crimedf2015, threshold=0.25):
    """Count nearby felonies of each type, plus the overall total, around `pointA`.

    Parameters
    ----------
    pointA : reference location, forwarded to crime_list.
    crimetable : incident DataFrame, forwarded to crime_list. The default is
        bound to crimedf2015 when this cell runs.
    threshold : distance cut-off, forwarded to crime_list.

    Returns
    -------
    DataFrame with one row per entry of crime_type_list followed by a final
    'TOTAL FELONIES' row; columns are 'felony' and 'felony_count'.
    """
    # Forward `threshold` as well: it used to be dropped here, so crime_list
    # always fell back to its own default regardless of what the caller asked for.
    nearby = crime_list(pointA, crimetable, threshold)

    fel_name = crime_type_list + ['TOTAL FELONIES']
    fel_count = [int((nearby.Offense == felony).sum()) for felony in crime_type_list]
    fel_count.append(len(nearby))  # the aggregate row counts every nearby incident

    return pd.DataFrame({'felony': fel_name, 'felony_count': fel_count})

In [102]:
# Build the per-felony cut-offs used by the cuff rating. For every felony type
# plus the 'TOTAL FELONIES' aggregate, take the mean and standard deviation of
# the matching yelpcrimedf column and place four thresholds at
# mean - 1.5*std, mean - 0.5*std, mean + 0.5*std and mean + 1.5*std.
# NOTE(review): yelpcrimedf is not defined in the visible part of this
# notebook -- confirm the cell that creates it runs before this one.
fel_name=[]
fel_mean=[]
fel_std=[]
cuff_threshold_1=[]
cuff_threshold_2=[]
cuff_threshold_3=[]
cuff_threshold_4=[]
for felony in crime_type_list + ['TOTAL FELONIES']:
    name_of_felony=felony
    # `mean` is not imported explicitly; presumably it is numpy's mean made
    # available by `%pylab inline` -- TODO confirm.
    mean_of_felony=mean(yelpcrimedf[felony])
    std_of_felony=yelpcrimedf[felony].std(axis=0)
    thresh1=mean_of_felony-1.5*std_of_felony
    thresh2=mean_of_felony-0.5*std_of_felony
    thresh3=mean_of_felony+0.5*std_of_felony
    thresh4=mean_of_felony+1.5*std_of_felony
    
    fel_name.append(name_of_felony)
    fel_mean.append(mean_of_felony)
    fel_std.append(std_of_felony)
    cuff_threshold_1.append(thresh1)
    cuff_threshold_2.append(thresh2)
    cuff_threshold_3.append(thresh3)
    cuff_threshold_4.append(thresh4)

# One row per felony, in the same order as crime_type_list + ['TOTAL FELONIES'].
# fel_mean and fel_std are collected above but are not stored in this frame.
thresholds=pd.DataFrame({'felony' : fel_name, 'thresh1' : cuff_threshold_1, 'thresh2' : cuff_threshold_2, 'thresh3' : cuff_threshold_3, 'thresh4' : cuff_threshold_4})

In [246]:
def cuff_rating(pointA, crimetable=crimedf2015):
    """Rate a location from 1 to 5 per felony type and overall.

    Each nearby felony count (from crime_counter) is compared with the four
    cut-offs in the module-level `thresholds` frame. The rating is 5 minus the
    number of cut-offs the count falls strictly below: a count under thresh1
    rates 1, and a count at or above thresh4 rates 5.

    Parameters
    ----------
    pointA : reference location, forwarded to crime_counter.
    crimetable : incident DataFrame, forwarded to crime_counter. The default
        is bound to crimedf2015 when this cell runs.

    Returns
    -------
    (overall_cuff_rating, breakdown) where overall_cuff_rating is the rating
    of the 'TOTAL FELONIES' row and breakdown is a DataFrame with 'felony' and
    'cuff_rating' columns.

    Raises
    ------
    ValueError if a felony returned by crime_counter has no row in `thresholds`.
    """
    # Forward crimetable: it used to be ignored, so a custom table had no effect.
    counts = crime_counter(pointA, crimetable)

    unmatched = set(counts['felony']) - set(thresholds['felony'])
    if unmatched:
        raise ValueError('no thresholds defined for: ' + ', '.join(sorted(unmatched)))

    # Pair counts and cut-offs by felony name rather than assuming both frames
    # share the same row order and index.
    scored = counts.merge(thresholds, on='felony', how='left')
    cutoff_columns = ['thresh1', 'thresh2', 'thresh3', 'thresh4']
    below_cutoff = scored[cutoff_columns].gt(scored['felony_count'], axis=0)
    scored['cuff_rating'] = 5 - below_cutoff.sum(axis=1)

    # Find the aggregate row by name instead of the hard-coded row label 7,
    # which only held while there were exactly seven felony types.
    is_total = scored['felony'] == 'TOTAL FELONIES'
    overall_cuff_rating = scored.loc[is_total, 'cuff_rating'].iloc[0]

    return overall_cuff_rating, scored[['felony', 'cuff_rating']]

In [247]:
# Example point, given in the same (lat, long) tuple form as the lat_long column.
location=(40.721886, -73.996749)

In [248]:
# cuff_rating returns the overall rating and the per-felony breakdown frame.
rating, df = cuff_rating(location)

In [249]:
# Show the overall cuff rating for the example location.
rating

4

In [250]:
# Show the per-felony cuff ratings for the example location.
df

Unnamed: 0,felony,cuff_rating
0,BURGLARY,4
1,FELONY ASSAULT,3
2,GRAND LARCENY,4
3,GRAND LARCENY OF MOTOR VEHICLE,1
4,MURDER & NON-NEGL. MANSLAUGHTE,3
5,RAPE,2
6,ROBBERY,3
7,TOTAL FELONIES,4


In [254]:
def manual_input():
    """Prompt for a latitude and longitude, then print the cuff ratings for that point.

    Both values are read with input() and converted with float(), so
    non-numeric text raises ValueError. The overall rating is printed first,
    followed by the per-felony breakdown returned by cuff_rating.
    """
    print('input latitude:')
    lat_text = input()
    print('input longitude:')
    lon_text = input()

    point = (float(lat_text), float(lon_text))
    overall, breakdown = cuff_rating(point)

    print('The cuff rating for this location is ', overall)
    print()
    print('The cuff rating breakdown per felony type is as follows:')
    print(breakdown)

In [255]:
# Interactive run: reads a latitude and a longitude from the user.
manual_input()

input latitude:
40.721886
input longitude:
-73.996749
The cuff rating for this location is  4

The cuff rating breakdown per felony type is as follows:
                           felony  cuff_rating
0                        BURGLARY            4
1                  FELONY ASSAULT            3
2                   GRAND LARCENY            4
3  GRAND LARCENY OF MOTOR VEHICLE            1
4  MURDER & NON-NEGL. MANSLAUGHTE            3
5                            RAPE            2
6                         ROBBERY            3
7                  TOTAL FELONIES            4
