# Minnesota Crime Data
----
- Crime data by police dept for each year: 2012-2014
- Crime data by neighborhood in Mpls for years 2017-2019

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Mpls neighborhood crime data
mpls_crime = pd.read_csv("Resources/Minneapolis_NEIGHBORHOOD_CRIME_STATS.csv")

# MN crime data by jurisdiction (2014, 2013, 2012)
mn_crime_2014 = pd.read_csv("Resources/MN_crime_data_2014.csv", header=8, skipfooter=19, engine='python')
mn_crime_2013 = pd.read_csv("Resources/MN_crime_data_2013.csv", header=8, skipfooter=19, engine='python')
mn_crime_2012 = pd.read_csv("Resources/MN_crime_data_2012.csv", header=8, skipfooter=19, engine='python')

### Pairing down the data for crime by police dept in mn for each year

In [2]:
# Scale down number of columns
columns_of_interest = \
['Agency',
 'Population',
 'Real Violent Crime Total',
 'Real Violent Crime Rate',
 'Property crime total',
 'Property crime rate',
 'Crime Total',
 'Crime Rate']

new_column_names = \
[
    'Agency',
    'Population',
    'VC_Total',
    'VC_Rate',
    'PC_Total',
    'PC_Rate',
    'Crime_Total',
    'Crime_Rate'    
]

In [3]:
# Crime data for 2014

# get violent crime total, then violent crime rate
mn_crime_2014['Real Violent Crime Total'] = mn_crime_2014['Murder and nonnegligent Manslaughter'] + \
                                            mn_crime_2014['Revised rape /2'] + \
                                            mn_crime_2014['Robbery'] + \
                                            mn_crime_2014['Aggravated assault']

mn_crime_2014['Real Violent Crime Rate'] = mn_crime_2014['Real Violent Crime Total'] / \
                                           (mn_crime_2014['Population'] / 100000)

# get total crimes and total crime rate
mn_crime_2014['Crime Total'] = mn_crime_2014['Property crime total'] + \
                                   mn_crime_2014['Real Violent Crime Total']

mn_crime_2014['Crime Rate'] = mn_crime_2014['Crime Total'] / \
                              (mn_crime_2014['Population'] / 100000)

crime_rates_2014 = mn_crime_2014[columns_of_interest]

# Rename columns
crime_rates_2014.columns = new_column_names

# Export smaller dataframe with real violent crime rate included
crime_rates_2014.to_csv("Resources/2014_MN_Crime_Rates_OUTPUT_FILE.csv", index=False)

In [4]:
# DO THE SAME FOR 2013

# get violent crime total, then violent crime rate
mn_crime_2013['Real Violent Crime Total'] = mn_crime_2013['Murder and nonnegligent Manslaughter'] + \
                                            mn_crime_2013['Revised rape /2'] + \
                                            mn_crime_2013['Robbery'] + \
                                            mn_crime_2013['Aggravated assault']

mn_crime_2013['Real Violent Crime Rate'] = mn_crime_2013['Real Violent Crime Total'] / \
                                           (mn_crime_2013['Population'] / 100000)

# get total crimes and total crime rate
mn_crime_2013['Crime Total'] = mn_crime_2013['Property crime total'] + \
                                   mn_crime_2013['Real Violent Crime Total']

mn_crime_2013['Crime Rate'] = mn_crime_2013['Crime Total'] / \
                              (mn_crime_2013['Population'] / 100000)

crime_rates_2013 = mn_crime_2013[columns_of_interest]

# Rename columns
crime_rates_2013.columns = new_column_names

# Export smaller dataframe with real violent crime rate included
crime_rates_2013.to_csv("Resources/2013_MN_Crime_Rates_OUTPUT_FILE.csv", index=False)

In [5]:
# DO THE SAME FOR 2012

# get violent crime total, then violent crime rate
mn_crime_2012['Real Violent Crime Total'] = mn_crime_2012['Murder and nonnegligent Manslaughter'] + \
                                            mn_crime_2012['Revised rape /2'] + \
                                            mn_crime_2012['Robbery'] + \
                                            mn_crime_2012['Aggravated assault']

mn_crime_2012['Real Violent Crime Rate'] = mn_crime_2012['Real Violent Crime Total'] / \
                                           (mn_crime_2012['Population'] / 100000)

# get total crimes and total crime rate
mn_crime_2012['Crime Total'] = mn_crime_2012['Property crime total'] + \
                                   mn_crime_2012['Real Violent Crime Total']

mn_crime_2012['Crime Rate'] = mn_crime_2012['Crime Total'] / \
                              (mn_crime_2012['Population'] / 100000)

crime_rates_2012 = mn_crime_2012[columns_of_interest]

# Rename columns
crime_rates_2012.columns = new_column_names

# Export smaller dataframe with real violent crime rate included
crime_rates_2012.to_csv("Resources/2012_MN_Crime_Rates_OUTPUT_FILE.csv", index=False)

In [6]:
# Drop rows with NaN values
## Each county's sherrif's office gets dropped
### It's okay because the city police depts are more relevant

crime_rates_2014.dropna(how='any', inplace=True)
crime_rates_2013.dropna(how='any', inplace=True)

# 2012 data doesn't contain violent crime data
crime_rates_2012 = crime_rates_2012.loc[crime_rates_2012['Agency'].str.contains('Sheriff') == False]

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


### Merging dataframes

In [7]:
crimes_2013_2014 = pd.merge(crime_rates_2014.copy(), crime_rates_2013.copy(),
                           on='Agency', how='left', suffixes=['_2014', '_2013'])

crimes_2012_to_2014 = pd.merge(crimes_2013_2014.copy(), crime_rates_2012.copy(),
                           on='Agency', how='left', suffixes=['', '_2012'])

In [8]:
crimes_2013_2014

Unnamed: 0,Agency,Population_2014,VC_Total_2014,VC_Rate_2014,PC_Total_2014,PC_Rate_2014,Crime_Total_2014,Crime_Rate_2014,Population_2013,VC_Total_2013,VC_Rate_2013,PC_Total_2013,PC_Rate_2013,Crime_Total_2013,Crime_Rate_2013
0,Albert Lea Police Dept,17833.0,29,162.619862,398,2231.8,427,2394.437279,17867.0,35,195.891868,447,2501.8,482,2697.710864
1,Alexandria Police Dept,11713.0,26,221.975583,439,3748.0,465,3969.947921,11716.0,35,298.736770,363,3098.3,398,3397.063844
2,Anoka Police Dept,17295.0,52,300.664932,564,3261.1,616,3561.723041,17270.0,37,214.244354,565,3271.6,602,3485.813550
3,Apple Valley Police Dept,50472.0,42,83.214456,1042,2064.5,1084,2147.725472,50262.0,49,97.489157,1163,2313.9,1212,2411.364450
4,Austin Police Dept,24766.0,66,266.494387,585,2362.1,651,2628.603731,24823.0,61,245.739838,754,3037.5,815,3283.245377
5,Bemidji Police Dept,14513.0,63,434.093571,1174,8089.3,1237,8523.392820,13805.0,44,318.725100,1148,8315.8,1192,8634.552698
6,Big Lake Police Dept,10348.0,13,125.628141,143,1381.9,156,1507.537688,10296.0,17,165.112665,107,1039.2,124,1204.351204
7,Blaine Police Dept,61187.0,44,71.910700,1840,3007.2,1884,3079.085427,60093.0,49,81.540279,1717,2857.2,1766,2938.778227
8,Bloomington Police Dept,87163.0,157,180.122300,3202,3673.6,3359,3853.699391,87057.0,128,147.030107,3283,3771.1,3411,3918.122609
9,Brainerd Police Dept,13462.0,90,668.548507,655,4865.5,745,5534.095974,13493.0,55,407.618765,750,5558.4,805,5966.056474


In [9]:
crimes_2012_to_2014

Unnamed: 0,Agency,Population_2014,VC_Total_2014,VC_Rate_2014,PC_Total_2014,PC_Rate_2014,Crime_Total_2014,Crime_Rate_2014,Population_2013,VC_Total_2013,...,PC_Rate_2013,Crime_Total_2013,Crime_Rate_2013,Population,VC_Total,VC_Rate,PC_Total,PC_Rate,Crime_Total,Crime_Rate
0,Albert Lea Police Dept,17833.0,29,162.619862,398,2231.8,427,2394.437279,17867.0,35,...,2501.8,482,2697.710864,18082.0,,,494,2732.0,,
1,Alexandria Police Dept,11713.0,26,221.975583,439,3748.0,465,3969.947921,11716.0,35,...,3098.3,398,3397.063844,11192.0,,,320,2859.2,,
2,Anoka Police Dept,17295.0,52,300.664932,564,3261.1,616,3561.723041,17270.0,37,...,3271.6,602,3485.813550,17367.0,,,747,4301.3,,
3,Apple Valley Police Dept,50472.0,42,83.214456,1042,2064.5,1084,2147.725472,50262.0,49,...,2313.9,1212,2411.364450,49827.0,,,1116,2239.7,,
4,Austin Police Dept,24766.0,66,266.494387,585,2362.1,651,2628.603731,24823.0,61,...,3037.5,815,3283.245377,24993.0,,,712,2848.8,,
5,Bemidji Police Dept,14513.0,63,434.093571,1174,8089.3,1237,8523.392820,13805.0,44,...,8315.8,1192,8634.552698,13745.0,,,1332,9690.8,,
6,Big Lake Police Dept,10348.0,13,125.628141,143,1381.9,156,1507.537688,10296.0,17,...,1039.2,124,1204.351204,10222.0,,,144,1408.7,,
7,Blaine Police Dept,61187.0,44,71.910700,1840,3007.2,1884,3079.085427,60093.0,49,...,2857.2,1766,2938.778227,57953.0,,,1922,3316.5,,
8,Bloomington Police Dept,87163.0,157,180.122300,3202,3673.6,3359,3853.699391,87057.0,128,...,3771.1,3411,3918.122609,84596.0,,,3212,3796.9,,
9,Brainerd Police Dept,13462.0,90,668.548507,655,4865.5,745,5534.095974,13493.0,55,...,5558.4,805,5966.056474,13734.0,,,695,5060.4,,


In [10]:
# Create dataframe storing avg crime rates, populations for each department

avg_crime_rates = crime_rates_2014.filter(['Agency'], axis=1)

avg_columns = ['Avg_Population', 'Avg_VC', 'Avg_PC', 'Avg_Crime_Rate']

crime_rates_columns = ['Population', 'VC_Rate', 'PC_Rate', 'Crime_Rate']

# tracking index to iterate through avg_columns
index=0

for column in crime_rates_columns:
    
    avg_crime_rates[avg_columns[index]] = (crime_rates_2014[column] + crime_rates_2013[column]) / 2
    
    index += 1
    
avg_crime_rates.reset_index(drop=True, inplace=True)


In [14]:
# Remove extraneous substrings from 'Agency' names

agency_list = []
substrings = [' Police Dept', 'City Of ', ' Safety Dept',
              ' Dept Public Safety', ' City', ' Dept Of Public Safety']


for name in avg_crime_rates.Agency:

    for sub in substrings:
        
        if sub in name:
            
            name = str.replace(name, sub, '')
        
    agency_list.append(name)

# update agency column
avg_crime_rates.Agency = agency_list

# Output as CSV
avg_crime_rates.to_csv("Resources/2013-2014_MN_Crime_OUTPUT_FILE.csv", index=False)

Unnamed: 0,Agency,Avg_Population,Avg_VC,Avg_PC,Avg_Crime_Rate
0,Albert Lea,17850.0,179.255865,2366.80,2546.074071
1,Alexandria,11714.5,260.356176,3423.15,3683.505883
2,Anoka,17282.5,257.454643,3266.35,3523.768295
3,Apple Valley,50367.0,90.351806,2189.20,2279.544961
4,Austin,24794.5,256.117113,2699.80,2955.924554
5,Bemidji,14159.0,376.409335,8202.55,8578.972759
6,Big Lake,10322.0,145.370403,1210.55,1355.944446
7,Blaine,60640.0,76.725490,2932.20,3008.931827
8,Bloomington,87110.0,163.576203,3722.35,3885.911000
9,Brainerd,13477.5,538.083636,5211.95,5750.076224


In [None]:
avg_crime_rates.plot(x='Avg_PC', y='Avg_VC', kind='scatter', title='Violent Crime Rate vs. Property Crime Rate')

In [None]:
avg_crime_rates.plot(x='Avg_PC', y='Avg_Crime_Rate', kind='scatter', title='Crime Rate vs. Property Crime Rate')

In [None]:
avg_crime_rates.plot(x='Avg_PC', y='Avg_VC', kind='scatter', title='Violent Crime Rate vs. Property Crime Rate')

In [None]:
crime_df = pd.read_csv("Resources/2013-2014_MN_Avg_Crime_OUTPUT_FILE.csv")
crime_df.head()

In [None]:
# Remove extraneous substrings from 'Agency' names

agency_list = []
substrings = [' Police Dept', 'City Of ', ' Safety Dept',
              ' Dept Public Safety', ' City', ' Dept Of Public Safety']


for name in crime_df.Agency:

    for sub in substrings:
        
        if sub in name:
            
            name = str.replace(name, sub, '')
        
    agency_list.append(name)

In [None]:
crime_df.Agency = agency_list
crime_df

### Minneapolis crime data by neighborhood

In [None]:
# Begin working with data for minneapolis crime by neighborhood
mpls_crime.head()

In [None]:
# Create table showing avg number crimes reported by type for each
    # neighborhood over last few years
mpls_crime_avg = mpls_crime.pivot_table(values='number',
                                         index='neighborhood',
                                         columns='ucrDescription')
mpls_crime_avg

In [None]:
# Create multi-index series -- Neighborhood --> Type of Crime --> Year Reported --> Avg. Number Reported
mpls_crime_year_groups = mpls_crime.groupby(['neighborhood', 'ucrDescription', 'reportYear'])['number'].mean()

# Display as a Dataframe
mpls_neighborhoods = mpls_crime_year_groups.unstack()
mpls_neighborhoods