## SPATIAL ANALYSIS

In [2]:
import os
import requests

import numpy as np
import pandas as pd
import seaborn as sns

import folium
import geopandas as gpd
import fiona
import shapely


import matplotlib.pyplot as plt
from matplotlib.pyplot import figure
%matplotlib inline
import zipfile
from zipfile import ZipFile
from sklearn.neighbors import NearestNeighbors
from haversine import haversine, Unit

## Download the Shape file of US Counties


folder_root = '/Users/Anna/Documents/GitHub/Gun_Violence_E19/maps/tl_2017_us_county'

os.chdir(folder_root) # change directions
zip_file = zipfile.ZipFile(os.path.join(folder_root,'tl_2017_us_county.shp.zip')) # open the zip file
zip_file.extractall()
zip_file.close()
county_data = gpd.read_file('tl_2017_us_county.shp') # read the un-zipped shape-file
os.remove(os.path.join(folder_root, 'tl_2017_us_county.shp')) # delete the un-zipped file again, as it is too big to for GitHub sharing


county_data['Statecode'] = pd.to_numeric(county_data['STATEFP'])
county_data = county_data[['Statecode','NAME', 'NAMELSAD','geometry']]

state_names = pd.read_csv('state-geocodes-v2016.csv', sep = ';')
state_names = state_names[['State (FIPS)','Name']]
state_names

geo_county = pd.merge(county_data,state_names,left_on=['Statecode'], right_on = ['State (FIPS)'], how = 'left')

geo_county = geo_county[geo_county['Name']!='Alaska']
geo_county = geo_county[geo_county['Name']!='Hawaii']
geo_county = geo_county[geo_county['Name']!='Districts of Columbia']
geo_county = geo_county.dropna()
geo_county['Name'].unique()

fig, ax = plt.subplots(figsize = (20,20))
geo_county.plot(ax = ax)

plt.xlim(right = -65)
plt.xlim(left = -127)
plt.ylim(bottom = 22.5)


extreme = geo_county.bounds
min_x = extreme['minx'].min()
max_x = extreme['maxx'].max()
min_y = extreme['miny'].min()
max_y = extreme['maxy'].max()

horisontal_grid = np.arange(min_x,max_x,500)
vertical_grid = np.arange(min_y,max_y,500)

x = []
y = [] 

for h in horisontal_grid: 
    for v in vertical_grid: 
        x.append(h)
        y.append(v)

## Spatial Analysis

In [3]:
os.chdir('/Users/Anna/Documents/GitHub/Gun_Violence_E19/Data')
df = pd.read_csv('Data_incidents.csv.gz')

FileNotFoundError: [Errno 2] No such file or directory: 'Data_incidents.csv.gz'

In [3]:
## SPLIT THE DATA INTO THE SUB-GROUPS THAT WE ARE INTERESTED IN

#MASS SHOOTINGS
mass_shootings = df[df['Mass Shooting']== True].reset_index()

#NON SHOOTINGS
non_shootings = df[df['Non-Shooting Incident']== True]

#DEAD SHOOTING EXC. MASS SHOOTINGS
dead_shootings = df[df['Shot - Dead']==True]

dead_shooting = dead_shootings[dead_shootings['Mass Shooting']==False]
dead_shooting = dead_shootings[dead_shootings['Accidental Shooting']==False]


print(len(mass_shootings),len(non_shootings),len(dead_shooting))

1352 42134 47928


In [40]:
mass_shootings['state'].unique()
len(mass_shootings)

1352

In [4]:
X_ms = np.array(mass_shootings[['latitude','longitude']]) # extract the long-lat columns for KKN analysis
X_ns = np.array(non_shootings[['latitude','longitude']])
X_ds = np.array(dead_shootings[['latitude','longitude']])

## NON-SHOOTING NEIGHBOUR

In [5]:
distances_ns = []
for cor in range(len(X_ms)):
    frame = np.vstack((X_ms[cor],X_ns)) # stack eack ms incident with the comparable incidents
    frame = np.radians(frame) # Convert into radians
    kkn = NearestNeighbors(n_neighbors=2, algorithm='auto', metric='haversine').fit(frame)
    distance, indices = kkn.kneighbors([frame[0]]) # only return distance, indicies for the first row (the mass shooting)
    dist = distance[0][1] * 6371 # convert into km
    ns_cor = frame[indices[0][1]] #coordinates for the nearest neighbor (not saved outside loop)

    distances_ns.append(dist)


## DEAD-SHOOTING NEIGHBOUR

In [6]:
distances_ds = []
for cor in range(len(X_ms)):
    frame = np.vstack((X_ms[cor],X_ds))
    frame = np.radians(frame)
    kkn = NearestNeighbors(n_neighbors=2, algorithm='auto', metric='haversine').fit(frame)
    distance, indices = kkn.kneighbors([frame[0]])
    dist = distance[0][1] * 6371
    ns_cor = frame[indices[0][1]]

    distances_ds.append(dist)

## MASS-SHOOTING NEIGHBOUR

In [7]:
X_ms = np.radians(X_ms)
kkn = NearestNeighbors(n_neighbors=2, algorithm='auto', metric='haversine').fit(X_ms)
distance, indices = kkn.kneighbors()

In [8]:
distances_ms = []
for i in range(len(distance)):
    dist = distance[i][1] * 6371
    distances_ms.append(dist)

In [43]:
nn_ms = pd.DataFrame(distances_ms, columns = ['ms_distance']);
nn_ns = pd.DataFrame(distances_ns, columns = ['ns_distance']);
nn_ds = pd.DataFrame(distances_ds, columns = ['ds_distance']);

In [44]:
print(len(nn_ms), len(nn_ns), len(nn_ds))

1352 1352 1352


In [45]:
# The columns to dataframe
mass_shootings['MS_Dist'] = nn_ms
mass_shootings['NS_Dist'] = nn_ns
mass_shootings['DS_Dist'] = nn_ds

In [111]:
mass_shootings

Unnamed: 0.1,index,Unnamed: 0,incident_id,state,city_or_county,address,n_killed,n_injured,incident_url,source_url,...,Black_AA_Pop,AmericanIndian_AlasNative_Pop,Asian_Pop,PacificNative_Pop,OtherRace_Pop,Social_Security,PerCapita_Income,MS_Dist,NS_Dist,DS_Dist
0,71,71,92194,Virginia,Norfolk,Rockingham Street and Berkley Avenue Extended,2,2,http://www.gunviolencearchive.org/incident/92194,http://www.dailypress.com/news/crime/dp-norfol...,...,,,,,,,,2.595616,0.447451,0.000000
1,321,321,92704,New York,Queens,Farmers Boulevard and 133rd Avenue,1,3,http://www.gunviolencearchive.org/incident/92704,http://abclocal.go.com/wabc/story,...,429817.0,8821.0,578028.0,1062.0,72589.0,209773.0,26572.0,2.630121,0.956968,0.000000
2,1481,1481,95500,Louisiana,Tallulah,3600 block of Highway 80 W,0,6,http://www.gunviolencearchive.org/incident/95500,http://www.myarklamiss.com/crime/update-3-new-...,...,,,,,,,,30.537472,26.329290,2.673553
3,1508,1508,95579,Illinois,Elgin,300 block of North Street,0,5,http://www.gunviolencearchive.org/incident/95579,http://www.chicagotribune.com/news/local/break...,...,29574.0,2082.0,19114.0,36.0,11808.0,45202.0,31869.0,38.002292,0.826531,0.640190
4,1567,1567,95550,Alabama,Huntsville,University Drive,0,5,http://www.gunviolencearchive.org/incident/95550,http://blog.al.com/breaking/2014/01/huntsville...,...,82298.0,1526.0,8709.0,341.0,13108.0,39030.0,32458.0,11.969211,2.151040,0.191025
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1347,225745,225745,1074886,California,San Francisco,949 Geneva Ave,1,5,http://www.gunviolencearchive.org/incident/107...,https://www.sfgate.com/crime/article/Deceased-...,...,16374.0,3089.0,226445.0,8787.0,48865.0,73204.0,62017.0,4.530501,0.852293,0.000000
1348,226075,226075,1077180,Utah,Salt Lake City (West Valley City),3180 South 5600 West,0,4,http://www.gunviolencearchive.org/incident/107...,https://www.deseretnews.com/article/900014120/...,...,23220.0,9747.0,48833.0,17161.0,43551.0,89130.0,33658.0,20.641806,1.280196,0.622692
1349,226575,226575,1080234,Texas,Plano,7401 Alma Dr,1,3,http://www.gunviolencearchive.org/incident/108...,https://dallasinformer.com/272398/victim-who-d...,...,101248.0,4647.0,160058.0,154.0,30808.0,63271.0,43910.0,4.216156,3.052364,0.000000
1350,226620,226620,1080442,New Jersey,Camden,Kaighn Ave and Haddon Ave,0,4,http://www.gunviolencearchive.org/incident/108...,https://www.courierpostonline.com/story/news/c...,...,102304.0,609.0,30542.0,63.0,16122.0,66307.0,34891.0,5.916698,0.370844,0.000000


In [47]:
#extract relevant columns
ms_nn = mass_shootings[['state','County','NS_Dist','DS_Dist','MS_Dist']]

In [49]:
ms_nn_desc = ms_nn.groupby(['state','County']).describe().reset_index() # descriptive statistics on the KKN
ms_nn_desc_state = ms_nn.groupby(['state']).describe().reset_index() 


In [98]:
#sorted_desc = ms_nn_desc.sort_values(('NS_Dist', 'count'), ascending=False).head(10)
#sorted_desc
sorted_desc1 = ms_nn_desc_state.sort_values(('NS_Dist', 'count'), ascending=False).round(2)

#make range columns
sorted_desc1['NS_Range'] = sorted_desc1[('NS_Dist',   'min')].map(str) +'-'+ sorted_desc1[('NS_Dist',   'max')].map(str)
sorted_desc1['DS_Range'] = sorted_desc1[('DS_Dist',   'min')].map(str) +'-'+ sorted_desc1[('DS_Dist',   'max')].map(str)
sorted_desc1['MS_Range'] = sorted_desc1[('MS_Dist',   'min')].map(str) +'-'+ sorted_desc1[('MS_Dist',   'max')].map(str)

In [100]:
sorted_desc1.head(2)

Unnamed: 0_level_0,state,NS_Dist,NS_Dist,NS_Dist,NS_Dist,NS_Dist,NS_Dist,NS_Dist,NS_Dist,DS_Dist,...,MS_Dist,MS_Dist,MS_Dist,MS_Dist,MS_Dist,MS_Dist,MS_Dist,NS_Range,DS_Range,MS_Range
Unnamed: 0_level_1,Unnamed: 1_level_1,count,mean,std,min,25%,50%,75%,max,count,...,mean,std,min,25%,50%,75%,max,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
3,California,155.0,2.02,8.86,0.0,0.4,0.9,1.62,109.99,155.0,...,13.46,32.06,0.47,1.75,4.1,9.58,299.32,0.0-109.99,0.0-4.53,0.47-299.32
9,Illinois,133.0,0.86,0.87,0.0,0.34,0.64,1.15,7.58,133.0,...,6.96,16.65,0.22,0.74,1.3,3.52,112.31,0.0-7.58,0.0-6.78,0.22-112.31


In [112]:
#sorted_desc.iloc[:, [0,1,2,3,5,9,11,13,17,19,21,25]]
#sorted_desc1.iloc[:, [0,1,2,25,10,26,18,27]].head(10)
print(sorted_desc1.iloc[:, [0,1,2,25,10,26,18,27]].to_latex(index=False))

\begin{tabular}{lrrlrlrl}
\toprule
          state & \multicolumn{2}{l}{NS\_Dist} &     NS\_Range & DS\_Dist &   DS\_Range & MS\_Dist &       MS\_Range \\
                &   count & \multicolumn{2}{l}{mean} & \multicolumn{2}{l}{mean} & \multicolumn{2}{l}{mean} \\
\midrule
     California &   155.0 &   2.02 &   0.0-109.99 &    0.23 &   0.0-4.53 &   13.46 &    0.47-299.32 \\
       Illinois &   133.0 &   0.86 &     0.0-7.58 &    0.16 &   0.0-6.78 &    6.96 &    0.22-112.31 \\
        Florida &   102.0 &   1.45 &     0.0-28.3 &    0.49 &  0.0-20.04 &   16.12 &     0.28-129.1 \\
          Texas &    84.0 &   2.57 &    0.0-17.33 &    0.20 &   0.0-2.46 &   33.24 &    1.84-258.57 \\
       New York &    64.0 &   0.53 &     0.0-4.42 &    0.21 &   0.0-1.97 &    7.39 &    0.15-117.63 \\
        Georgia &    63.0 &   3.34 &    0.0-28.63 &    0.77 &  0.0-16.88 &   21.53 &    0.48-107.45 \\
      Louisiana &    54.0 &   1.04 &    0.0-26.33 &    0.15 &   0.0-2.67 &   11.57 &    0.75-116.26 \\
     

## Get Demographics for Couties and measure racial index

White_Pop
Black_AA_Pop
AmericanIndian_AlasNative_Pop
Asian_Pop
PacificNative_Pop
OtherRace_Pop


Unnamed: 0_level_0,Unnamed: 1_level_0,White_Pop,Black_AA_Pop,AmericanIndian_AlasNative_Pop,Asian_Pop,PacificNative_Pop,OtherRace_Pop,Social_Security,PerCapita_Income,Total_Pop,White_Pop_share,Black_AA_Pop_share,AmericanIndian_AlasNative_Pop_share,Asian_Pop_share,PacificNative_Pop_share,OtherRace_Pop_share
State,County,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
Alabama,Baldwin County,178930.833333,20261.371795,1766.166667,1337.628205,4.615385,3224.628205,30606.474359,28905.333333,205525.243590,1.148630,10.143698,116.367978,153.649006,4.453047e+04,63.736106
Alabama,Calhoun County,85161.300000,23908.500000,343.437500,1078.350000,0.000000,2423.725000,17437.520408,23778.959184,112915.312500,1.325899,4.722810,328.779800,104.711191,inf,46.587510
Alabama,Cullman County,79136.000000,920.772727,323.909091,516.818182,0.000000,998.954545,11766.360000,21192.960000,81896.454545,1.034882,88.943180,252.837777,158.462797,inf,81.982163
Alabama,DeKalb County,60859.069767,1131.720930,781.813953,164.930233,881.093023,1599.325581,9677.162791,19522.093023,65417.953488,1.074909,57.803962,83.674579,396.640158,7.424636e+01,40.903462
Alabama,Elmore County,60618.930233,18080.069767,223.906977,217.488372,0.000000,1090.325581,9822.214286,25636.767857,80230.720930,1.323526,4.437523,358.321666,368.896600,inf,73.584187
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Wisconsin,Waukesha County,368180.443182,6358.437500,964.193182,13463.602273,120.738636,6103.340909,50485.153409,41973.329545,395190.755682,1.073362,62.152181,409.866781,29.352527,3.273109e+03,64.749907
Wisconsin,Winnebago County,156990.464789,3809.295775,985.887324,4568.507042,45.549296,2531.267606,21504.014085,29457.394366,168930.971831,1.076059,44.347035,171.349167,36.977282,3.708750e+03,66.737698
Wisconsin,Wood County,69340.630769,546.492308,409.507692,1457.030769,29.707692,1107.815385,11083.215385,27722.861538,72891.184615,1.051205,133.380074,177.997107,50.027210,2.453613e+03,65.797231
Wyoming,Laramie County,87651.148936,2549.000000,788.340426,1042.744681,156.340426,3542.106383,11855.234043,31567.063830,95729.680851,1.092167,37.555779,121.431907,91.805485,6.123156e+02,27.026202
