## Alabama gun violence
This notebook explores gun violence incidents in Alabama from 2014 through 2017.

##### Import Python Modules

In [1]:
import folium
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from folium.plugins import MarkerCluster

##### Set up our base map

In [12]:
# Base map for the all-years view: centered on Alabama at a statewide zoom level.
# NOTE(review): newer folium releases may no longer bundle the Stamen tile sets --
# confirm against the installed version.
gun_map = folium.Map(
    location=[32.3182, -86.9023],
    zoom_start=7,
    tiles='Stamen Terrain',
)

In [3]:
# Gather the five CSV exports and combine them into a single dataframe
csv_paths = ['al_gun_%02d.csv' % part for part in range(1, 6)]
frames = [pd.read_csv(path) for path in csv_paths]

# Keep the per-file names available for any later inspection
gun_data_1, gun_data_2, gun_data_3, gun_data_4, gun_data_5 = frames

# ignore_index gives the combined frame one continuous 0..n-1 row index
df = pd.concat(frames, ignore_index=True)

In [4]:
# Preview the first rows of the combined frame to sanity-check the load
df.head()

Unnamed: 0.2,Unnamed: 0,Unnamed: 0.1,Unnamed: 0.1.1,X,address,city_or_county,date,incident_url,latitude,longitude,n_injured,n_killed
0,0,0,1,19,1300 block of Alabama Ave,Selma,2014-09-01,http://www.gunviolencearchive.org/incident/186796,32.40846,-87.01755,0,0
1,1,1,2,44,5900 Messer Airport Hwy,Birmingham,2014-09-01,http://www.gunviolencearchive.org/incident/192477,33.562078,-86.752285,0,0
2,2,2,3,152,3100 block of 20th Street,Tuscaloosa,2014-09-01,http://www.gunviolencearchive.org/incident/186638,33.194426,-87.576201,1,0
3,3,3,4,304,502 Barnett Boulevard,Tallassee,2014-09-02,http://www.gunviolencearchive.org/incident/186827,32.537157,-85.897266,0,0
4,4,4,5,305,1500 block of 53rd St,Birmingham (Ensley),2014-09-02,http://www.gunviolencearchive.org/incident/186723,33.48219,-86.896319,0,1


In [5]:
# Get a better idea of what our dataframe looks like:
# column dtypes and non-null counts (shows which columns have missing values)
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11034 entries, 0 to 11033
Data columns (total 12 columns):
Unnamed: 0        11034 non-null int64
Unnamed: 0.1      11034 non-null int64
Unnamed: 0.1.1    11034 non-null int64
X                 11034 non-null int64
address           10303 non-null object
city_or_county    11034 non-null object
date              11034 non-null object
incident_url      11034 non-null object
latitude          10958 non-null float64
longitude         10958 non-null float64
n_injured         11034 non-null int64
n_killed          11034 non-null int64
dtypes: float64(2), int64(6), object(4)
memory usage: 1.0+ MB


In [7]:
# The data contains duplicate entries -- nearly every incident appears twice.
# TODO: review why the source files duplicate entries (cause not yet investigated).
# Keep only the first row seen for each incident URL.
is_repeat = df.duplicated(subset='incident_url', keep='first')
df = df.loc[~is_repeat]
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 5474 entries, 0 to 10178
Data columns (total 12 columns):
Unnamed: 0        5474 non-null int64
Unnamed: 0.1      5474 non-null int64
Unnamed: 0.1.1    5474 non-null int64
X                 5474 non-null int64
address           5110 non-null object
city_or_county    5474 non-null object
date              5474 non-null object
incident_url      5474 non-null object
latitude          5435 non-null float64
longitude         5435 non-null float64
n_injured         5474 non-null int64
n_killed          5474 non-null int64
dtypes: float64(2), int64(6), object(4)
memory usage: 556.0+ KB


I want to see what I'm working with.  Then I'll make decisions on how to best present the information.

In [None]:
# Use MarkerCluster for clarity and page speed
mc = MarkerCluster()

# A marker needs both coordinates; the original check looked at latitude only,
# so a row with a missing longitude would still have been passed to folium.
located = df.dropna(subset=['latitude', 'longitude'])

# Build one marker per incident; the popup links to the source record and
# shows the date and casualty counts. itertuples avoids creating a Series
# per row, which iterrows does.
for row in located.itertuples(index=False):
    popup_html = ("<a href=%s> INCIDENT INFO </a> \n" % (row.incident_url) +
                  "<p>Date: %s" % (row.date) +
                  "<br>Number Killed:\t %d" % (row.n_killed) +
                  "<br>Number Injured:\t %d </p>" % (row.n_injured))
    folium.Marker(location=[row.latitude, row.longitude],
                  popup=popup_html).add_to(mc)

In [None]:
# Attach the clustered markers to the base map, then write the map out
# as a standalone HTML page covering all years.
output_html = 'gun_map_2014_to_2017.html'
mc.add_to(gun_map)
gun_map.save(output_html)

This data is good, but isn't organized in any meaningful way.  Time to subset by year.

In [8]:
def _rows_in_year(frame, year):
    """Return the rows of `frame` whose 'date' lies between Jan 1 and Dec 31 of `year`.

    The comparison is done against 'YYYY-MM-DD' strings, inclusive on both ends.
    """
    first_day = '%d-01-01' % year
    last_day = '%d-12-31' % year
    in_year = (frame['date'] >= first_day) & (frame['date'] <= last_day)
    return frame.loc[in_year]


df_2014 = _rows_in_year(df, 2014)
df_2015 = _rows_in_year(df, 2015)
df_2016 = _rows_in_year(df, 2016)
df_2017 = _rows_in_year(df, 2017)

Make HTML maps for each year.

In [14]:
def _statewide_map():
    """Return a new base map centered on Alabama; each year gets its own instance."""
    return folium.Map(location=[32.3182, -86.9023], zoom_start=7, tiles='Stamen Terrain')


def _incident_popup(row):
    """Build the popup HTML for one incident: source link, date and casualty counts."""
    return ("<a href=%s> INCIDENT INFO </a> \n" % (row.incident_url) +
            "<p>Date: %s" % (row.date) +
            "<br>Number Killed:\t %d" % (row.n_killed) +
            "<br>Number Injured:\t %d </p>" % (row.n_injured))


map_2014 = _statewide_map()
map_2015 = _statewide_map()
map_2016 = _statewide_map()
map_2017 = _statewide_map()

years = [df_2014, df_2015, df_2016, df_2017]
clusters = [MarkerCluster() for _ in years]
yearly_maps = [map_2014, map_2015, map_2016, map_2017]
output_files = ['gun_map_2014.html', 'gun_map_2015.html',
                'gun_map_2016.html', 'gun_map_2017.html']

# Build the cluster for each year's map, attach it, and save that map as HTML
for year, cluster, year_map, output_file in zip(years, clusters, yearly_maps, output_files):
    # A marker needs both coordinates; the original check looked at latitude
    # only, so a row with a missing longitude would still have reached folium.
    located = year.dropna(subset=['latitude', 'longitude'])
    for row in located.itertuples(index=False):
        folium.Marker(location=[row.latitude, row.longitude],
                      popup=_incident_popup(row)).add_to(cluster)
    cluster.add_to(year_map)
    year_map.save(output_file)

### It may be more useful to see heatmaps

In [None]:
%matplotlib inline
import matplotlib
import numpy as np
import matplotlib.pyplot as plt

year_labels = ['2014', '2015', '2016', '2017']
yearly_incidents = np.array([df_2014.shape[0], df_2015.shape[0], df_2016.shape[0], df_2017.shape[0]])

In [None]:
# show proportion of incidents as stacked bar
n_injured = np.array([sum(df_2014.n_injured > 0), 
                      sum(df_2015.n_injured > 0), 
                      sum(df_2016.n_injured > 0), 
                      sum(df_2017.n_injured > 0)])
n_killed = np.array([sum(df_2014.n_killed > 0), 
                     sum(df_2015.n_killed > 0), 
                     sum(df_2016.n_killed > 0), 
                     sum(df_2017.n_killed > 0)])

n_neither = yearly_incidents - (n_injured + n_killed)
plt.bar(x_labels, n_neither);
plt.bar(x_labels, n_injured, bottom=n_neither);
plt.bar(x_labels, n_killed, bottom=(n_neither+n_injured));
plt.legend(['No Reported Harm', 'Injured', 'Killed'], bbox_to_anchor=(1, 1.05))
plt.title('Gun Violence Incidents\nAlabama 2014-2017');
plt.ylabel('Yearly Incidents');