# Gun Violence Incidents Data

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import re
%matplotlib inline 

In [2]:
# Read the raw incident records; the relative path is resolved against the
# kernel's working directory.
DATASET_CSV = 'gunviolence.csv'
data = pd.read_csv(DATASET_CSV)

## Data Investigation

In [3]:
# (rows, columns) on the first output line, the column labels after it
print(data.shape, data.columns, sep='\n')

(239677, 29)
Index(['incident_id', 'date', 'state', 'city_or_county', 'address', 'n_killed',
       'n_injured', 'incident_url', 'source_url',
       'incident_url_fields_missing', 'congressional_district', 'gun_stolen',
       'gun_type', 'incident_characteristics', 'latitude',
       'location_description', 'longitude', 'n_guns_involved', 'notes',
       'participant_age', 'participant_age_group', 'participant_gender',
       'participant_name', 'participant_relationship', 'participant_status',
       'participant_type', 'sources', 'state_house_district',
       'state_senate_district'],
      dtype='object')


In [4]:
# Show the first two incidents to see how the packed '<index>::<value>' cells look
data.iloc[:2]

Unnamed: 0,incident_id,date,state,city_or_county,address,n_killed,n_injured,incident_url,source_url,incident_url_fields_missing,...,participant_age,participant_age_group,participant_gender,participant_name,participant_relationship,participant_status,participant_type,sources,state_house_district,state_senate_district
0,461105,2013-01-01,Pennsylvania,Mckeesport,1506 Versailles Avenue and Coursin Street,0,4,http://www.gunviolencearchive.org/incident/461105,http://www.post-gazette.com/local/south/2013/0...,False,...,0::20,0::Adult 18+||1::Adult 18+||2::Adult 18+||3::A...,0::Male||1::Male||3::Male||4::Female,0::Julian Sims,,0::Arrested||1::Injured||2::Injured||3::Injure...,0::Victim||1::Victim||2::Victim||3::Victim||4:...,http://pittsburgh.cbslocal.com/2013/01/01/4-pe...,,
1,460726,2013-01-01,California,Hawthorne,13500 block of Cerise Avenue,1,3,http://www.gunviolencearchive.org/incident/460726,http://www.dailybulletin.com/article/zz/201301...,False,...,0::20,0::Adult 18+||1::Adult 18+||2::Adult 18+||3::A...,0::Male,0::Bernard Gillis,,0::Killed||1::Injured||2::Injured||3::Injured,0::Victim||1::Victim||2::Victim||3::Victim||4:...,http://losangeles.cbslocal.com/2013/01/01/man-...,62.0,35.0


## Data Pre-processing

In [5]:
# Clean values
# Columns whose cells pack several entries into one string as
# '<index>::<value>' items joined by '||', e.g. '0::Male||1::Male'.
col_list = ['participant_name', 'participant_type',  'participant_status', 'participant_gender', 'participant_age_group', 'participant_age', 'gun_stolen', 'gun_type']

# Strip the '<index>::' prefix from every item.
#  * '\d+' (not '\d'): an index can have several digits; with a single '\d',
#    '10::Male' would become '1Male'.
#  * The prefix is only matched at the start of the cell or right after a '||'
#    separator, so digits followed by '::' inside a value are left alone.
# NOTE(review): the prefix is the only thing tying an item to a specific
# participant. Cells can skip an index (the first row shown by data.head(2) has
# gender '0::Male||1::Male||3::Male||4::Female'), so once the prefix is gone a
# positional split moves later values up one slot. Keep the index through the
# split if exact per-participant alignment matters for the analysis.
data[col_list] = data[col_list].replace(r'(?:^|(?<=\|\|))\d+::', '', regex=True)

# Assign Int64 dtypes (pandas' nullable integer type, so missing values stay missing)
int_cols = ['congressional_district', 'n_guns_involved', 'state_house_district', 'state_senate_district']
data[int_cols] = data[int_cols].astype('Int64')

# Set incident_id as index (stored as a string identifier, not a number).
# Guarded so the cell can be re-run: after the first run incident_id is already
# the index and is no longer available as a column.
if 'incident_id' in data.columns:
    data['incident_id'] = data['incident_id'].astype(str)
    data = data.set_index('incident_id')

In [6]:
# Creating one row per involved individual
# Incident-level columns: everything that is not one of the packed columns.
table = data.drop(columns=col_list)
df = data[col_list]

# For every packed column: split on '||' into one column per position, then
# stack into a Series indexed by (incident_id, position); stack drops the
# missing positions. All columns are combined in a single concat (aligned on
# that two-level index) instead of growing a frame inside a loop, which
# re-aligned and copied the accumulated result on every pass.
# sort_index makes the row order explicit (by incident, then position) rather
# than relying on how concat unions the per-column indexes.
# NOTE(review): position k of every column lands on the same row; gun_stolen /
# gun_type are presumably numbered per gun rather than per participant, so
# pairing them with participant k may not be meaningful - confirm.
temp = pd.concat(
    {col: df[col].str.split(r'\|\|', expand=True).stack() for col in col_list},
    axis=1,
).sort_index()

# Attach the incident-level columns to every per-position row. The join key is
# the 'incident_id' index level; the inner join keeps only incidents that have
# at least one value in the packed columns.
table = pd.merge(table, temp, how='inner', on='incident_id')
table.head(2)

Unnamed: 0_level_0,date,state,city_or_county,address,n_killed,n_injured,incident_url,source_url,incident_url_fields_missing,congressional_district,...,state_house_district,state_senate_district,participant_name,participant_type,participant_status,participant_gender,participant_age_group,participant_age,gun_stolen,gun_type
incident_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
461105,2013-01-01,Pennsylvania,Mckeesport,1506 Versailles Avenue and Coursin Street,0,4,http://www.gunviolencearchive.org/incident/461105,http://www.post-gazette.com/local/south/2013/0...,False,14,...,,,Julian Sims,Victim,Arrested,Male,Adult 18+,20.0,,
461105,2013-01-01,Pennsylvania,Mckeesport,1506 Versailles Avenue and Coursin Street,0,4,http://www.gunviolencearchive.org/incident/461105,http://www.post-gazette.com/local/south/2013/0...,False,14,...,,,,Victim,Injured,Male,Adult 18+,,,
