---

## NOTEBOOK PURPOSE

##### This notebook shows how each raw dataset was cleaned and where the resulting CSV files were saved, so that each cleaned dataset can be loaded in its own notebook for further breakdown, analysis, and visualization.

---

In [1]:
import pandas as pd

# Data Cleaning for Mass Shooting Data

In [2]:
# Load each yearly mass shooting CSV into its own dataframe (one per year, 2014-2018).
data2014, data2015, data2016, data2017, data2018 = map(
    pd.read_csv,
    (
        'RawDatasets/mass_shooting_2014_gva.csv',
        'RawDatasets/mass_shooting_2015_gva.csv',
        'RawDatasets/mass_shooting_2016_gva.csv',
        'RawDatasets/mass_shooting_2017_gva.csv',
        'RawDatasets/mass_shooting_2018_gva.csv',
    ),
)

In [3]:
# Stack the yearly dataframes into one frame with a fresh 0..n-1 index.
# pd.concat is used instead of DataFrame.append, which was deprecated in
# pandas 1.4 and removed in pandas 2.0; ignore_index=True takes the place of
# the separate reset_index(drop=True) step.
# The unnecessary 'Operations' column is dropped.
mass_shooting_full_data = pd.concat(
    [data2014, data2015, data2016, data2017, data2018],
    ignore_index=True,
).drop(columns='Operations')

# Convert 'Incident Date' to proper datetime format, then extract the year
# into its own column for aggregation.
mass_shooting_full_data['Incident Date'] = pd.to_datetime(mass_shooting_full_data['Incident Date'])
mass_shooting_full_data['Year'] = mass_shooting_full_data['Incident Date'].dt.year

In [4]:
# preview the first five rows of the combined mass shooting data
mass_shooting_full_data.head(n=5)

Unnamed: 0,Incident Date,State,City Or County,Address,# Killed,# Injured,Year
0,2014-12-29,Louisiana,New Orleans,Poydras and Bolivar,0,4,2014
1,2014-12-27,California,Los Angeles,8800 block of South Figueroa Street,1,3,2014
2,2014-12-27,California,Sacramento,4000 block of May Street,0,4,2014
3,2014-12-26,Illinois,East St. Louis,2500 block of Summit Avenue,1,3,2014
4,2014-12-24,Missouri,Saint Louis,18th and Pine,1,3,2014


In [5]:
# Save the cleaned mass shooting data as a new CSV (without the index column)
# so other notebooks can load it.
mass_shooting_full_data.to_csv(
    path_or_buf='CleanDatasets/mass_shooting_full_data.csv',
    index=False,
)

---

# Data Cleaning for School Gun Violence Data

In [6]:
# Load the raw school gun violence CSV and clean it in one chain:
#   - drop the unnecessary 'Unnamed: 0' column
#   - convert 'Date' to proper datetime format
#   - extract the year into its own 'Year' column for aggregation
#     (assign evaluates keyword arguments in order, so 'Year' sees the
#     already-converted 'Date')
school_shootings = (
    pd.read_csv('RawDatasets/pah_us_school_gun_violence_amaral_lab.csv')
    .drop(columns='Unnamed: 0')
    .assign(
        Date=lambda frame: pd.to_datetime(frame['Date']),
        Year=lambda frame: frame['Date'].dt.year,
    )
)

In [7]:
# preview the first five rows of the cleaned school shootings data
school_shootings.head(n=5)

Unnamed: 0,Date,City,State,AreaType,School,Fatalities,Year
0,1990-05-20,Centerville,Tennessee,suburban,HS,1,1990
1,1990-08-26,Las Vegas,Nevada,urban,HS,1,1990
2,1991-01-08,Richardson,Texas,urban,HS,0,1991
3,1991-04-23,Compton,California,urban,MS,1,1991
4,1991-09-18,Crosby,Texas,rural,HS,1,1991


In [8]:
# Save the cleaned school shootings data as a new CSV (without the index column)
# so other notebooks can load it.
school_shootings.to_csv(
    path_or_buf='CleanDatasets/school_shootings.csv',
    index=False,
)

---

# Data Cleaning for Gun Death Data

In [9]:
# This dataset is loaded as-is; no real cleaning is needed.
gun_deaths = pd.read_csv(
    filepath_or_buffer='RawDatasets/2012-2014_gun_death_data_CDC.csv',
)
# preview the first five rows
gun_deaths.head(n=5)

Unnamed: 0,year,month,intent,police,sex,age,race,hispanic,place,education
0,2012,1,Suicide,0,M,34.0,Asian/Pacific Islander,100,Home,BA+
1,2012,1,Suicide,0,F,21.0,White,100,Street,Some college
2,2012,1,Suicide,0,M,60.0,White,100,Other specified,BA+
3,2012,2,Suicide,0,M,64.0,White,100,Home,BA+
4,2012,2,Suicide,0,M,31.0,White,100,Other specified,HS/GED


In [10]:
# Save the gun death data as a new CSV (without the index column)
# so other notebooks can load it.
gun_deaths.to_csv(
    path_or_buf='CleanDatasets/gun_deaths.csv',
    index=False,
)