In [8]:
import os
from urllib.parse import quote_plus

import pandas as pd
import psycopg2
from sqlalchemy import create_engine, inspect

In [9]:
# Load the accidental-death incidents from the CSV export.
csv_file1 = "Accidental_Death_Data.csv"
accidental_death_df = pd.read_csv(csv_file1)

# Keep only the id, date, location and casualty columns; .copy() leaves the raw frame untouched.
death_columns = ['Incident ID', 'Incident Date', 'State', 'City Or County', '# Killed', '# Injured']
new_accidental_death_df = accidental_death_df[death_columns].copy()

# Rename the columns to identifiers without spaces or '#' characters.
death_renames = {
    "Incident ID": "Incident_ID",
    "Incident Date": "Incident_Date",
    "State": "State",
    "City Or County": "CityorCounty",
    "# Killed": "Num_Killed",
    "# Injured": "Num_Injured",
}
clean_accidental_death_df = new_accidental_death_df.rename(columns=death_renames)

# Preview the cleaned frame (last expression is the cell output).
clean_accidental_death_df.head()

Unnamed: 0,Incident_ID,Incident_Date,State,CityorCounty,Num_Killed,Num_Injured
0,1494737,2-Sep-19,California,Oakland,1,0
1,1494530,1-Sep-19,Mississippi,Carriere,1,0
2,1495103,1-Sep-19,Alabama,Geneva,1,0
3,1491718,29-Aug-19,Michigan,Detroit,1,1
4,1490546,28-Aug-19,Virginia,Petersburg,1,0


In [10]:
# Load the accidental-injury incidents from the CSV export.
csv_file2 = "Accidental_Injury_Data.csv"
accidental_injury_df = pd.read_csv(csv_file2)

# Keep only the id, date, location and casualty columns; .copy() leaves the raw frame untouched.
injury_columns = ['Incident ID', 'Incident Date', 'State', 'City Or County', '# Killed', '# Injured']
new_accidental_injury_df = accidental_injury_df[injury_columns].copy()

# Rename the columns to identifiers without spaces or '#' characters.
injury_renames = {
    "Incident ID": "Incident_ID",
    "Incident Date": "Incident_Date",
    "State": "State",
    "City Or County": "CityorCounty",
    "# Killed": "Num_Killed",
    "# Injured": "Num_Injured",
}
clean_accidental_injury_df = new_accidental_injury_df.rename(columns=injury_renames)

# Preview the cleaned frame (last expression is the cell output).
clean_accidental_injury_df.head()

Unnamed: 0,Incident_ID,Incident_Date,State,CityorCounty,Num_Killed,Num_Injured
0,1495009,2-Sep-19,Texas,Port Neches,0,1
1,1494710,2-Sep-19,Pennsylvania,Whitehall,0,1
2,1494614,2-Sep-19,Michigan,Detroit,0,1
3,1494595,2-Sep-19,Michigan,Detroit,0,1
4,1494523,2-Sep-19,Kentucky,Burnside,0,1


In [11]:
# Load the mass-shooting incidents from the CSV export.
csv_file3 = "Mass_Shooting_Data.csv"
mass_shooting_df = pd.read_csv(csv_file3)

# Keep only the id, date, location and casualty columns; .copy() leaves the raw frame untouched.
shooting_columns = ['Incident ID', 'Incident Date', 'State', 'City Or County', '# Killed', '# Injured']
new_mass_shooting_df = mass_shooting_df[shooting_columns].copy()

# Rename the columns to identifiers without spaces or '#' characters.
shooting_renames = {
    "Incident ID": "Incident_ID",
    "Incident Date": "Incident_Date",
    "State": "State",
    "City Or County": "CityorCounty",
    "# Killed": "Num_Killed",
    "# Injured": "Num_Injured",
}
clean_mass_shooting_df = new_mass_shooting_df.rename(columns=shooting_renames)

# Preview the cleaned frame (last expression is the cell output).
clean_mass_shooting_df.head()

Unnamed: 0,Incident_ID,Incident_Date,State,CityorCounty,Num_Killed,Num_Injured
0,1494697,2-Sep-19,North Carolina,Greensboro,2,2
1,1495192,2-Sep-19,Alabama,Elkmont,5,0
2,1495141,2-Sep-19,Illinois,Chicago,0,4
3,1494669,1-Sep-19,North Carolina,Rocky Mount,0,4
4,1493374,1-Sep-19,Ohio,Toledo,0,4


In [12]:
#Create Engine and connection to Database
# The dialect name must be "postgresql": SQLAlchemy 1.4+ rejects the old "postgres://" alias.
# The password is taken from the PGPASSWORD environment variable so it does not have to be
# saved in the notebook; the original placeholder is kept as the fallback value.
# quote_plus() escapes characters such as "@" or "/" that would otherwise break the URL.
db_password = quote_plus(os.environ.get("PGPASSWORD", "PASSWORD"))
engine = create_engine(
    'postgresql://postgres:{}@localhost:5432/Gun_Violence'.format(db_password)
)
# Open a connection right away so a bad URL or unreachable server fails in this cell.
conn = engine.connect()

In [13]:
#Verify tables
# Engine.table_names() is deprecated in SQLAlchemy 1.4 and removed in 2.0; the Inspector
# returns the same list of table names and is available in every SQLAlchemy version.
inspect(engine).get_table_names()

['mass_shootings',
 'accidental_deaths',
 'accidental_injuries',
 'Mass_Shootings']

In [14]:
# Write the cleaned mass-shooting DataFrame into the 'mass_shootings' table.
# index=False keeps the DataFrame index out of the table.
# NOTE: if_exists='append' adds rows to the existing table, so re-running this cell
# inserts the same rows again.
clean_mass_shooting_df.to_sql(
    name='mass_shootings',
    con=engine,
    if_exists='append',
    index=False,
)


In [15]:
# Read the table back from the database and preview it to confirm the rows were added.
mass_shootings_rows = pd.read_sql_query('select * from mass_shootings', con=engine)
mass_shootings_rows.head()

Unnamed: 0,Incident_ID,Incident_Date,State,CityorCounty,Num_Killed,Num_Injured
0,1494697,2019-09-02,North Carolina,Greensboro,2,2
1,1495192,2019-09-02,Alabama,Elkmont,5,0
2,1495141,2019-09-02,Illinois,Chicago,0,4
3,1494669,2019-09-01,North Carolina,Rocky Mount,0,4
4,1493374,2019-09-01,Ohio,Toledo,0,4


In [16]:
# Write the cleaned accidental-injury DataFrame into the 'accidental_injuries' table.
# index=False keeps the DataFrame index out of the table.
# NOTE: if_exists='append' adds rows to the existing table, so re-running this cell
# inserts the same rows again.
clean_accidental_injury_df.to_sql(
    name='accidental_injuries',
    con=engine,
    if_exists='append',
    index=False,
)


In [17]:
# Read the table back from the database and preview it to confirm the rows were added.
accidental_injuries_rows = pd.read_sql_query('select * from accidental_injuries', con=engine)
accidental_injuries_rows.head()

Unnamed: 0,Incident_ID,Incident_Date,State,CityorCounty,Num_Killed,Num_Injured
0,1495009,2019-09-02,Texas,Port Neches,0,1
1,1494710,2019-09-02,Pennsylvania,Whitehall,0,1
2,1494614,2019-09-02,Michigan,Detroit,0,1
3,1494595,2019-09-02,Michigan,Detroit,0,1
4,1494523,2019-09-02,Kentucky,Burnside,0,1


In [18]:
# Write the cleaned accidental-death DataFrame into the 'accidental_deaths' table.
# index=False keeps the DataFrame index out of the table.
# NOTE: if_exists='append' adds rows to the existing table, so re-running this cell
# inserts the same rows again.
clean_accidental_death_df.to_sql(
    name='accidental_deaths',
    con=engine,
    if_exists='append',
    index=False,
)


In [19]:
# Read the table back from the database and preview it to confirm the rows were added.
accidental_deaths_rows = pd.read_sql_query('select * from accidental_deaths', con=engine)
accidental_deaths_rows.head()

Unnamed: 0,Incident_ID,Incident_Date,State,CityorCounty,Num_Killed,Num_Injured
0,1494737,2019-09-02,California,Oakland,1,0
1,1494530,2019-09-01,Mississippi,Carriere,1,0
2,1495103,2019-09-01,Alabama,Geneva,1,0
3,1491718,2019-08-29,Michigan,Detroit,1,1
4,1490546,2019-08-28,Virginia,Petersburg,1,0


In [42]:
## Sample queries to address some of the project questions

# Which states have the highest number of mass-shooting incidents?
# groupby(...).count() tallies the non-null values of every column per state, so each
# column holds the number of incident rows for that state, not a total of people
# killed or injured.
# NOTE(review): if casualty totals (killed v. injured) were intended, .sum() on the
# Num_Killed / Num_Injured columns would be needed instead - confirm.
incident_counts_by_state = clean_mass_shooting_df.groupby("State").count()

# Order the states from most to fewest incidents.
Mass_Shootings_State = incident_counts_by_state.sort_values(["Num_Killed"], ascending=False)
Mass_Shootings_State



Unnamed: 0_level_0,Incident_ID,Incident_Date,CityorCounty,Num_Killed,Num_Injured
State,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
California,59,59,59,59,59
Illinois,57,57,57,57,57
Texas,30,30,30,30,30
Pennsylvania,28,28,28,28,28
Maryland,24,24,24,24,24
Florida,23,23,23,23,23
Georgia,21,21,21,21,21
Missouri,20,20,20,20,20
Tennessee,19,19,19,19,19
Louisiana,19,19,19,19,19
