In [74]:
import os
from urllib.parse import quote_plus

import pandas as pd
from sqlalchemy import create_engine, inspect

In [75]:
# Read the raw police-deaths CSV (path is relative to the notebook's working directory).
csv_file = "police_deaths.csv"
police_deaths_df = pd.read_csv(filepath_or_buffer=csv_file)

# Preview the first five rows.
police_deaths_df.head(5)

Unnamed: 0,person,dept,eow,cause,cause_short,date,year,canine,dept_name,state
0,Constable Darius Quimby,"Albany County Constable's Office, NY","EOW: Monday, January 3, 1791",Cause of Death: Gunfire,Gunfire,1791-01-03,1791,False,Albany County Constable's Office,NY
1,Sheriff Cornelius Hogeboom,"Columbia County Sheriff's Office, NY","EOW: Saturday, October 22, 1791",Cause of Death: Gunfire,Gunfire,1791-10-22,1791,False,Columbia County Sheriff's Office,NY
2,Deputy Sheriff Isaac Smith,"Westchester County Sheriff's Department, NY","EOW: Thursday, May 17, 1792",Cause of Death: Gunfire,Gunfire,1792-05-17,1792,False,Westchester County Sheriff's Department,NY
3,Marshal Robert Forsyth,United States Department of Justice - United S...,"EOW: Saturday, January 11, 1794",Cause of Death: Gunfire,Gunfire,1794-01-11,1794,False,United States Department of Justice - United S...,US
4,Sheriff Robert Maxwell,"Greenville County Sheriff's Office, SC","EOW: Sunday, November 12, 1797",Cause of Death: Gunfire,Gunfire,1797-11-12,1797,False,Greenville County Sheriff's Office,SC


In [76]:
# Keep only the columns needed downstream; .copy() detaches the result from
# the raw frame so later edits never touch police_deaths_df.
deaths_columns = ["year", "cause_short", "state"]
police_deaths_transformed = police_deaths_df.loc[:, deaths_columns].copy()

In [77]:
# Relabel the "year" column as "id" (the other columns keep their names).
police_deaths_transformed = police_deaths_transformed.rename(
    {"year": "id"},
    axis="columns",
)
police_deaths_transformed.head(5)

Unnamed: 0,id,cause_short,state
0,1791,Gunfire,NY
1,1791,Gunfire,NY
2,1792,Gunfire,NY
3,1794,Gunfire,US
4,1797,Gunfire,SC


In [78]:
#Set index
# Move the "id" column into the index. The guard plus rebinding (instead of
# inplace=True) keeps this cell idempotent: on a second run "id" is already
# the index, and an unguarded set_index would raise KeyError: 'id'.
if "id" in police_deaths_transformed.columns:
    police_deaths_transformed = police_deaths_transformed.set_index("id")
police_deaths_transformed.head()

Unnamed: 0_level_0,cause_short,state
id,Unnamed: 1_level_1,Unnamed: 2_level_1
1791,Gunfire,NY
1791,Gunfire,NY
1792,Gunfire,NY
1794,Gunfire,US
1797,Gunfire,SC


In [79]:
#Select the rows for year 2015
# The "id" index presumably holds the year parsed as integers by read_csv
# (verify with police_deaths_transformed.index.dtype). A string label such as
# '2015' on an integer index raises KeyError in current pandas, so compare
# against the stringified index instead: this works whether the years were
# read as integers or as text, and a boolean mask always yields a DataFrame
# (empty when the year is absent) rather than a Series or an exception.
year_mask = police_deaths_transformed.index.astype(str) == '2015'
year_filtered = police_deaths_transformed.loc[year_mask]
year_filtered.head()

Unnamed: 0_level_0,cause_short,state
id,Unnamed: 1_level_1,Unnamed: 2_level_1
2015,Struck by vehicle,CO
2015,Heart attack,MO
2015,Vehicle pursuit,MD
2015,Automobile accident,TX
2015,Automobile accident,TX


In [80]:
# Narrow the selected year's rows down to deaths whose short cause is exactly 'Gunfire'.
is_gunfire = year_filtered['cause_short'].eq('Gunfire')
gunfire_filtered = year_filtered[is_gunfire]
gunfire_filtered.head(5)

Unnamed: 0_level_0,cause_short,state
id,Unnamed: 1_level_1,Unnamed: 2_level_1
2015,Gunfire,GA
2015,Gunfire,PA
2015,Gunfire,US
2015,Gunfire,TR
2015,Gunfire,CA


In [81]:
# Count the non-null cause_short entries per state; "state" becomes the index.
state_grouped = gunfire_filtered.groupby(by="state").count()

# Give the count column a name that says what it measures.
state_grouped_df = state_grouped.rename(
    {"cause_short": "police_shooting_deaths"},
    axis="columns",
)
state_grouped_df.head(5)


Unnamed: 0_level_0,police_shooting_deaths
state,Unnamed: 1_level_1
AR,1
CA,4
CO,1
FL,1
GA,1


In [82]:
#Create database connection
# Credentials are read from the environment (standard libpq variable names)
# so that no secret is stored in the notebook. Only the password is required;
# the other settings fall back to the values this notebook used previously.
db_user = os.environ.get("PGUSER", "postgres")
# Raises KeyError if PGPASSWORD is unset. quote_plus escapes characters such
# as '@', ':' or '/' that would otherwise corrupt the connection URL.
db_password = quote_plus(os.environ["PGPASSWORD"])
db_host = os.environ.get("PGHOST", "localhost")
db_port = os.environ.get("PGPORT", "5432")
db_name = os.environ.get("PGDATABASE", "shootings")

connection_string = f"{db_user}:{db_password}@{db_host}:{db_port}/{db_name}"
engine = create_engine(f'postgresql://{connection_string}')

In [83]:
#confirm table
# List the tables visible through the engine. Engine.table_names() is
# deprecated in SQLAlchemy 1.4 and removed in 2.0; the Inspector API is the
# supported replacement and returns the same list of table names.
inspect(engine).get_table_names()

  engine.table_names()


['police_deaths']

In [85]:
# Write the per-state counts into the existing 'police_deaths' table.
# if_exists='append' adds rows on every execution of this cell, and
# index=True stores the "state" index as a column.
state_grouped_df.to_sql(
    name='police_deaths',
    con=engine,
    if_exists='append',
    index=True,
)