# PostgreSQL Database

This notebook creates a PostgreSQL database from the exported CSV files. Please be sure to verify your
database information in the config.py file before running it.

In [1]:
# Load dependencies
import pandas as pd
from sqlalchemy import create_engine, MetaData, inspect, Table, Column, Integer, String, text
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import Session
from sqlalchemy_utils import database_exists, create_database
from config import db_username, db_password, db_host, db_port, db_name, db_options

## Create Database

In [2]:
# Echo the configured database name, host, and user so they can be checked
# before anything is created or updated
for label, value in (('name', db_name), ('host', db_host), ('user', db_username)):
    print(f'Database {label}: {value}')

Database name: us_hate_crimes_test
Database host: localhost
Database user: postgres


In [3]:
# Create SQLAlchemy database engine
# The configured port is included so this engine targets the same server as the
# engine built in the Database Testing section, which also uses db_port.
# NOTE(review): credentials are interpolated as-is; a password containing
# characters such as '@' or '/' would need URL-escaping - confirm against config.py
db_url = f'postgresql://{db_username}:{db_password}@{db_host}:{db_port}/{db_name}{db_options}'
engine = create_engine(db_url)

# Create database if it doesn't exist
if not database_exists(engine.url):
    create_database(engine.url)

## Create Database Tables and Views

In [4]:
# Create an inspector on the engine; later cells use it to list the tables and views
inspector = inspect(engine)

In [5]:
# Execute SQL code to create all database tables
# Read the SQL script first so the connection is only held while executing it
with open('schema_erd/db_schema_tables.sql') as file:
    query = text(file.read())

# engine.begin() commits on success, rolls back on error, and releases the
# connection when the block exits, so no explicit commit()/close() is needed
with engine.begin() as con:
    con.execute(query)

# Display new tables in database
# A fresh inspector is used because an Inspector caches reflection results,
# which could otherwise show a stale table list when this cell is re-run
print(f'Tables in Database: {inspect(engine).get_table_names()}')

Tables in Database: ['jurisdiction', 'incident', 'state', 'race', 'incident_bias', 'bias', 'incident_offense', 'offense', 'incident_victim_type', 'victim_type', 'incident_location', 'location', 'census_data']


In [6]:
# Execute SQL code to create all database views
# Read the SQL script first so the connection is only held while executing it
with open('schema_erd/db_schema_views.sql') as file:
    query = text(file.read())

# engine.begin() commits on success, rolls back on error, and releases the
# connection when the block exits, so no explicit commit()/close() is needed
with engine.begin() as con:
    con.execute(query)

# Display views in database
# A fresh inspector is used because an Inspector caches reflection results,
# which could otherwise show a stale view list when this cell is re-run
print(f'Views in Database: {inspect(engine).get_view_names()}')

Views in Database: ['year_view', 'population_view', 'incident_view']


## Load Data into Database

In [7]:
# Names of the tables (and matching CSV files) to process.
# The order matters: it is chosen so that loading never hits a
# foreign key constraint error.
tables = [
    'jurisdiction',
    'state',
    'race',
    'incident',
    'bias',
    'incident_bias',
    'offense',
    'incident_offense',
    'victim_type',
    'incident_victim_type',
    'location',
    'incident_location',
    'census_data',
]

In [8]:
# Check if any tables contain data
# The rows are counted inside the database rather than by loading every row
# into a dataframe, so the check stays cheap even for large tables
for table in tables:
    rows = pd.read_sql(f'select count(*) as row_count from {table}', engine)['row_count'].iloc[0]
    if not rows:
        print(f'Table {table} is empty')
    else:
        # Only a warning is printed; loading into a non-empty table would append to existing rows
        print(f'Error: Table {table} contains data')

Table jurisdiction is empty
Table state is empty
Table race is empty
Table incident is empty
Table bias is empty
Table incident_bias is empty
Table offense is empty
Table incident_offense is empty
Table victim_type is empty
Table incident_victim_type is empty
Table location is empty
Table incident_location is empty
Table census_data is empty


In [9]:
# Loop through list, read each file into a dataframe, and then append to empty table in the database
# All tables are loaded inside one transaction: if any file fails to load, the
# whole load is rolled back instead of leaving the database partially populated
# (a partial load would make a re-run append duplicate rows to the tables that succeeded).
with engine.begin() as con:
    for table in tables:
        df = pd.read_csv(f'transformed_data/{table}.csv')
        # Passing the open connection makes pandas write within the surrounding transaction
        df.to_sql(table, con, if_exists='append', index=False)
        # Printed once the rows are written; they are committed when the block exits
        print(f'Table created: {table}')

Table created: jurisdiction
Table created: state
Table created: race
Table created: incident
Table created: bias
Table created: incident_bias
Table created: offense
Table created: incident_offense
Table created: victim_type
Table created: incident_victim_type
Table created: location
Table created: incident_location
Table created: census_data


In [10]:
# Verify # of rows of data in each table
# The count is computed by the database so that no table has to be pulled
# into memory just to read its length
for table in tables:
    row_count = pd.read_sql(f'select count(*) as row_count from {table}', engine)['row_count'].iloc[0]
    print(f'Rows in table {table}: {row_count}')

Rows in table jurisdiction: 8
Rows in table state: 51
Rows in table race: 7
Rows in table incident: 81666
Rows in table bias: 34
Rows in table incident_bias: 82716
Rows in table offense: 49
Rows in table incident_offense: 84555
Rows in table victim_type: 9
Rows in table incident_victim_type: 83043
Rows in table location: 46
Rows in table incident_location: 81790
Rows in table census_data: 3978


In [11]:
# Display the full jurisdiction table as a spot check of the loaded data
pd.read_sql('select * from jurisdiction', engine)

Unnamed: 0,jurisdiction_id,jurisdiction
0,0,City
1,1,County
2,2,State Police
3,3,Other State Agency
4,4,Federal
5,5,University or College
6,6,Tribal
7,7,Other


## Delete Database Data
This does not delete tables or views. 

In [None]:
# Tables are listed in the reverse of the load order, so rows in referencing
# tables are removed before the rows they reference.
# NOTE: this rebinds `tables`; re-run the cell that defines the load order
# before loading data again, otherwise the load would run in this reversed order.
tables = ['census_data', 'incident_location', 'location', 'incident_victim_type', 'victim_type',
          'incident_offense', 'offense', 'incident_bias', 'bias', 'incident', 'race', 'state',
          'jurisdiction']

# Reflect the existing tables once; the Table objects are then looked up by name
metadata = MetaData()
metadata.reflect(engine)

# Delete all rows in a single transaction: engine.begin() commits when every
# delete succeeded, rolls back if any of them fails, and releases the
# connection on exit, so no explicit commit()/close() is needed
with engine.begin() as con:
    for table in tables:
        con.execute(metadata.tables[table].delete())

In [None]:
# Verify tables are empty
# The count is computed by the database instead of loading each table into a dataframe
for table in tables:
    row_count = pd.read_sql(f'select count(*) as row_count from {table}', engine)['row_count'].iloc[0]
    print(f'{table} table contains {row_count} rows')

## Database Testing

In [12]:
# Create database engine
# db_options is appended so this engine connects with the same options as the
# engine used to create the database earlier in the notebook
db_url = f'postgresql://{db_username}:{db_password}@{db_host}:{db_port}/{db_name}{db_options}'

engine = create_engine(db_url)

In [13]:
# Create an inspector on the engine; the testing cells below use it to list
# tables, views, and column definitions
inspector = inspect(engine)

In [14]:
# Ask the inspector for the table and view names, then print both lists
table_names = inspector.get_table_names()
view_names = inspector.get_view_names()
print(f'Tables: {table_names}')
print(f'Views: {view_names}')

Tables: ['jurisdiction', 'incident', 'state', 'race', 'incident_bias', 'bias', 'incident_offense', 'offense', 'incident_victim_type', 'victim_type', 'incident_location', 'location', 'census_data']
Views: ['year_view', 'population_view', 'incident_view']


In [15]:
# Display the column definitions the inspector reports for the bias table
display(inspector.get_columns('bias'))

[{'name': 'bias_id',
  'type': INTEGER(),
  'nullable': False,
  'default': None,
  'autoincrement': False,
  'comment': None},
 {'name': 'bias',
  'type': VARCHAR(length=60),
  'nullable': False,
  'default': None,
  'autoincrement': False,
  'comment': None},
 {'name': 'bias_category',
  'type': VARCHAR(length=30),
  'nullable': False,
  'default': None,
  'autoincrement': False,
  'comment': None}]

In [16]:
# Create MetaData object and reflect the existing database schema into it
metadata = MetaData()
metadata.reflect(engine)

# Create automap mappings on top of the reflected metadata
# NOTE(review): the metadata is already reflected above, so passing
# autoload_with here presumably reflects a second time - confirm whether
# Base.prepare() alone would be sufficient
Base = automap_base(metadata=metadata)
Base.prepare(autoload_with=engine)

In [17]:
# Automap-generated class for the state table; used by the query in the next cell
S = Base.classes.state

In [18]:
# Query the state name and abbreviation of every row in the state table.
# The context manager closes the session even if the query raises, so the
# connection is never left checked out.
with Session(engine) as session:
    results = session.query(S.state, S.state_abbr).all()

# Print each (state, state_abbr) row
for row in results:
    print(row)

('Alaska', 'AK')
('Alabama', 'AL')
('Arkansas', 'AR')
('Arizona', 'AZ')
('California', 'CA')
('Colorado', 'CO')
('Connecticut', 'CT')
('District of Columbia', 'DC')
('Delaware', 'DE')
('Florida', 'FL')
('Georgia', 'GA')
('Hawaii', 'HI')
('Iowa', 'IA')
('Idaho', 'ID')
('Illinois', 'IL')
('Indiana', 'IN')
('Kansas', 'KS')
('Kentucky', 'KY')
('Louisiana', 'LA')
('Massachusetts', 'MA')
('Maryland', 'MD')
('Maine', 'ME')
('Michigan', 'MI')
('Minnesota', 'MN')
('Missouri', 'MO')
('Mississippi', 'MS')
('Montana', 'MT')
('Nebraska', 'NB')
('North Carolina', 'NC')
('North Dakota', 'ND')
('New Hampshire', 'NH')
('New Jersey', 'NJ')
('New Mexico', 'NM')
('Nevada', 'NV')
('New York', 'NY')
('Ohio', 'OH')
('Oklahoma', 'OK')
('Oregon', 'OR')
('Pennsylvania', 'PA')
('Rhode Island', 'RI')
('South Carolina', 'SC')
('South Dakota', 'SD')
('Tennessee', 'TN')
('Texas', 'TX')
('Utah', 'UT')
('Virginia', 'VA')
('Vermont', 'VT')
('Washington', 'WA')
('Wisconsin', 'WI')
('West Virginia', 'WV')
('Wyoming', 'WY