# Importing the Cleaned Data (csv)


## Import Dependencies

In [29]:
# basic stuff
import os
import psycopg2
import pandas as pd
from pprint import pprint
import matplotlib.pyplot as plt
from scipy import stats
import statsmodels.api as sm 
import numpy as np
import psycopg2.extras

# Imports the method used to connect to DBs
from sqlalchemy import create_engine

# function to establish a session with a connected database
from sqlalchemy.orm import Session

# database compliant datatypes
from sqlalchemy import Column, Integer, String, Float

## Setup the PostgreSQL engine

In [30]:
# The connection URL embeds the database password, so it is read from the
# environment instead of being stored in the notebook (and its git history).
# Set it before starting Jupyter, for example:
#   ENERGY_DB_URL=postgresql://postgres:<password>@localhost:5432/energy_db
db_url = os.environ.get('ENERGY_DB_URL')
if not db_url:
    # Fail here with a clear message rather than later with a confusing
    # connection error from the driver.
    raise RuntimeError(
        "Environment variable ENERGY_DB_URL is not set; expected something like "
        "'postgresql://<user>:<password>@localhost:5432/energy_db'"
    )

engine = create_engine(db_url)

## Clear out data first
### Truncate the fact (dependent) tables first, then drop the foreign keys, truncate the remaining tables, and finally re-add the keys

In [31]:
# Reset every table so the CSV loads below start from an empty database.
#
# The statements are the same ones, in the same order, as a manual reset:
#   1. truncate the fact (dependent) tables
#   2. drop the foreign keys
#   3. truncate the remaining (referenced) tables
#   4. re-create the foreign keys
# They all run inside ONE transaction (engine.begin()). PostgreSQL DDL is
# transactional, so if any statement fails the whole reset is rolled back and
# the schema is never left with its foreign keys missing.

# tables that are not the target of any foreign key listed below
fact_tables = [
    'state_greenhouse_emissions',
    'region_degree_days',
    'facility_emissions',
    'air_quality',
    'state_data',
    'state_region',
]

# tables that are referenced by the foreign keys, truncated once the keys are gone
referenced_tables = ['state', 'facility', 'region']

# (owning table, constraint name, constraint definition)
foreign_keys = [
    ('facility', 'fk_facility_state', 'FOREIGN KEY (state) REFERENCES state (state)'),
    ('state_region', 'fk_state_region_region', 'FOREIGN KEY (region) REFERENCES region (region)'),
    ('state_region', 'fk_state_region_state', 'FOREIGN KEY (state) REFERENCES state (state)'),
    ('state_greenhouse_emissions', 'fk_state_greenhouse_emissions_state', 'FOREIGN KEY (state) REFERENCES state (state)'),
    ('region_degree_days', 'fk_region_degree_days_region', 'FOREIGN KEY (region) REFERENCES region (region)'),
    ('facility_emissions', 'fk_facility_emissions_facility_id', 'FOREIGN KEY (facility_id) REFERENCES facility (facility_id)'),
    ('air_quality', 'fk_air_quality_state', 'FOREIGN KEY (state) REFERENCES state (state)'),
    ('state_data', 'fk_state_data_state', 'FOREIGN KEY (state) REFERENCES state (state)'),
]

# engine.begin() commits when the block exits normally and rolls back on any exception
with engine.begin() as conn:
    for table in fact_tables:
        conn.execute('TRUNCATE TABLE {};'.format(table))

    # drop all the foreign keys so the referenced tables can be truncated
    for table, constraint, _definition in foreign_keys:
        conn.execute('ALTER TABLE {} DROP CONSTRAINT {};'.format(table, constraint))

    for table in referenced_tables:
        conn.execute('TRUNCATE TABLE {};'.format(table))

    # add the keys back
    for table, constraint, definition in foreign_keys:
        conn.execute('ALTER TABLE {} ADD CONSTRAINT {} {};'.format(table, constraint, definition))

<sqlalchemy.engine.result.ResultProxy at 0x218f934d5c8>

In [32]:
# Sanity check after the reset: the state table should return no rows (an empty list).
state_rows = engine.execute("SELECT * FROM state;").fetchall()
state_rows

[]

## Importing the metadata first

### State

#### Import, preview

In [6]:
# Location of the state CSV, relative to this notebook.
state_file = os.path.join(
    "..",
    "Raw Data Files",
    "state.csv",
)

# Load the file into a DataFrame, decoding it as ISO-8859-1.
state_df = pd.read_csv(state_file, encoding="ISO-8859-1")

# Show the first rows as a quick look at what was loaded.
state_df.head()

Unnamed: 0,state,state_name
0,AK,Alaska
1,AL,Alabama
2,AR,Arkansas
3,AZ,Arizona
4,CA,California


#### Write to PostgreSQL, return rows to verify
##### Caution: before re-running this cell, run the truncate-table code above first

In [35]:
# Insert the DataFrame rows into the 'state' table.
# if_exists='append' adds rows to the existing table; it does not replace
# rows that are already there, and index=False keeps the DataFrame index out.
state_df.to_sql(name='state', con=engine, index=False, if_exists='append')

# Read a few rows back to confirm the insert.
state_check = engine.execute("SELECT * FROM state LIMIT 10")
state_check.fetchall()


[('AK', 'Alaska'),
 ('AL', 'Alabama'),
 ('AR', 'Arkansas'),
 ('AZ', 'Arizona'),
 ('CA', 'California'),
 ('CO', 'Colorado'),
 ('CT', 'Connecticut'),
 ('DE', 'Delaware'),
 ('FL', 'Florida'),
 ('GA', 'Georgia')]

### region
#### import csv, examine df

In [34]:
# Location of the region CSV, relative to this notebook.
region_file = os.path.join(
    "..",
    "Raw Data Files",
    "region.csv",
)

# Load the file into a DataFrame, decoding it as ISO-8859-1.
region_df = pd.read_csv(region_file, encoding="ISO-8859-1")

# Show the first rows as a quick look at what was loaded.
region_df.head()

Unnamed: 0,region,region_group
0,Pacific,West
1,Montain,West
2,West North Central,Midwest
3,West South Central,South
4,East North Central,Midwest


#### write df to table

In [36]:
# Insert the DataFrame rows into the 'region' table.
# if_exists='append' adds rows to the existing table; it does not replace
# rows that are already there, and index=False keeps the DataFrame index out.
region_df.to_sql(name='region', con=engine, index=False, if_exists='append')

# Read a few rows back to confirm the insert.
region_check = engine.execute("SELECT * FROM region LIMIT 10")
region_check.fetchall()

[('Pacific', 'West'),
 ('Montain', 'West'),
 ('West North Central', 'Midwest'),
 ('West South Central', 'South'),
 ('East North Central', 'Midwest'),
 ('East South Central', 'South'),
 ('Middle Atlantic', 'Northeast'),
 ('New England', 'Northeast'),
 ('South Atlantic', 'South')]

### state_region
#### import csv, examine df

In [37]:
# Location of the state-to-region mapping CSV, relative to this notebook.
state_region_file = os.path.join(
    "..",
    "Raw Data Files",
    "state_region.csv",
)

# Load the file into a DataFrame, decoding it as ISO-8859-1.
state_region_df = pd.read_csv(state_region_file, encoding="ISO-8859-1")

# Show the first rows as a quick look at what was loaded.
state_region_df.head()

Unnamed: 0,state,region
0,WA,Pacific
1,OR,Pacific
2,CA,Pacific
3,MT,Montain
4,ID,Montain


#### write to database

In [38]:
# Insert the DataFrame rows into the 'state_region' table.
# if_exists='append' adds rows to the existing table; it does not replace
# rows that are already there, and index=False keeps the DataFrame index out.
state_region_df.to_sql(name='state_region', con=engine, index=False, if_exists='append')

# Read a few rows back to confirm the insert.
state_region_check = engine.execute("SELECT * FROM state_region LIMIT 10")
state_region_check.fetchall()

[('WA', 'Pacific'),
 ('OR', 'Pacific'),
 ('CA', 'Pacific'),
 ('MT', 'Montain'),
 ('ID', 'Montain'),
 ('WY', 'Montain'),
 ('NV', 'Montain'),
 ('AZ', 'Montain'),
 ('UT', 'Montain'),
 ('CO', 'Montain')]