# Energy/Emissions/Air Quality Data analysis
## Group effort to review datasets

### Import dependencies and set up the database connection

In [1]:
import os
import psycopg2
import pandas as pd
from pprint import pprint
import matplotlib.pyplot as plt
from scipy import stats
import statsmodels.api as sm 
import numpy as np
import psycopg2.extras
import plotly.express as px

In [2]:
# setup connection
from sqlalchemy import create_engine

# Connection settings. Each value may be overridden through the standard
# PostgreSQL environment variables so credentials do not have to be edited
# (or committed) in the notebook; the defaults match the local dev setup.
hostname = os.environ.get('PGHOST', 'localhost')
username = os.environ.get('PGUSER', 'postgres')
password = os.environ.get('PGPASSWORD', 'postgres')
database = os.environ.get('PGDATABASE', 'energy_db')

# Raw psycopg2 connection passed as `con=` to the queries below.
connection = psycopg2.connect(host=hostname, user=username, password=password, dbname=database)

### Just curious (who burns the most coal?)

In [3]:
# Load the full "top_10_coal_state_stats" table into a DataFrame.
top_coal_df = pd.read_sql_query(
    sql='select * from "top_10_coal_state_stats"',
    con=connection,
)

# Preview the first rows as the cell output.
top_coal_df.head()

Unnamed: 0,state,year,generation_mwh,co2_mt,so2_mt,nox_mt
0,IL,1990,54966018.0,58336624.0,845232.0,330004.0
1,IN,1990,96925554.0,95600952.0,1273237.0,484016.0
2,KY,1990,70500461.0,66585266.0,818339.0,300852.0
3,MI,1990,67066870.0,65192865.0,362913.0,295425.0
4,MO,1990,48796793.0,48378620.0,720786.0,261405.0


In [18]:
# Load the "state_aqi_pct_change" table, sorted by state, into a DataFrame.
aqi_df = pd.read_sql_query(
    sql='select * from "state_aqi_pct_change" order by state',
    con=connection,
)

# Preview the first rows as the cell output.
aqi_df.head()

Unnamed: 0,state,good_days_pct_1990,bad_days_pct_1990,good_days_pct_2018,bad_days_pct_2018,good_days_pct_change,bad_days_pct_change
0,AK,97.0,3.0,96.0,4.0,-1.0,1.0
1,AL,88.0,12.0,100.0,0.0,12.0,-12.0
2,AR,91.0,9.0,99.0,1.0,8.0,-8.0
3,AZ,87.0,13.0,94.0,6.0,7.0,-7.0
4,CA,82.0,18.0,89.0,11.0,7.0,-7.0


In [17]:
# Bar chart: one bar per state showing the change in the percentage of
# good AQI days (the good_days_pct_change column of aqi_df).
fig = px.bar(
    data_frame=aqi_df,
    x="state",
    y="good_days_pct_change",
    hover_data=["state"],
    title="AQI Good Days Change From 1990 to 2018 by State",
)
fig.show()

In [20]:
# Texas (TX) rows of state_aqi_pct_by_year, ordered by year.
tx_aqi_df = pd.read_sql_query(
    sql="select * from state_aqi_pct_by_year where state = 'TX' order by year",
    con=connection,
)

# Preview the first rows as the cell output.
tx_aqi_df.head()

Unnamed: 0,state,year,good_days_percent,bad_days_percent
0,TX,1990,83.0,17.0
1,TX,1991,85.0,15.0
2,TX,1992,85.0,15.0
3,TX,1993,88.0,12.0
4,TX,1994,89.0,11.0


In [22]:
# Line chart of the Texas good-day percentage for each year in tx_aqi_df.
fig = px.line(
    data_frame=tx_aqi_df,
    x="year",
    y="good_days_percent",
    hover_data=["state"],
    title="Texas AQI % Good Days by Year",
)
fig.show()

In [23]:
# West Virginia (WV), where there was a lot of coal burning:
# rows of state_aqi_pct_by_year for that state, ordered by year.
wv_aqi_df = pd.read_sql_query(
    sql="select * from state_aqi_pct_by_year where state = 'WV' order by year",
    con=connection,
)

# Preview the first rows as the cell output.
wv_aqi_df.head()

Unnamed: 0,state,year,good_days_percent,bad_days_percent
0,WV,1990,66.0,34.0
1,WV,1991,65.0,35.0
2,WV,1992,75.0,25.0
3,WV,1993,70.0,30.0
4,WV,1994,72.0,28.0


In [24]:
# Line chart of the West Virginia good-day percentage for each year in wv_aqi_df.
fig = px.line(
    data_frame=wv_aqi_df,
    x="year",
    y="good_days_percent",
    hover_data=["state"],
    title="West Virginia AQI % Good Days by Year",
)
fig.show()

In [25]:
# Hawaii (HI) rows of state_aqi_pct_by_year, ordered by year.
hi_aqi_df = pd.read_sql_query(
    sql="select * from state_aqi_pct_by_year where state = 'HI' order by year",
    con=connection,
)

# Preview the first rows as the cell output.
hi_aqi_df.head()

Unnamed: 0,state,year,good_days_percent,bad_days_percent
0,HI,1990,93.0,7.0
1,HI,1991,93.0,7.0
2,HI,1992,90.0,10.0
3,HI,1993,92.0,8.0
4,HI,1994,94.0,6.0


In [26]:
# Line chart of the Hawaii good-day percentage for each year in hi_aqi_df.
fig = px.line(
    data_frame=hi_aqi_df,
    x="year",
    y="good_days_percent",
    hover_data=["state"],
    title="Hawaii AQI % Good Days by Year",
)
fig.show()