# Goal - Understand the U.S. power grid and which resources are located where
* What is the electricity price in each state, and which energy resources contribute to it?
* Do state or national taxes influence energy prices?

# To Do
* Write a dictionary describing the fuel_types

[EIA Vocab](https://www.eia.gov/tools/glossary/index.php?id=G)

# Getting Powerplant Data

**Note:** this data can probably be queried through the EIA Open Data browser instead of downloading each link below:
https://www.eia.gov/opendata/qb.php?category=1017

* [BioMass](https://atlas.eia.gov/datasets/biomass-2/explore?location=44.619557%2C61.504001%2C3.67&showTable=true) - Organic nonfossil material of biological origin constituting a renewable energy source.
* [Petroleum](https://atlas.eia.gov/datasets/petroleum-2/explore?location=44.619557%2C61.504001%2C3.67)
* [Other](https://atlas.eia.gov/datasets/other/explore?location=44.619557%2C61.504001%2C3.67)
* [Battery Storage](https://atlas.eia.gov/datasets/battery-storage/explore?location=44.619557%2C61.504001%2C3.67)
* [Hydro Electric](https://atlas.eia.gov/datasets/hydroelectric-2/explore?location=44.619557%2C61.504001%2C3.67)
* [Nuclear](https://atlas.eia.gov/datasets/nuclear/explore?location=44.619557%2C61.504001%2C3.67)
* [Solar](https://atlas.eia.gov/datasets/solar-2/explore?location=44.619557%2C61.504001%2C3.67)
* [Wind](https://atlas.eia.gov/datasets/wind-2/explore?location=44.619557%2C61.504001%2C3.67)
* [Coal](https://atlas.eia.gov/datasets/coal-1/explore?location=44.619557%2C61.504001%2C3.67)
* [Natural Gas](https://atlas.eia.gov/datasets/natural-gas-1/explore?location=44.619557%2C61.504001%2C3.67)
* [Geothermal](https://atlas.eia.gov/datasets/geothermal-1/explore?location=44.619557%2C61.504001%2C3.67)
* [Pumped Storage](https://atlas.eia.gov/datasets/pumped-storage/explore?location=44.619557%2C61.504001%2C3.67)

* [Map](https://atlas.eia.gov/maps/power-plants-1/about)

* **Generation** is a measure of electricity produced over time. Most electric power plants use some of the electricity they produce to operate the power plant.
* **Capacity** is the maximum level of electric power (electricity) that a power plant can supply at a specific point in time under certain conditions.
* **Sales** are the amount of electricity sold to customers over a period of time, and they account for most of U.S. electricity consumption.

In [None]:
# Mount Google Drive into the Colab runtime at ./drive/ so the shared project
# folder can be reached from this notebook.
from google.colab import drive

drive.mount('./drive/')

# Work from the shared-drive project folder: the relative paths used later in
# this notebook ("helpers/", "data/...") are resolved from this directory.
%cd drive/Shareddrives/Data606_Energy

Mounted at ./drive/
/content/drive/Shareddrives/Data606_Energy


In [None]:
import sys
import pandas as pd
import numpy as np
import glob
from tqdm.autonotebook import tqdm
import plotly.express as px
sys.path.append("helpers/")

from energygrid import EGRID

from helper_functions import write_csv,combine_like_files

  """


In [None]:
# Instantiate the project's EGRID helper and call its state and plant-fuel-type getters.
# NOTE(review): presumably these calls populate attributes on the instance
# (`energy_grid.plant_fuel_types` is read later in this notebook) — confirm in energygrid.py.
energy_grid = EGRID()
energy_grid.get_states()
energy_grid.get_plant_fuel_types()

In [None]:

# Load the combined plant-level net-generation data (by primary source / fuel type)
# from the project's feather file. Judging by the preview that follows, each row
# appears to be one EIA series (plant + fuel type) for one month.
df_pp = pd.read_feather("data/powerplant/plants_by_state/Net_Generation/LowLevel_FuelType/combine/low_level_combine_net_generation_primsource.feather")

In [None]:
# Preview the plant-level frame (pandas renders only the first and last rows).
df_pp

Unnamed: 0,series_id,name,f,source,lat,lon,latlon,updated,country,state,mWh,date,year,plant_code,fuel_type,description
0,ELEC.PLANT.GEN.589-DFO-ALL.M,Net generation : J C McNeil (589) : distillate...,M,"EIA, U.S. Energy Information Administration",44.4917,-73.208056,"44.4917,-73.208056",2021-09-23T14:54:35-0400,USA,VT,0.00000,2021-07-01,2021,589,DFO,
1,ELEC.PLANT.GEN.589-DFO-ALL.M,Net generation : J C McNeil (589) : distillate...,M,"EIA, U.S. Energy Information Administration",44.4917,-73.208056,"44.4917,-73.208056",2021-09-23T14:54:35-0400,USA,VT,0.00000,2021-06-01,2021,589,DFO,
2,ELEC.PLANT.GEN.589-DFO-ALL.M,Net generation : J C McNeil (589) : distillate...,M,"EIA, U.S. Energy Information Administration",44.4917,-73.208056,"44.4917,-73.208056",2021-09-23T14:54:35-0400,USA,VT,0.00000,2021-05-01,2021,589,DFO,
3,ELEC.PLANT.GEN.589-DFO-ALL.M,Net generation : J C McNeil (589) : distillate...,M,"EIA, U.S. Energy Information Administration",44.4917,-73.208056,"44.4917,-73.208056",2021-09-23T14:54:35-0400,USA,VT,0.00000,2021-04-01,2021,589,DFO,
4,ELEC.PLANT.GEN.589-DFO-ALL.M,Net generation : J C McNeil (589) : distillate...,M,"EIA, U.S. Energy Information Administration",44.4917,-73.208056,"44.4917,-73.208056",2021-09-23T14:54:35-0400,USA,VT,0.00000,2021-03-01,2021,589,DFO,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1864419,ELEC.PLANT.GEN.52152-MSB-ALL.M,Net generation : International Paper Franklin ...,M,"EIA, U.S. Energy Information Administration",36.6803,-76.912800,"36.6803,-76.9128",2016-07-07T17:18:42-0400,USA,VA,1642.93995,2006-05-01,2006,52152,MSB,
1864420,ELEC.PLANT.GEN.52152-MSB-ALL.M,Net generation : International Paper Franklin ...,M,"EIA, U.S. Energy Information Administration",36.6803,-76.912800,"36.6803,-76.9128",2016-07-07T17:18:42-0400,USA,VA,1704.18123,2006-04-01,2006,52152,MSB,
1864421,ELEC.PLANT.GEN.52152-MSB-ALL.M,Net generation : International Paper Franklin ...,M,"EIA, U.S. Energy Information Administration",36.6803,-76.912800,"36.6803,-76.9128",2016-07-07T17:18:42-0400,USA,VA,1760.68558,2006-03-01,2006,52152,MSB,
1864422,ELEC.PLANT.GEN.52152-MSB-ALL.M,Net generation : International Paper Franklin ...,M,"EIA, U.S. Energy Information Administration",36.6803,-76.912800,"36.6803,-76.9128",2016-07-07T17:18:42-0400,USA,VA,1613.66320,2006-02-01,2006,52152,MSB,


In [None]:
# Plot the power-plant locations on an OpenStreetMap base layer.
# Reference: https://plotly.com/python/mapbox-layers/

# df_pp repeats the same plant/fuel coordinates for every reporting period, so
# draw each distinct marker only once. The map shows the same points and hover
# values, but the figure no longer has to serialise every one of the ~1.8M rows.
map_cols = ["lat", "lon", "state", "plant_code", "fuel_type"]
plant_locations = df_pp[map_cols].drop_duplicates()

fig = px.scatter_mapbox(
    plant_locations,
    lat="lat",
    lon="lon",
    hover_name="state",
    hover_data=["state", "plant_code", "fuel_type"],
    zoom=3,
    height=300,
)
# Zero margins let the map fill the whole figure area.
fig.update_layout(
    mapbox_style="open-street-map",
    margin={"r": 0, "t": 0, "l": 0, "b": 0},
)
fig.show()

In [None]:
# Keep the per-fuel net-generation columns ("R..." ending in "MWh") together with
# the year/state identifier columns. Both "Year"/"State" and "year"/"state"
# spellings show up in this notebook's outputs, so the identifier match is
# case-insensitive.
keep_cols = [
    col
    for col in df_pp.columns
    if (col.startswith("R") and col.endswith("MWh")) or col.lower() in ("year", "state")
]

# Fixed: this used to read `px.df_pp[keep_cols]`, which raises AttributeError
# because `px` is the plotly.express module, not the DataFrame.
df_pp[keep_cols]

Unnamed: 0,Year,State,variable,value
0,2008,AK,R_AB_MWh,0.000
1,2008,AL,R_AB_MWh,0.000
2,2008,AR,R_AB_MWh,0.000
3,2008,AZ,R_AB_MWh,0.000
4,2008,CA,R_AB_MWh,0.001
...,...,...,...,...
24695,2020,VT,R_WO_MWh,0.000
24696,2020,WA,R_WO_MWh,0.000
24697,2020,WI,R_WO_MWh,0.000
24698,2020,WV,R_WO_MWh,0.000


In [None]:
def calc_correlation(df,method="pearson",threshold=1):
    """Return the absolute correlation matrix of ``df`` without redundant columns.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose columns are correlated against each other.
    method : str, default "pearson"
        Correlation method forwarded to ``DataFrame.corr``.
    threshold : float, default 1
        A column is dropped when its absolute correlation with any column that
        precedes it (strict upper triangle of the matrix) is >= this value.

    Returns
    -------
    pandas.DataFrame
        Absolute correlations. All original rows are kept; only the columns
        flagged as redundant (and columns that are entirely NaN) are removed.
    """
    # Absolute correlations; columns for which no correlation could be computed
    # at all (every entry NaN) are discarded.
    corr = df.corr(method=method).dropna(axis=1, how="all").abs()

    # Strict upper triangle (k=1 skips the diagonal) so each pair is examined
    # once and a column is never compared with itself.
    # `np.bool` was removed in NumPy 1.24 -- use the builtin `bool` dtype.
    upper_mask = np.triu(np.ones(corr.shape, dtype=bool), k=1)
    upper_tri = corr.where(upper_mask)

    # NaN entries (masked cells) compare as False, so they never trigger a drop.
    to_drop = [column for column in upper_tri.columns if (upper_tri[column] >= threshold).any()]

    return corr.drop(columns=to_drop)

In [None]:
# Display the fuel-type mapping held on the EGRID helper; per the rendered output
# it maps a fuel description to a short code (e.g. 'Natural Gas' -> 'NG').
energy_grid.plant_fuel_types

{'Disolate Fue Oil': 'DFO',
 'Natural Gas': 'NG',
 'Nuclear': 'NUC',
 'Other': 'OTH',
 'Solar': 'SUN',
 'Wind': 'WND',
 'agricultural by-products': 'AB',
 'batteries or other use of electricity as an energy source': 'MWH',
 'biogenic municipal solid waste': 'MSB',
 'bituminous coal': 'BIT',
 'black liquour': 'BLQ',
 'blast furnace gas': 'BFG',
 'coal-derived synthetic gas': 'SGC',
 'gaseous propane': 'PG',
 'geothermal': 'GEO',
 'hydroelectric': 'WAT',
 'jet fuel': 'JF',
 'kerosene': 'KER',
 'landfill gas': 'LFG',
 'lignite coal': 'LIG',
 'municipal solid waste': 'MSW',
 'non-biogenic municipal solid waste': 'MSN',
 'other biomass gas': 'OBG',
 'other biomass solids': 'OBS',
 'other gas': 'OG',
 'petroleum coke': 'PC',
 'purchased steam': 'PUR',
 'refined coal': 'RC',
 'residual fuel oil': 'RFO',
 'sludge waste': 'SLW',
 'subbituminous coal': 'SUB',
 'synthetic coal': 'SC',
 'tire-derived fuels': 'TDF',
 'waste heat': 'WH',
 'waste/other coal': 'WC',
 'waste/other oil': 'WO',
 'wood/wo

In [None]:
# Collect every "*MWh" column whose absolute linear correlation with at least
# one OTHER "*MWh" column reaches the threshold.
MIN_ABS_CORR = 0.30

mwh_cols = [col for col in df_pp.columns if col.endswith("MWh")]

# A single pairwise correlation matrix replaces the nested loop that called
# Series.corr once per ordered pair of columns; the values are the same.
abs_corr = df_pp[mwh_cols].corr().abs()

# Every column correlates perfectly with itself, so blank the diagonal before
# thresholding (the loop version skipped col == row for the same reason).
off_diagonal = abs_corr.mask(np.eye(len(abs_corr), dtype=bool))

# NaN correlations compare as False and therefore never qualify a column.
keep_cols = set(off_diagonal.columns[off_diagonal.ge(MIN_ABS_CORR).any(axis=0)])

In [None]:
import plotly.figure_factory as ff


def _strip_affixes(name, prefix="R_", suffix="_MWh"):
    """Remove the literal ``prefix``/``suffix`` from ``name`` when present.

    ``str.lstrip``/``str.rstrip`` strip *character sets*, not substrings, so
    ``"R_RC_MWh".lstrip("R_").rstrip("_MWh")`` yields ``"C"`` and
    ``"R_MSW_MWh"`` yields ``"MS"``; slicing removes exactly the affix.
    """
    if name.startswith(prefix):
        name = name[len(prefix):]
    if name.endswith(suffix):
        name = name[:-len(suffix)]
    return name


# `keep_cols` is a set: indexing a DataFrame with a set raises TypeError on
# pandas >= 2.0 and has no stable order, so use a sorted list instead.
heatmap_cols = sorted(keep_cols)

corr = df_pp[heatmap_cols].corr().round(2)
# Hide the upper triangle (diagonal included) so each pair is shown once.
mask = np.triu(np.ones_like(corr, dtype=bool))

# Axis labels are taken from the matrix itself so they always line up with it.
cols = [_strip_affixes(col) for col in corr.columns]

# Reverse lookup: fuel code -> fuel description.
inv_map = {v: k for k, v in energy_grid.plant_fuel_types.items()}

# Fuel descriptions for the codes above (falls back to the label itself).
# NOTE(review): `hover_text` is computed but not passed to the figure — wire it in or drop it.
hover_text = [inv_map.get(col, col) for col in cols]

fig = ff.create_annotated_heatmap(
    corr.mask(mask).to_numpy(),
    x=cols,
    y=cols,
    colorscale="delta",
)

fig.update_layout(title_text="2008 - 2021 Correlation for MWh Columns (containing) at least 1 >= 0.30 linear correlation")

fig.show()

In [None]:
# List the columns of the frame used by the charts that follow.
# NOTE(review): the columns shown in this cell's output (state/year plus per-fuel
# "R_<fuel>_MWh" / "R_<fuel>_count" aggregates) are not those of the plant-level
# frame read from the feather file, so `df_pp` was presumably reassigned in a cell
# that is no longer in the notebook — confirm and restore that step so that
# Restart & Run All reproduces these results.
df_pp.columns

Index(['state', 'year', 'Agg_total_plant_MWh', 'Agg_total_plant_count',
       'R_AB_MWh', 'R_AB_count', 'R_BFG_MWh', 'R_BFG_count', 'R_BIT_MWh',
       'R_BIT_count', 'R_BLQ_MWh', 'R_BLQ_count', 'R_DFO_MWh', 'R_DFO_count',
       'R_GEO_MWh', 'R_GEO_count', 'R_JF_MWh', 'R_JF_count', 'R_KER_MWh',
       'R_KER_count', 'R_LFG_MWh', 'R_LFG_count', 'R_LIG_MWh', 'R_LIG_count',
       'R_MSB_MWh', 'R_MSB_count', 'R_MSN_MWh', 'R_MSN_count', 'R_MSW_MWh',
       'R_MSW_count', 'R_MWH_MWh', 'R_MWH_count', 'R_NG_MWh', 'R_NG_count',
       'R_NUC_MWh', 'R_NUC_count', 'R_OBG_MWh', 'R_OBG_count', 'R_OBS_MWh',
       'R_OBS_count', 'R_OG_MWh', 'R_OG_count', 'R_OTH_MWh', 'R_OTH_count',
       'R_PC_MWh', 'R_PC_count', 'R_PG_MWh', 'R_PG_count', 'R_PUR_MWh',
       'R_PUR_count', 'R_RC_MWh', 'R_RC_count', 'R_RFO_MWh', 'R_RFO_count',
       'R_SC_MWh', 'R_SC_count', 'R_SGC_MWh', 'R_SGC_count', 'R_SLW_MWh',
       'R_SLW_count', 'R_SUB_MWh', 'R_SUB_count', 'R_SUN_MWh', 'R_SUN_count',
       'R_TDF_MWh', 

In [None]:
# Line chart of the aggregate plant count per year, one trace per state.
# Rows are ordered by year first so each state's line is drawn chronologically.
px.line(
    df_pp.sort_values(by="year"),
    x="year",
    y="Agg_total_plant_count",
    color="state",
    title="Aggregate Plant Count by State",
)

In [None]:
# Same plant-count chart with a few states left out.
# NOTE(review): presumably CA, TX and NC are excluded because they dominate the
# y-axis of the full chart — confirm the reason and record it here.
EXCLUDED_STATES = ["CA", "TX", "NC"]

px.line(
    df_pp[~df_pp["state"].isin(EXCLUDED_STATES)].sort_values("year", ascending=True),
    x="year",
    y="Agg_total_plant_count",
    color="state",
    # The title now says which states are missing; it used to be identical to
    # the unfiltered chart's title.
    title=f"Aggregate Plant Count by State (excluding {', '.join(EXCLUDED_STATES)})",
)

In [None]:
# Line chart of the aggregate net generation (MWh) per year, one trace per state.
# Rows are ordered by year first so each state's line is drawn chronologically.
px.line(
    df_pp.sort_values(by="year"),
    x="year",
    y="Agg_total_plant_MWh",
    color="state",
    title="Aggregate Plant MWh Net Generation by State",
)

In [None]:
# Display the frame behind the preceding line charts (pandas truncates long frames).
df_pp