In [None]:
import numpy as np
import pandas as pd
import requests
import io


In [None]:
eia = pd.read_csv("./EIA_Plant_List.csv")
epa = pd.read_csv("./EPA_plant_List.csv")
walk = pd.read_csv("./TX_crosswalk.csv")

### EIA Matches

In [None]:
print("Number of EIA Matches: ", len(eia.loc[eia["match"] == True]))
print("Number of EIA Non-Matches: ", len(eia.loc[eia["match"] == False]))

print("Percent of EIA Matches: ", round(len(eia.loc[eia["match"] == True])/len(eia), 3))
print("Percent of EIA Non-Matches: ", round(len(eia.loc[eia["match"] == False])/len(eia), 3))

### EPA Matches

In [None]:
print("Number of EPA Matches: ", len(epa.loc[epa["match"] == True]))
print("Number of EPA Non-Matches: ", len(epa.loc[epa["match"] == False]))

print("Percent of EPA Matches: ", round(len(epa.loc[epa["match"] == True])/len(epa), 3))
print("Percent of EPA Non-Matches: ", round(len(epa.loc[epa["match"] == False])/len(epa), 3))

### EIA Fuel Breakdown

In [None]:
print("\nPercent Fuel Types Matched\n")
print(eia.loc[eia["match"] == True]["Energy Source 1"].value_counts(normalize=True).round(3))
print("\nPercent Fuel Types Not Matched\n")
print(eia.loc[eia["match"] == False]["Energy Source 1"].value_counts(normalize=True).round(3))

### EPA Fuel Breakdown

In [None]:
print("\nPercent Fuel Types Matched\n")
print(epa.loc[epa["match"] == True]["fuel"].value_counts(normalize=True).round(3))
print("\nPercent Fuel Types Not Matched\n")
print(epa.loc[epa["match"] == False]["fuel"].value_counts(normalize=True).round(3))

In [None]:
class EIA():
    def __init__(self, key=None):
        if key is None:
            print("You need a key in order to use this API.")
        else:
            self.key = key

    def electric_plant_all(self, plant_id):
        series_id = 'ELEC.PLANT.GEN.{}-ALL-ALL.Q'.format(plant_id)
        req = requests.get("https://api.eia.gov/series/?series_id={}&api_key={}".format(series_id, self.key))
        return pd.DataFrame.from_dict(req.json().get('series')[0].get('data'))

In [None]:
class EPA():
    def __init__(self, key=None):
        if key is None:
            print("You need a key in order to use this API.")
        else:
            self.key = key

    def hourlyData(self, orisCode, unitID, year, quarter):
        req = requests.get("https://api.epa.gov/FACT/1.0/emissions/hourlyData/csv/{}/{}/{}/{}?api_key={}".format(
            orisCode, 
            unitID, 
            year, 
            quarter, 
            self.key))
        if req.status_code == 204:
            print("HERE")
            return pd.DataFrame()
        con = req.content
        return pd.read_csv(io.StringIO(con.decode('utf-8')))

### Aggregate EPA vs EIA MWh in Q1 2019

In [None]:
epa_api = EPA("9ndbfvcHxIEgQ8KnDGhmVFdw3xiyOgqhhwdJg5Wo")
eia_api = EIA("9ndbfvcHxIEgQ8KnDGhmVFdw3xiyOgqhhwdJg5Wo")

In [None]:
epa_true = epa.loc[epa["match"] == True]
eia_true = eia.loc[eia["match"] == True]

In [None]:
walk = walk[["CAMD_PLANT_ID", "CAMD_UNIT_ID", "EIA_PLANT_ID", "EIA_GENERATOR_ID"]]
walk["EIA_PLANT_ID"] = walk["EIA_PLANT_ID"].astype(int)
# merged = eia_true.merge(walk, how="right", left_on=["Plant Code", "Generator ID"], right_on=["EIA_PLANT_ID", "EIA_GENERATOR_ID"])
# epa_true.merge(merged, how="right", left_on=["orisCode", "unitId"], right_on=["CAMD_PLANT_ID", "CAMD_UNIT_ID"]).dropna().drop_duplicates()


In [None]:
def get_eia_2019Q1(api,plant_id):
    temp = api.electric_plant_all(9)
    return temp.loc[temp[0] == "2019Q1"].reset_index(drop=True).at[0,1]

def get_EPA_2019Q1(api, plant_id, unit_id):
    return api.hourlyData(plant_id, unit_id, "2019", 1)


In [18]:
dic = {"plant_id":[], "eia":[], "epa":[], "valid":[]}
for i in walk["EIA_PLANT_ID"].unique():
    data = walk.loc[walk["CAMD_PLANT_ID"] == i]

    epa_sum = 0
    for j in data["CAMD_UNIT_ID"].unique():
        print(i, " : ", j)
        req = epa_api.hourlyData(i,j, "2019", 1)
        if req.empty:
            dic["plant_id"].append(i)
            dic["eia"].append("-1")
            dic["epa"].append("-1")
            dic["valid"].append(False)
            break
        epa_sum += epa_api.hourlyData(i,j, "2019", 1)["HourLoad"].sum()
    print(epa_sum)
    try:
        temp = eia_api.electric_plant_all(i)
        eia_sum = temp.loc[temp[0] == "2019Q1"].reset_index(drop=True).at[0,1]
    except: continue
    dic["plant_id"].append(i)
    dic["eia"].append(eia_sum)
    dic["epa"].append(epa_sum)
    dic["valid"].append(True)

yes = pd.DataFrame().from_dict(dic)


9  :  CTG-1
6290.0
127  :  1
962455.0
298  :  LIM1
298  :  LIM2
2054725.0
3439  :  4
3439  :  5
20660.0
3441  :  8
3441  :  9
265696.0
3443  :  9
134993.0
3452  :  1
3452  :  2
5866.0
3453  :  6
3453  :  7
3453  :  8
9295.0
3456  :  **4
3456  :  **5
3456  :  1
3456  :  2
3456  :  3
3456  :  GT-6A
3456  :  GT-6B
632405.0
3457  :  1
3457  :  2
424828.0
3459  :  1
3459  :  2
HERE
52242.0
3460  :  CBY1
3460  :  CBY2
27139.0
3464  :  GBY5
HERE
0
3468  :  SRB1
3468  :  SRB2
3468  :  SRB3
HERE
0.0
3469  :  THW31
3469  :  THW32
3469  :  THW33
3469  :  THW34
3469  :  THW41
3469  :  THW42
3469  :  THW43
3469  :  THW44
3469  :  THW51
3469  :  THW52
3469  :  THW53
3469  :  THW54
3469  :  THW55
3469  :  THW56
90224.0
3470  :  WAP1
3470  :  WAP2
3470  :  WAP3
3470  :  WAP4
3470  :  WAP5
3470  :  WAP6
3470  :  WAP7
3470  :  WAP8
2931231.0
3476  :  2
3476  :  3
3476  :  4
HERE
1677.0
3477  :  1
0.0
3478  :  1
3478  :  2
3478  :  3
113712.0
3482  :  151B
3482  :  152B
3482  :  153T
3482  :  154T
416774

In [21]:
yes = yes.loc[yes["valid"] == True]

In [22]:
yes["diff"] = yes["eia"] - yes["epa"]
yes["percent_diff"] = ((yes["eia"] - yes["epa"]) / yes["epa"]).abs() * 100


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  yes["diff"] = yes["eia"] - yes["epa"]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  yes["percent_diff"] = ((yes["eia"] - yes["epa"]) / yes["epa"]).abs() * 100


In [26]:
yes["diff"].abs().mean()

102452.95315873016

In [27]:
yes

Unnamed: 0,plant_id,eia,epa,valid,diff,percent_diff
0,9,8169.863,6290.0,True,1879.863,29.886534
1,127,886717,962455.0,True,-75738.0,7.869251
2,298,1911250.0,2054725.0,True,-143475.0,6.982686
3,3439,23064.461,20660.0,True,2404.461,11.638243
4,3441,253963,265696.0,True,-11733.0,4.415949
...,...,...,...,...,...,...
143,60122,1389193,1432681.0,True,-43488.0,3.035428
144,60264,12049.0,14322.0,True,-2273.0,15.870688
145,60459,11681.745,10963.0,True,718.745,6.556098
146,60460,12730.722,7523.0,True,5207.722,69.224006
