In [2]:
import numpy as np
import pandas as pd
import requests
import io


In [26]:
eia = pd.read_csv("./EIA_Plant_List.csv")
epa = pd.read_csv("./EPA_plant_List.csv")
walk = pd.read_csv("./TX_crosswalk.csv")

### EIA Matches

In [82]:
print("Number of EIA Matches: ", len(eia.loc[eia["match"] == True]))
print("Number of EIA Non-Matches: ", len(eia.loc[eia["match"] == False]))

print("Percent of EIA Matches: ", round(len(eia.loc[eia["match"] == True])/len(eia), 3))
print("Percent of EIA Non-Matches: ", round(len(eia.loc[eia["match"] == False])/len(eia), 3))

Number of EIA Matches:  429
Number of EIA Non-Matches:  707
Percent of EIA Matches:  0.378
Percent of EIA Non-Matches:  0.622


### EPA Matches

In [83]:
print("Number of EPA Matches: ", len(epa.loc[epa["match"] == True]))
print("Number of EPA Non-Matches: ", len(epa.loc[epa["match"] == False]))

print("Percent of EPA Matches: ", round(len(epa.loc[epa["match"] == True])/len(epa), 3))
print("Percent of EPA Non-Matches: ", round(len(epa.loc[epa["match"] == False])/len(epa), 3))

Number of EPA Matches:  414
Number of EPA Non-Matches:  100
Percent of EPA Matches:  0.805
Percent of EPA Non-Matches:  0.195


### EIA Fuel Breakdown

In [81]:
print("\nPercent Fuel Types Matched\n")
print(eia.loc[eia["match"] == True]["Energy Source 1"].value_counts(normalize=True).round(3))
print("\nPercent Fuel Types Not Matched\n")
print(eia.loc[eia["match"] == False]["Energy Source 1"].value_counts(normalize=True).round(3))


Percent Fuel Types Matched

Natural Gas    0.928
Coal           0.072
Name: Energy Source 1, dtype: float64

Percent Fuel Types Not Matched

Natural Gas    0.372
Wind           0.281
Solar          0.115
Renewable      0.072
Hydro          0.071
Petroleum      0.042
Other          0.041
Nuclear        0.006
Name: Energy Source 1, dtype: float64


### EPA Fuel Breakdown

In [84]:
print("\nPercent Fuel Types Matched\n")
print(epa.loc[epa["match"] == True]["fuel"].value_counts(normalize=True).round(3))
print("\nPercent Fuel Types Not Matched\n")
print(epa.loc[epa["match"] == False]["fuel"].value_counts(normalize=True).round(3))


Percent Fuel Types Matched

Pipeline Natural Gas    0.874
Coal                    0.101
Natural Gas             0.012
NONE                    0.007
Other Gas               0.005
Name: fuel, dtype: float64

Percent Fuel Types Not Matched

Pipeline Natural Gas    0.92
NONE                    0.06
Petroleum Coke          0.01
Wood                    0.01
Name: fuel, dtype: float64


In [8]:
class EIA():
    def __init__(self, key=None):
        if key is None:
            print("You need a key in order to use this API.")
        else:
            self.key = key

    def electric_plant_all(self, plant_id):
        series_id = 'ELEC.PLANT.GEN.{}-ALL-ALL.Q'.format(plant_id)
        req = requests.get("https://api.eia.gov/series/?series_id={}&api_key={}".format(series_id, self.key))
        return pd.DataFrame.from_dict(req.json().get('series')[0].get('data'))

In [45]:
class EPA():
    def __init__(self, key=None):
        if key is None:
            print("You need a key in order to use this API.")
        else:
            self.key = key

    def hourlyData(self, orisCode, unitID, year, quarter):
        req = requests.get("https://api.epa.gov/FACT/1.0/emissions/hourlyData/csv/{}/{}/{}/{}?api_key={}".format(
            orisCode, 
            unitID, 
            year, 
            quarter, 
            self.key))
        con = req.content
        return pd.read_csv(io.StringIO(con.decode('utf-8')))

### Aggregate EPA vs EIA MWh in Q1 2019

In [49]:
epa_api = EPA("9ndbfvcHxIEgQ8KnDGhmVFdw3xiyOgqhhwdJg5Wo")
eia_api = EIA("9ndbfvcHxIEgQ8KnDGhmVFdw3xiyOgqhhwdJg5Wo")

epa_data = epa_api.hourlyData(127.0, "1", "2019", 1)
eia_data = eia_api.electric_plant_all(9)

In [50]:
print(epa_data["HourLoad"].sum())
print(eia_data.loc[eia_data[0] == "2019Q1"].reset_index(drop=True).at[0,1])

KeyError: 'HourLoad'

In [12]:
epa_true = epa.loc[epa["match"] == True]
eia_true = eia.loc[eia["match"] == True]

In [51]:
walk = walk[["CAMD_PLANT_ID", "CAMD_UNIT_ID", "EIA_PLANT_ID", "EIA_GENERATOR_ID"]]
walk["EIA_PLANT_ID"] = walk["EIA_PLANT_ID"].astype(int)
# merged = eia_true.merge(walk, how="right", left_on=["Plant Code", "Generator ID"], right_on=["EIA_PLANT_ID", "EIA_GENERATOR_ID"])
# epa_true.merge(merged, how="right", left_on=["orisCode", "unitId"], right_on=["CAMD_PLANT_ID", "CAMD_UNIT_ID"]).dropna().drop_duplicates()


In [42]:
def get_eia_2019Q1(api,plant_id):
    temp = api.electric_plant_all(9)
    return temp.loc[temp[0] == "2019Q1"].reset_index(drop=True).at[0,1]

def get_EPA_2019Q1(api, plant_id, unit_id):
    return api.hourlyData(plant_id, unit_id, "2019", 1)


In [66]:
dic = {"plant_id":[], "eia":[], "epa":[]}
for i in walk["EIA_PLANT_ID"].unique():
    data = walk.loc[walk["CAMD_PLANT_ID"] == i]

    epa_sum = 0
    for j in data["CAMD_UNIT_ID"].unique():
        epa_sum += epa_api.hourlyData(i,j, "2019", 1)["HourLoad"].sum()
    print(epa_sum)
    temp = eia_api.electric_plant_all(i)
    eia_sum = temp.loc[temp[0] == "2019Q1"].reset_index(drop=True).at[0,1]
    dic["plant_id"].append(i)
    dic["eia"].append(eia_sum)
    dic["epa"].append(epa_sum)

yes = pd.DataFrame().from_dict(dic)


6290.0
962455.0
2054725.0
20660.0
265696.0
134993.0
5866.0
9295.0
632405.0
424828.0


EmptyDataError: No columns to parse from file

In [74]:
yes = pd.DataFrame().from_dict(dic)

In [75]:
yes["diff"] = yes["eia"] - yes["epa"]
yes["percent_diff"] = ((yes["eia"] - yes["epa"]) / yes["epa"]).abs() * 100


In [87]:
yes.to_csv("yes.csv")

In [88]:
yes["diff"].abs().mean()

28304.9324