# Contents
* 2019 grid mix in CISO, NEVP, WACM, BPAT, US
* Consumption emissions factors in 2019 for the same balancing areas using custom technology-specific emissions factors
* Some drafting work

# TODOs left for a future date
* Deal with the CFE (Mexico) issue in a cleaner way - ideally in the consumption emissions function. Should add unit tests to help with this issue
* Make extraction of EFs from calc easier - Have the corresponding dataframe exposed as an attribute from BaDataEmissionsCalc?
* Make the code faster? I am getting a performance warning from pandas.

In [None]:
import sys

# Print the path of the Python interpreter running this kernel, to confirm
# which environment the notebook is using.
print(sys.executable)

In [None]:
%load_ext autoreload
%autoreload 2
%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd

import gridemissions
from gridemissions.load import BaData
from gridemissions.viz.reports import cleaning_plot
from gridemissions import eia_api
from gridemissions.emissions import BaDataEmissionsCalc

In [None]:
# Display the configured data path; the EBA_*.csv inputs below are read from it.
gridemissions.config["DATA_PATH"]

# 2019 grid mix in CISO, NEVP, WACM, BPAT, US

In [None]:
# Electricity dataset, read from the configured data path.
file_name_elec = gridemissions.config["DATA_PATH"] / "EBA_elec.csv"
elec = BaData(fileNm=file_name_elec)

# Analysis window (both bounds are used as .loc slice labels below) and the
# balancing areas that get their own column in the exported grid mix.
start, end = (
    pd.to_datetime(stamp) for stamp in ("20190101T00Z", "20200101T00Z")
)
bas = ["CISO", "NEVP", "WACM", "BPAT"]


def get_grid_mix(ba):
    """Return generation summed over ``start:end``, by source, for one region.

    Parameters
    ----------
    ba : str
        Region code handed to ``elec.get_cols`` (callers pass entries of
        ``elec.regions``).

    Returns
    -------
    pandas.Series
        One value per ``SRC_*`` column that exists for ``ba``, indexed by the
        fourth dot-separated token of the column name. Values are raw sums
        over the window, not shares; normalization is left to the caller.
    """
    # Walk every source listed in eia_api.SRC and keep only the columns that
    # are actually present in the dataset for this region.
    available = [
        col
        for src in eia_api.SRC
        for col in elec.get_cols(r=ba, field=f"SRC_{src}")
        if col in elec.df.columns
    ]
    totals = elec.df.loc[start:end, available].sum()
    # Relabel each entry by the fourth token of its dotted column name
    # (e.g. "COL" in "EBA.IPCO-ALL.NG.COL.H").
    totals.index = totals.index.map(lambda col_name: col_name.split(".")[3])
    return totals


# Source-by-region table of window totals; a source that a region does not
# report is filled with 0.
grid_mixes_all = pd.DataFrame(
    {ba: get_grid_mix(ba) for ba in elec.regions}
).fillna(0.0)

# "US" mix: add up all regions per source, then divide by the grand total.
grid_mix_us = grid_mixes_all.sum(axis=1)
grid_mix_us = grid_mix_us / grid_mix_us.sum()

# Shares for the regions of interest: each column divided by its own total.
grid_mixes = grid_mixes_all[bas] / grid_mixes_all[bas].sum()
grid_mixes["US"] = grid_mix_us

# Export with the sources in a fixed row order.
grid_mixes.loc[
    ["WAT", "NUC", "SUN", "NG", "WND", "COL", "OIL", "BIO", "GEO", "OTH"]
].to_csv("2019_grid_mixes.csv")

# Custom Emissions Factors

# Load data

In [None]:
# Both input files live under the configured data path.
data_dir = gridemissions.config["DATA_PATH"]
file_name_elec = data_dir / "EBA_elec.csv"
file_name_co2 = data_dir / "EBA_co2.csv"

# Electricity dataset, and the CO2 dataset loaded with variable="CO2".
elec = BaData(fileNm=file_name_elec)
co2 = BaData(fileNm=file_name_co2, variable="CO2")

In [None]:
# Analysis window; both bounds are used as .loc slice labels below.
start = pd.to_datetime("20190101T00Z")
end = pd.to_datetime("20200101T00Z")

# HACK: CFE (Mexico) columns are removed from any window that lies before
# this date. A column counts as CFE-related when its name contains "CFE-" or
# "-CFE".
CFE_cutoff_date = pd.to_datetime("20210128T20Z")
non_CFE_cols = [c for c in elec.df.columns if not (("CFE-" in c) or ("-CFE" in c))]

# Two more columns are dropped by name (presumably because they contain NaNs
# in this window — TODO confirm, and consider removing all NaN columns).
cols = [
    c
    for c in non_CFE_cols
    if c not in ["EBA.GRID-ALL.NG.OTH.H", "EBA.IPCO-ALL.NG.COL.H"]
]

if start >= CFE_cutoff_date:
    # Window starts at or after the cutoff: keep every column.
    mini_elec = BaData(df=elec.df.loc[start:end])
elif end < CFE_cutoff_date:
    # Window lies entirely before the cutoff: use the filtered column list.
    mini_elec = BaData(df=elec.df.loc[start:end, cols])
else:
    # Window starts before the cutoff and reaches it. This includes
    # end == CFE_cutoff_date, which previously fell through to the unfiltered
    # branch even though the window lies before the cutoff.
    raise ValueError("Not supported")

In [None]:
# Technology-specific emissions factors: the first CSV column is the row
# index, and missing entries are treated as 0.
EFs = pd.read_csv("technology EFs.csv", index_col=0)
EFs = EFs.fillna(0.0)
# The "UNK" row reuses the factors of the "OTH" row.
EFs.loc["UNK"] = EFs.loc["OTH"]

In [None]:
def get_consumption_efs(poll):
    """Return the mean consumption emissions factor per region in ``bas``.

    Runs ``BaDataEmissionsCalc`` on ``mini_elec`` with the ``EFs[poll]``
    column as emissions factors, divides each region's "D" pollutant series
    by its "D" electricity series over ``start:end``, and averages the ratio
    over time.

    Parameters
    ----------
    poll : str
        Column of ``EFs`` to use; also passed as ``poll`` to the calculator.

    Returns
    -------
    pandas.Series
        Time-averaged ratio, indexed by the entries of ``bas``.
    """
    calc = BaDataEmissionsCalc(mini_elec, poll=poll, EF=EFs[poll])
    calc.process()
    emissions = calc.poll_data

    # Field "D" is presumably demand (consumption) — confirm against BaData.
    ratio_by_ba = {}
    for ba in emissions.regions:
        emitted = emissions.df.loc[start:end, emissions.get_cols(ba, field="D")]
        consumed = elec.df.loc[start:end, elec.get_cols(ba, field="D")]
        ratio_by_ba[ba] = emitted.values.flatten() / consumed.values.flatten()

    timestamps = emissions.df.loc[start:end].index
    ratios = pd.DataFrame(ratio_by_ba, index=timestamps)
    return ratios[bas].mean()


# One column per pollutant in EFs, one row per region returned by
# get_consumption_efs; written straight to CSV.
consumption_efs = pd.DataFrame(
    {poll: get_consumption_efs(poll) for poll in EFs.columns}
)
consumption_efs.to_csv("2019 consumption EFs.csv")

In [None]:
# Deliberate stop: a bare `raise` with no active exception raises RuntimeError,
# which halts a top-to-bottom run at this cell. Presumably this keeps the
# drafting cells below from running automatically — confirm.
raise

# Sanity check: rerun emissions calc from elec data
We should get the same result...

## Same result
start = pd.to_datetime("20220101")  
end = pd.to_datetime("20220102")

## Different result
start = pd.to_datetime("20200101")  
end = pd.to_datetime("20200102")  
There is a warning about some NaNs being set to 0 for this one - could this be the issue? Need to check this as this is making it impossible to get data before 2019. Was this the issue with imports from Mexico? Check if there are columns missing in the full dataset. If the number of BAs changed at different points in time, that could be causing the code to break. I may have seen this before...

In [None]:
# Peek at the first rows of the electricity dataframe.
elec.df.head()

# Issue with columns that were introduced later

In [None]:
# For each column containing at least one NaN, draw a step-like trace that is
# 0 where the value is missing and a column-specific constant level where it
# is present, so the date at which each column starts reporting is visible.
f, ax = plt.subplots()
has_gaps = elec.df.isna().sum() > 0
for level, col in enumerate(elec.df.loc[:, has_gaps].columns, start=1):
    present = 1 - elec.df.loc[:, col].isna()
    ax.plot(present * level, label=col)
# Legend outside the axes, to the right.
ax.legend(loc=6, bbox_to_anchor=(1.0, 0.5))
f.autofmt_xdate()

In [None]:
# List the columns that contain at least one NaN.
elec.df.loc[:, elec.df.isna().sum() > 0].columns

In [None]:
# Analysis window; both bounds are used as .loc slice labels below.
start = pd.to_datetime("20190101T00Z")
end = pd.to_datetime("20200101T00Z")

# HACK: CFE (Mexico) columns are removed from any window that lies before
# this date. A column counts as CFE-related when its name contains "CFE-" or
# "-CFE".
CFE_cutoff_date = pd.to_datetime("20210128T20Z")
non_CFE_cols = [c for c in elec.df.columns if not (("CFE-" in c) or ("-CFE" in c))]

# Two more columns are dropped by name (presumably because they contain NaNs
# in this window — TODO confirm, and consider removing all NaN columns).
cols = [
    c
    for c in non_CFE_cols
    if c not in ["EBA.GRID-ALL.NG.OTH.H", "EBA.IPCO-ALL.NG.COL.H"]
]

if start >= CFE_cutoff_date:
    # Window starts at or after the cutoff: keep every column.
    mini_elec = BaData(df=elec.df.loc[start:end])
elif end < CFE_cutoff_date:
    # Window lies entirely before the cutoff: use the filtered column list.
    mini_elec = BaData(df=elec.df.loc[start:end, cols])
else:
    # Window starts before the cutoff and reaches it. This includes
    # end == CFE_cutoff_date, which previously fell through to the unfiltered
    # branch even though the window lies before the cutoff.
    raise ValueError("Not supported")

In [None]:
# Inspect the dataframe of the trimmed dataset built above.
mini_elec.df

In [None]:
# Rerun the emissions calculation on the trimmed electricity data, using the
# calculator's default arguments (no `poll` or `EF` is passed here), then
# display the resulting pollutant dataframe.
co2_calc = BaDataEmissionsCalc(mini_elec)
co2_calc.process()
co2_calc.poll_data.df

In [None]:
# Reference values from the loaded CO2 dataset, restricted to the same window
# and the same columns as the recomputed result.
co2.df.loc[start:end, co2_calc.poll_data.df.columns]

In [None]:
# Super quick check of the difference
# Super quick check of the difference: total absolute deviation between the
# loaded CO2 dataset and the recomputed one, over all shared columns and the
# whole window.
recomputed = co2_calc.poll_data.df
reference = co2.df.loc[start:end, recomputed.columns]
(reference - recomputed).abs().sum().sum()

# Change EFs and recompute

In [None]:
?BaDataEmissionsCalc

In [None]:
# Write the recomputed pollutant dataframe to CSV.
# NOTE(review): the file name says H2O, but `co2_calc` is built above without
# a `poll` argument — confirm which pollutant this file actually holds.
co2_calc.poll_data.df.to_csv("H2O_2019.csv")

In [None]:
# Column name(s) of the "D" field for CISO in the CO2 dataset
# ("D" is presumably demand/consumption — confirm against BaData).
co2.get_cols("CISO", field="D")

In [None]:
# Per-region ratio of the CO2 dataset's "D" column(s) to the electricity
# dataset's "D" column(s) over start:end, one column per region.
# The regions come from `co2` itself: the previous `H2O.regions` referred to
# a name that is never defined in this notebook and raised NameError.
CO2i = pd.DataFrame(
    {
        ba: (
            co2.df.loc[start:end, co2.get_cols(ba, field="D")].values.flatten()
            / elec.df.loc[start:end, elec.get_cols(ba, field="D")].values.flatten()
        )
        for ba in co2.regions
    },
    index=co2.df.loc[start:end].index,
)

In [None]:
# NOTE(review): `H2Oi` is not defined anywhere in the visible notebook (the
# preceding cell builds `CO2i`), so this raises NameError as written —
# confirm which dataframe should be exported here.
H2Oi.to_csv("H2Oi_2019.csv")

In [None]:
# Plot the CISO, NEVP and WACM columns of `H2Oi` on one axis, with an empty
# y-axis label.
# NOTE(review): `H2Oi` is not defined anywhere in the visible notebook —
# this raises NameError as written; confirm the intended dataframe.
f, ax = plt.subplots()
H2Oi[["CISO", "NEVP", "WACM"]].plot(ax=ax)
ax.set_ylabel("")

In [None]:
# Yearly mean of `H2Oi` (grouped by the year of its index), shown for CISO,
# NEVP and WACM.
# NOTE(review): `H2Oi` is not defined anywhere in the visible notebook —
# this raises NameError as written; confirm the intended dataframe.
H2Oi.groupby(H2Oi.index.year).mean()[["CISO", "NEVP", "WACM"]]