In [7]:
import batch_data_support_regions as bds_reg
import os, os.path
import numpy as np
import pandas as pd
import model_attributes as ma
from attribute_table import AttributeTable
import datetime
import model_afolu as mafl
import model_ippu as mi
import model_circular_economy as mc
import model_energy as me
import model_electricity as ml
import model_socioeconomic as se
from model_socioeconomic import Socioeconomic
import setup_analysis as sa
import support_functions as sf
import importlib
import re
import time
import warnings
import matplotlib.pyplot as plt
from typing import *

# Reload the project modules so that edits made on disk are picked up without
# restarting the kernel; the tuple keeps the reload order used originally.
for _module in (ma, sa, sf, mafl, mc, mi, me, se, ml):
    importlib.reload(_module)

# NOTE: this suppresses every warning category for the rest of the session
warnings.filterwarnings("ignore")

In [50]:
# root directory holding the energy input data (machine-specific absolute path)
dir_energy_potentials = "/Users/jsyme/Documents/Projects/FY21/SWCHE131_1000/Data/Energy"
dir_irena = os.path.join(dir_energy_potentials, "irena_generation_potential_data")

# attribute tables pulled from the shared model attributes object
_dict_attributes = sa.model_attributes.dict_attributes
attr_hour = _dict_attributes.get("hour")
attr_region = _dict_attributes.get(f"{sa.model_attributes.dim_region}")
attr_sector = _dict_attributes.get("abbreviation_sector")
attr_strat = _dict_attributes.get(f"dim_{sa.model_attributes.dim_strategy_id}")
attr_time_period = _dict_attributes.get(f"dim_{sa.model_attributes.dim_time_period}")
attr_time_slice = _dict_attributes.get("time_slice")
attr_tg1 = _dict_attributes.get("ts_group_1")
attr_tg2 = _dict_attributes.get("ts_group_2")

# literal field names
field_country = "country"
field_hour_group = "hour_group"
field_iso = "iso_code3"
field_iso_region_attr = "iso_alpha_3"
field_year = "year"
# field names taken from attribute table keys
field_hour = attr_hour.key
field_time_period = attr_time_period.key

# attribute derivatives: region key -> upper-cased value of the region
# attribute's "<key>_to_iso_alpha_3" field map, plus the reversed lookup
_key_region_to_iso = f"{attr_region.key}_to_{field_iso_region_attr}"
dict_country_to_iso = {
    k: v.upper()
    for k, v in attr_region.field_maps.get(_key_region_to_iso).items()
}
dict_iso_to_country = sf.reverse_dict(dict_country_to_iso)


# electric model instance (Julia is not initialized); the cells below read its
# NemoMod field names and the electricity fuel category from it
model_elec = ml.ElectricEnergy(
    sa.model_attributes,
    sa.dir_jl,
    sa.dir_ref_nemo,
    initialize_julia = False
)


##  SET SOME DICTIONARIES

# days in each month of a non-leap year, keyed by month number (1-12)
dict_n_days_per_month = dict(
    zip(
        range(1, 13),
        (31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31)
    )
)
# weights (average days per month) for use when only monthly data are available;
# February is set to 28.25 (28 plus a quarter day, presumably to reflect leap years)
dict_num_days_per_month_weights = {
    month: (28.25 if month == 2 else n_days)
    for month, n_days in dict_n_days_per_month.items()
}
# time group 1 (TG1) -> list of integer months, parsed from the "|"-delimited
# "<key>_to_months" field map of the TG1 attribute
_key_tg1_to_months = f"{attr_tg1.key}_to_months"
dict_tg1_to_months = {
    k: [int(x) for x in v.split("|")]
    for k, v in attr_tg1.field_maps.get(_key_tg1_to_months).items()
}
# inverse lookup: month -> TG1 (if a month is listed more than once, the last TG1 wins)
dict_month_to_tg1 = {
    m: k
    for k, mos in dict_tg1_to_months.items()
    for m in mos
}

# hour group dictionaries -- set up the regular expression to match hour groups on
# (the lookup below is the hour attribute's "<key>_to_hour_group" field map)
dict_hour_to_hour_group = attr_hour.field_maps.get(f"{attr_hour.key}_to_hour_group")
def regex_by_hour_group(
    hour_group: int
) -> re.Pattern:
    """
    Build the compiled regular expression used to identify the time slices
        that belong to an hour group.

    The pattern matches, from the start of the string, a run of non-digit
        characters, the literal "_w", another run of non-digit characters,
        and finally `hour_group` at the very end of the string.

    Function Arguments
    ------------------
    - hour_group: hour group index expected at the end of the time slice name
    """
    # raw f-string: "\D" stays a regex token rather than an invalid string escape
    return re.compile(rf"(\D*)_w(\D*){hour_group}$")
# iterate over hour groups to build the time slice -> hour group dictionary
all_hour_groups = list(set(dict_hour_to_hour_group.values()))
dict_time_slice_to_hour_group = {}
for hg in all_hour_groups:
    regex = regex_by_hour_group(hg)
    # every time slice whose name matches this hour group's pattern
    slices_in_group = [
        ts for ts in attr_time_slice.key_values
        if regex.match(ts) is not None
    ]
    dict_time_slice_to_hour_group.update(dict.fromkeys(slices_in_group, hg))

##  Use IEA monthly electricity generation as a proxy for specified demand profile
- See https://www.iea.org/data-and-statistics/data-product/monthly-electricity-statistics#monthly-electricity-statistics


In [94]:
# read data frames -- both inputs are stored in `dir_energy_potentials`, so the
# paths are built from it instead of repeating the absolute directory
df_tg2_distribution = pd.read_excel(
    os.path.join(dir_energy_potentials, "estimate_of_nemomod_specified_demand_profile.xlsx"),
    sheet_name = "raw_data_weighted_by_tg2"
)

# monthly electricity statistics file; the first 8 rows are skipped on read
df_consumption_monthly = pd.read_csv(
    os.path.join(dir_energy_potentials, "MES_1122_utf8.csv"),
    skiprows = 8
)

# some replacements, applied to the cleaned (lower-case, underscored) country names
dict_repl_consumption = {
    "czech_republic": "czechia",
    "korea": "republic_of_korea",
    "people's_republic_of_china": "china",
    "republic_of_turkiye": "turkey",
    "slovak_republic": "slovakia",
    "united_states": "united_states_of_america"
}

# setup some shared variables
balance_consumption = "Final Consumption (Calculated)"
balance_production = "Net Electricity Production"
balance_storage = "Used for pumped storage"

# NOTE: field_country, field_iso_region_attr and field_year are re-assigned here
field_balance = "Balance"
field_cfr = "cfr"
field_country = "Country"
field_distribution_load = "dist_load"
field_distribution_load_hourly = "dist_load_hourly"
field_distribution_load_monthly = "dist_load_monthly"
field_iso_region_attr = "iso_alpha_3"
field_load = "est_load_mwh"
field_month = "month"
field_product = "Product"
field_tg1 = model_elec.field_nemomod_tg1
field_tg2 = model_elec.field_nemomod_tg2
field_value = "Value"
field_year = "year"

# clean country names: lower case, spaces to underscores, then apply replacements
new_countries = [
    dict_repl_consumption.get(name, name)
    for name in (
        x.lower().replace(" ", "_")
        for x in df_consumption_monthly[field_country]
    )
]
df_consumption_monthly[field_country] = new_countries
# keep only the net electricity production balance for the "Electricity" product
df_consumption_monthly = df_consumption_monthly[
    df_consumption_monthly[field_balance].isin([balance_production]) &
    df_consumption_monthly[field_product].isin(["Electricity"])
].reset_index(drop = True)


# some dicts
# NOTE(review): `field_wb_global_region` is not assigned anywhere in the visible
# notebook; on a fresh kernel the next statement raises a NameError unless it is
# defined elsewhere -- confirm and define it in the setup cell.
dict_wb_region_to_region = sf.group_df_as_dict(
    attr_region.table,
    [field_wb_global_region],
    fields_out_set = attr_region.key
)
dict_region_to_wb_region = attr_region.field_maps.get(f"{attr_region.key}_to_{field_wb_global_region}")


####################
#   FORMAT DATA    #
####################

def get_ym(
    datestr: str
) -> Tuple[int, int]:
    """
    Parse a date label of the form "<abbreviated month>-<two-digit year>"
        (e.g., "Jan-15") and return it as a (year, month) tuple of integers.
    """
    parsed = datetime.datetime.strptime(datestr, "%b-%y")
    year, month = parsed.year, parsed.month
    return (year, month)

# split the "Time" labels into integer year and month columns
df_ym = pd.DataFrame(
    list(df_consumption_monthly["Time"].apply(get_ym)),
    columns = [field_year, field_month]
).astype(int)
df_consumption_monthly = (
    pd.concat(
        [df_consumption_monthly, df_ym],
        axis = 1
    )
    .drop(
        ["Time", "Product", "Unit"],
        axis = 1
    )
)

# NOTE: a previously disabled alternative pivoted the table by balance and used
# max(final consumption - pumped storage, 0) as the monthly load. Only the net
# production balance is retained upstream, so the value column is used directly.
df_consumption_monthly = (
    df_consumption_monthly
    .drop([field_balance], axis = 1)
    .rename(
        columns = {
            field_value: field_distribution_load_monthly
        }
    )
)

# convert to monthly proportions of the annual total, by country and year
df_consumption_monthly_grouped = df_consumption_monthly.groupby([field_country, field_year])
year_min = 2015
df_out = []
for (_, year), df_group in df_consumption_monthly_grouped:
    # keep complete years only (12 monthly records), starting in year_min
    if (len(df_group) != 12) or (year < year_min):
        continue
    vec_month = np.array(df_group[field_distribution_load_monthly])
    # assign() returns a new frame, so the groupby slice is never mutated
    df_out.append(
        df_group.assign(
            **{field_distribution_load_monthly: vec_month/vec_month.sum()}
        )
    )
df_out = pd.concat(df_out, axis = 0).reset_index(drop = True)

# get average monthly production; the aggregation is specified as a
# {field: function} dictionary, as in every other simple_df_agg call in this notebook
df_production_monthly_average = sf.simple_df_agg(
    df_out.drop([field_year], axis = 1),
    [field_country, field_month],
    {field_distribution_load_monthly: "mean"}
).rename(columns = {field_country: field_iso})
# assign the result back: an inplace replace on a selected column is chained
# assignment and does not update the frame under pandas Copy-on-Write
df_production_monthly_average[field_iso] = (
    df_production_monthly_average[field_iso].replace(dict_country_to_iso)
)


In [197]:
# read data frames -- both inputs are stored in `dir_energy_potentials`, so the
# paths are built from it instead of repeating the absolute directory
df_tg2_distribution = pd.read_excel(
    os.path.join(dir_energy_potentials, "estimate_of_nemomod_specified_demand_profile.xlsx"),
    sheet_name = "raw_data_weighted_by_tg2"
)

# monthly electricity statistics file; the first 8 rows are skipped on read
df_consumption_monthly = pd.read_csv(
    os.path.join(dir_energy_potentials, "MES_1122_utf8.csv"),
    skiprows = 8
)

# some replacements, applied to the cleaned (lower-case, underscored) country names
dict_repl_consumption = {
    "czech_republic": "czechia",
    "korea": "republic_of_korea",
    "people's_republic_of_china": "china",
    "republic_of_turkiye": "turkey",
    "slovak_republic": "slovakia",
    "united_states": "united_states_of_america"
}

# setup some shared variables
balance_consumption = "Final Consumption (Calculated)"
balance_production = "Net Electricity Production"
balance_storage = "Used for pumped storage"

# NOTE: field_country and field_year are re-assigned here
field_balance = "Balance"
field_cfr = "cfr"
field_country = "Country"
field_distribution_load = "dist_load"
field_distribution_load_hourly = "dist_load_hourly"
field_distribution_load_monthly = "dist_load_monthly"
field_load = "est_load_mwh"
field_month = "month"
field_product = "Product"
field_tg1 = model_elec.field_nemomod_tg1
field_tg2 = model_elec.field_nemomod_tg2
field_value = "Value"
field_year = "year"

# clean country names: lower case, spaces to underscores, then apply replacements
new_countries = [
    dict_repl_consumption.get(name, name)
    for name in (
        x.lower().replace(" ", "_")
        for x in df_consumption_monthly[field_country]
    )
]
df_consumption_monthly[field_country] = new_countries
# keep only the net electricity production balance for the "Electricity" product
df_consumption_monthly = df_consumption_monthly[
    df_consumption_monthly[field_balance].isin([balance_production]) &
    df_consumption_monthly[field_product].isin(["Electricity"])
].reset_index(drop = True)


# some dicts
# NOTE(review): `field_wb_global_region` is not assigned anywhere in the visible
# notebook; on a fresh kernel the next statement raises a NameError unless it is
# defined elsewhere -- confirm and define it in the setup cell.
dict_wb_region_to_regions = sf.group_df_as_dict(
    attr_region.table,
    [field_wb_global_region],
    fields_out_set = attr_region.key
)
dict_region_to_wb_region = attr_region.field_maps.get(f"{attr_region.key}_to_{field_wb_global_region}")


####################
#   FORMAT DATA    #
####################

def get_ym(
    datestr: str
) -> Tuple[int, int]:
    """
    Parse a date label of the form "<abbreviated month>-<two-digit year>"
        (e.g., "Jan-15") and return it as a (year, month) tuple of integers.
    """
    parsed = datetime.datetime.strptime(datestr, "%b-%y")
    year, month = parsed.year, parsed.month
    return (year, month)

# split the "Time" labels into integer year and month columns
df_ym = pd.DataFrame(
    list(df_consumption_monthly["Time"].apply(get_ym)),
    columns = [field_year, field_month]
).astype(int)
df_consumption_monthly = (
    pd.concat(
        [df_consumption_monthly, df_ym],
        axis = 1
    )
    .drop(
        ["Time", "Product", "Unit"],
        axis = 1
    )
)

# NOTE: a previously disabled alternative pivoted the table by balance and used
# max(final consumption - pumped storage, 0) as the monthly load. Only the net
# production balance is retained upstream, so the value column is used directly.
df_consumption_monthly = (
    df_consumption_monthly
    .drop([field_balance], axis = 1)
    .rename(
        columns = {
            field_value: field_distribution_load_monthly
        }
    )
)

# convert to monthly proportions of the annual total, by country and year
df_consumption_monthly_grouped = df_consumption_monthly.groupby([field_country, field_year])
year_min = 2015
df_out = []
for (_, year), df_group in df_consumption_monthly_grouped:
    # keep complete years only (12 monthly records), starting in year_min
    if (len(df_group) != 12) or (year < year_min):
        continue
    vec_month = np.array(df_group[field_distribution_load_monthly])
    # assign() returns a new frame, so the groupby slice is never mutated
    df_out.append(
        df_group.assign(
            **{field_distribution_load_monthly: vec_month/vec_month.sum()}
        )
    )
df_out = pd.concat(df_out, axis = 0).reset_index(drop = True)

# get average monthly production
df_production_monthly_average = sf.simple_df_agg(
    df_out.drop([field_year], axis = 1),
    [field_country, field_month],
    {field_distribution_load_monthly: "mean"}
).rename(columns = {field_country: field_iso})
# assign the result back: an inplace replace on a selected column is chained
# assignment and does not update the frame under pandas Copy-on-Write
df_production_monthly_average[field_iso] = (
    df_production_monthly_average[field_iso].replace(dict_country_to_iso)
)

# set a default code (may not be true ISO) for countries for which data are not available
iso_dummy_default = "iea_total"

# for regions that have no IEA/OECD data, try to relate to closest region:
# WB global region without data -> WB global region whose average is used instead
dict_try_wb_region_to_related_wb_region = {
    "Middle East & North Africa": "Latin America & Caribbean",
    "Sub-Saharan Africa": "Latin America & Caribbean",
    "South Asia": "East Asia & Pacific"
}

# codes that have a monthly average, and the known ISO codes that do not
regions_with_average = set(df_production_monthly_average[field_iso])
regions_missing = sorted(set(dict_iso_to_country.keys()) - regions_with_average)


"""
TRY REPLACING WITH THIS

regions = sc.Regions(sa.model_attributes)
regions.aggregate_df_by_wb_global_region(
    df_in,
    [field_month],
    {field_distribution_load_monthly: "mean"},
    field_iso = field_iso
)
"""
# function to get a wb regional average
def get_production_avg_for_region(
    df_in: pd.DataFrame,
    global_wb_region: str,
    dict_region_to_iso: dict = dict_country_to_iso,
    field_distribution_load_monthly: str = field_distribution_load_monthly,
    field_iso: str = field_iso,
    field_month: str = field_month
) -> pd.DataFrame:
    """
    Get a regional average (for WB global region) across ISOs for which
        production averages are available in df_in

    Function Arguments
    ------------------
    - df_in: data frame containing field_iso, field_month, and
        field_distribution_load_monthly
    - global_wb_region: WB global region to average over; looked up in the
        notebook-level dictionary dict_wb_region_to_regions

    Keyword Arguments
    -----------------
    - dict_region_to_iso: dictionary mapping region names to the codes stored
        in field_iso
    - field_distribution_load_monthly: field containing the monthly load
        fraction to average
    - field_iso: field in df_in containing ISO codes
    - field_month: field in df_in containing the month; the mean is taken
        within each month
    """

    # regions in the WB global region; an unknown region gives an empty list
    # instead of failing on iteration over None
    regions_in_wb = dict_wb_region_to_regions.get(global_wb_region)
    regions_in_wb = [] if (regions_in_wb is None) else regions_in_wb

    # translate region names to ISO codes, skipping regions with no ISO mapping
    isos_wb = [
        iso for iso in (dict_region_to_iso.get(x) for x in regions_in_wb)
        if iso is not None
    ]

    df_filt = df_in[
        df_in[field_iso].isin(isos_wb)
    ]

    # get mean by month
    df_filt = sf.simple_df_agg(
        df_filt,
        [field_month],
        {field_distribution_load_monthly: "mean"}
    )

    return df_filt



# Fill in a monthly profile for every known ISO code that has no average of its
# own, using (in order of preference): the closest region with data in the same
# WB global region, the average of a related WB global region, or the default.
if len(regions_missing) > 0:

    df_append = [df_production_monthly_average]

    for iso_missing in regions_missing:

        # initialize some potential components
        df_cur = None
        iso_replace = None

        region = dict_iso_to_country.get(iso_missing)
        region_wb = dict_region_to_wb_region.get(region)

        # dict_wb_region_to_regions stores region names while regions_with_average
        # stores ISO codes, so translate names to ISO before intersecting;
        # an unknown WB region gives an empty candidate list
        regions_in_wb = dict_wb_region_to_regions.get(region_wb)
        regions_in_wb = [] if (regions_in_wb is None) else regions_in_wb
        isos_wb = set(dict_country_to_iso.get(x) for x in regions_in_wb)
        isos_wb.discard(None)
        regions_valid = sorted(isos_wb & regions_with_average)

        if len(regions_valid) > 0:
            # get closest region within global WB region
            # NOTE(review): regions_valid is passed as ISO codes to agree with
            # type_input = "iso" -- confirm against bds_reg.get_closest_region
            iso_replace = bds_reg.get_closest_region(
                iso_missing,
                attr_region,
                regions_valid = regions_valid,
                type_input = "iso",
                type_return = "iso"
            )

        elif region_wb in dict_try_wb_region_to_related_wb_region.keys():
            # no data in the WB region; use the average of the related WB region
            df_cur = get_production_avg_for_region(
                df_production_monthly_average,
                dict_try_wb_region_to_related_wb_region.get(region_wb)
            )

        # default to global IEA average when no usable substitute was found
        # (this also covers a closest-region lookup that returns nothing usable)
        if (df_cur is None) and (iso_replace not in regions_with_average):
            iso_replace = iso_dummy_default

        if df_cur is None:
            df_cur = (
                df_production_monthly_average[
                    df_production_monthly_average[field_iso] == iso_replace
                ]
                .copy()
                .reset_index(drop = True)
            )

        # relabel the borrowed profile with the missing ISO code
        df_cur[field_iso] = iso_missing
        df_append.append(df_cur)

    df_production_monthly_average = pd.concat(df_append).reset_index(drop = True)



# aggregate by tg1: relabel months with their TG1 and sum the monthly fractions
df_production_tg1_average = df_production_monthly_average.copy()
# assign the result back: an inplace replace on a selected column is chained
# assignment and does not update the frame under pandas Copy-on-Write
df_production_tg1_average[field_month] = (
    df_production_tg1_average[field_month].replace(dict_month_to_tg1)
)
df_production_tg1_average = df_production_tg1_average.rename(
    columns = {field_month: field_tg1}
)

df_production_tg1_average = sf.simple_df_agg(
    df_production_tg1_average,
    [field_iso, field_tg1],
    {field_distribution_load_monthly: "sum"}
)



##  AGGREGATE HOURLY VARIATION BY HOUR GROUP

# relabel hours with their hour group and sum the load fractions by hour group/TG2
df_hg_distribution = df_tg2_distribution.copy()
df_hg_distribution[field_hour] = (
    df_hg_distribution[field_hour].replace(dict_hour_to_hour_group)
)
df_hg_distribution = df_hg_distribution.rename(
    columns = {field_hour: field_hour_group}
)

df_hg_distribution = sf.simple_df_agg(
    df_hg_distribution,
    [field_hour_group, field_tg2],
    {field_distribution_load: "sum"}
).rename(columns = {field_distribution_load: field_distribution_load_hourly})
    
    
    

##  BUILD TABLE

# initialize the output in terms of hour group
df_sdp = attr_time_slice.table.copy().drop(["description"], axis = 1)
df_sdp[field_hour_group] = df_sdp[attr_time_slice.key].replace(dict_time_slice_to_hour_group)
# add hourly fraction (merged on the fields shared by both tables)
df_sdp = pd.merge(
    df_sdp,
    df_hg_distribution,
    how = "left"
)
# add the TG1 fractions for each ISO code
df_sdp = pd.merge(
    df_sdp,
    df_production_tg1_average,
    how = "outer"
)


# add some key fields and format for nemomod
# value: product of the hourly and monthly load fractions
df_sdp[model_elec.field_nemomod_value] = (
    np.array(df_sdp[field_distribution_load_hourly])
    * np.array(df_sdp[field_distribution_load_monthly])
)
df_sdp[model_elec.field_nemomod_fuel] = model_elec.cat_enfu_elec
# assign the result back: an inplace replace on a selected column is chained
# assignment and does not update the frame under pandas Copy-on-Write
df_sdp[field_iso] = df_sdp[field_iso].replace(dict_iso_to_country)
df_sdp = df_sdp.rename(
    columns = {
        field_iso: model_elec.field_nemomod_region,
        attr_time_slice.key: model_elec.field_nemomod_time_slice
    }
)

# clean sums (very close to 1, just make 1): rescale the values within each
# region/fuel group so that they sum to 1
fields_group_sdp = [
    model_elec.field_nemomod_region,
    model_elec.field_nemomod_fuel
]
df_sdp_grouped = df_sdp.groupby(fields_group_sdp)

df_sdp_out = []
for _, df_group in df_sdp_grouped:
    vec_val = np.array(df_group[model_elec.field_nemomod_value])
    df_sdp_out.append(
        df_group.assign(
            **{model_elec.field_nemomod_value: vec_val/vec_val.sum()}
        )
    )
df_sdp = pd.concat(df_sdp_out, axis = 0)


# keep the NemoMod fields only, ordered by region, fuel, and time slice
fields_sort_sdp = fields_group_sdp + [model_elec.field_nemomod_time_slice]
df_sdp = (
    df_sdp[fields_sort_sdp + [model_elec.field_nemomod_value]]
    .sort_values(by = fields_sort_sdp)
    .reset_index(drop = True)
)


# switch for exporting the table
write_output = True
if write_output:
    ## write to output
    df_sdp.to_csv(
        sa.dict_fp_csv_nemomod.get("SpecifiedDemandProfile"),
        index = None,
        encoding = "UTF-8"
    )
