In [1]:
import xarray as xr
import cf_xarray as cfxr
import numpy as np

import sys

sys.path.append("..")

from cge_modeling.gams.gams_constants import ENERGY, energy_sam
from cge_modeling.gams.from_excel import make_code_dicts

In [2]:
# Ordering of the level-0 account groups used when laying out SAM rows and columns.
group_order = [
    # commodities, producers, factors and institutions
    "Imported Commodities", "Domestic Commodities", "Activities", "Factors", "Institution",
    # tax and subsidy accounts
    "Import Sales Tax", "Domestic Sales Tax", "Factor Tax", "Factor Subsidity", "Tax",
    # margins, trade and everything else
    "Import Margin", "Export Margin", "Trade", "Other",
]

# Same ordering without the "Import Margin", "Export Margin" and "Other" groups.
reduced_group_order = [
    group for group in group_order if group not in ("Import Margin", "Export Margin", "Other")
]

# Power-sector account labels as they appear after aggregation ("TnD" is electricity
# transmission and distribution).
power_suppliers = [
    "CoalP", "GasP", "HydroP", "NuclearP", "OilP", "OtherP", "SolarP", "WindP", "TnD",
]

# Non-power sector codes (presumably the GTAP sector codes -- TODO confirm), in
# alphabetical order.
goods_suppliers = [
    "AFS", "ATP", "BPH", "B_T", "CHM", "CMN", "CMT", "CNS",
    "COA", "CTL", "C_B", "DWE", "EDU", "EEQ", "ELE", "FMP",
    "FRS", "FSH", "GAS", "GDT", "GRO", "HHT", "INS", "I_S",
    "LEA", "LUM", "MIL", "MVH", "NFM", "NMM", "OAP", "OBS",
    "OCR", "OFD", "OFI", "OIL", "OME", "OMF", "OMT", "OSD",
    "OSG", "OTN", "OTP", "OXT", "PCR", "PDR", "PFB", "PPP",
    "P_C", "RMK", "ROS", "RPP", "RSA", "SGR", "TEX", "TRD",
    "VOL", "V_F", "WAP", "WHS", "WHT", "WOL", "WTP", "WTR",
]

# Labels of the three aggregated sectors used for the 3x3 version of the SAM.
goods_suppliers_3x3 = ["Agriculture", "Manufacturing", "Service"]

# Preferred (group, value) ordering of the accounts of the reduced SAM. Each entry of the
# inner table is one account group together with its members, listed in output order.
sorted_cols_reduced = [
    (group, member)
    for group, members in [
        ("Factors", ["Capital", "Labor", "Electricity"]),
        ("Institution", ["Household", "Government", "Investment", "Grid"]),
        ("Imported Commodities", goods_suppliers_3x3 + power_suppliers),
        ("Domestic Commodities", goods_suppliers_3x3 + power_suppliers),
        ("Activities", goods_suppliers_3x3 + power_suppliers),
        ("Import Sales Tax", goods_suppliers_3x3 + power_suppliers),
        ("Domestic Sales Tax", goods_suppliers_3x3 + power_suppliers),
        ("Factor Tax", ["Capital", "Labor", "Electricity"]),
        ("Factor Subsidity", ["Capital", "Labor", "Electricity"]),
        ("Tax", ["Export Duty", "Import Duty", "Income", "Production"]),
        ("Trade", ["Rest of World"]),
    ]
    for member in members
]

In [3]:
# Load the dataset, rebuild the compressed "row" / "column" MultiIndex coordinates and
# keep only the SAM variable.
df = cfxr.decode_compress_to_multi_index(
    xr.load_dataset("GTAP_SAM_with_errors.nc"),
    ["row", "column"],
).SAM

In [4]:
# Build the code lookup dictionaries from the Excel workbook at `path`. Later cells read
# the "country" and "energy" entries of `code_dicts`.
path = "data/GTAP_raw_data.xlsx"
code_dicts = make_code_dicts(path)

# Write to Excel

In [5]:
import pandas as pd
import os


def create_excel(df, code_dicts, output_path="data/gtap_SAM_2017.xlsx", force_rewrite=False):
    """
    Write every country's SAM to its own worksheet of a single Excel workbook.

    Parameters
    ----------
    df
        Object supporting ``df.sel(country=code).to_dataframe()``; the resulting frame
        must have a "SAM" column and ``col_group`` / ``col_value`` index levels.
    code_dicts : dict
        Must contain a "country" entry mapping country code -> country name.
    output_path : str
        Workbook to create.
    force_rewrite : bool
        When False (default) an already existing ``output_path`` is left untouched.

    Returns
    -------
    None
    """
    # Nothing to do when the workbook is already there and no rewrite was requested.
    if os.path.isfile(output_path) and not force_rewrite:
        return

    code_to_country = code_dicts["country"]
    # Worksheets are written in alphabetical order of the country name.
    country_codes = sorted(code_to_country, key=code_to_country.get)

    # Honour the requested destination (the path used to be hard-coded here, so the
    # existence check and the actual write could refer to different files).
    with pd.ExcelWriter(output_path, engine="openpyxl") as writer:
        for country in country_codes:
            temp = df.sel(country=country).to_dataframe()["SAM"].unstack(["col_group", "col_value"])
            temp.columns.names = ["group", "value"]
            temp.index.names = ["group", "value"]
            # `group_order` is the notebook-level list fixing the order of account groups.
            temp = temp.loc[group_order, group_order]

            long_name = code_to_country[country]
            # Excel limits worksheet names to 31 characters; names are cut to 30.
            temp.to_excel(writer, sheet_name=long_name[:30])


# create_excel(df, code_dicts)

# Single Country 

In [6]:
# SAM of the country coded "SRB" as a square DataFrame whose rows and columns are both
# labelled by (group, value).
SRB = (
    df.sel(country="SRB")
    .to_dataframe()["SAM"]
    .unstack(["col_group", "col_value"])
    .rename_axis(index=["group", "value"], columns=["group", "value"])
)

In [7]:
def sam_errors(df):
    """Return, for every account, the absolute gap between its column total and its row total."""
    column_totals = df.sum(axis=0)
    row_totals = df.sum(axis=1)
    imbalance = column_totals - row_totals
    return np.abs(imbalance)


def max_error(df, func="max"):
    """
    Summarise the largest row/column imbalance of a SAM.

    Parameters
    ----------
    df : pandas.DataFrame
        Square SAM, passed on to ``sam_errors``.
    func : {"max", "idxmax"}
        "max" returns the size of the largest imbalance, "idxmax" the label of the
        account where it occurs.

    Raises
    ------
    ValueError
        If ``func`` is neither "max" nor "idxmax" (previously ``None`` was returned
        silently, which hid typos in the argument).
    """
    if func not in ("max", "idxmax"):
        raise ValueError(f"func must be 'max' or 'idxmax', got {func!r}")

    errors = sam_errors(df)
    return errors.max() if func == "max" else errors.idxmax()

In [8]:
def merge_values(df, level_0, to_merge, agg_name):
    """
    Collapse several accounts of one group into a single account, on rows and columns.

    Parameters
    ----------
    df : pandas.DataFrame
        Square frame with two-level (group, value) labels on both axes. Not modified.
    level_0 : str
        Group (first index level) that contains the accounts to merge.
    to_merge : iterable
        Second-level labels of the accounts to merge.
    agg_name : str
        Second-level label of the resulting account.

    Returns
    -------
    pandas.DataFrame
        Copy of ``df`` with the listed accounts replaced by their sum and both axes
        sorted on their two levels.
    """
    merged = df.copy()
    members = [(level_0, label) for label in to_merge]

    # The aggregate first lives under a placeholder label and is renamed at the very end
    # (presumably so that it cannot clash with a member already called `agg_name`).
    placeholder = f"__{agg_name}__"
    placeholder_key = (level_0, placeholder)

    # Rows: append the summed row, then remove the members.
    merged.loc[placeholder_key, :] = merged.loc[members, :].sum(axis=0)
    merged = merged.drop(index=members).sort_index(axis=0, level=[0, 1])

    # Columns: same treatment; the new row is already part of the column sums.
    merged.loc[:, placeholder_key] = merged.loc[:, members].sum(axis=1)
    merged = merged.drop(columns=members).sort_index(axis=1, level=[0, 1])

    relabel = {placeholder: agg_name}
    return merged.rename(index=relabel).rename(columns=relabel)


def merge_level_0(df, merge_into, to_merge):
    """
    Fold entire account groups into one existing account, on rows and columns.

    Every row (column) whose first-level label is listed in ``to_merge`` is added to the
    row (column) of ``merge_into`` and then removed. Flows between the absorbed accounts
    themselves end up in the ``(merge_into, merge_into)`` cell, so the grand total of
    the matrix is preserved, as it is in ``merge_values``.

    Parameters
    ----------
    df : pandas.DataFrame
        Square frame with (group, value) labels on both axes. Not modified.
    merge_into : tuple
        Full label of the single account that absorbs the groups.
    to_merge : iterable of str
        First-level labels of the groups to absorb. An empty iterable returns an
        unchanged copy.

    Returns
    -------
    pandas.DataFrame

    Raises
    ------
    KeyError
        If a group in ``to_merge`` is missing from the rows or the columns.
    """
    df = df.copy()
    to_merge = list(to_merge)
    if not to_merge:
        return df

    row_groups = df.index.get_level_values(0)
    col_groups = df.columns.get_level_values(0)
    missing = [g for g in to_merge if g not in row_groups or g not in col_groups]
    if missing:
        raise KeyError(f"groups not found on both axes: {missing}")

    absorbed_rows = row_groups.isin(to_merge)
    absorbed_cols = col_groups.isin(to_merge)

    # Receipts of the absorbed accounts from every surviving column ...
    row_totals = df.loc[absorbed_rows, ~absorbed_cols].sum(axis=0)
    # ... their payments to every surviving row ...
    col_totals = df.loc[~absorbed_rows, absorbed_cols].sum(axis=1)
    # ... and the flows among the absorbed accounts themselves.
    internal = df.loc[absorbed_rows, absorbed_cols].to_numpy().sum()

    df = df.loc[~absorbed_rows, ~absorbed_cols].copy()
    df.loc[merge_into, :] += row_totals
    df.loc[:, merge_into] += col_totals
    df.loc[merge_into, merge_into] += internal
    return df

In [17]:
def aggregate_SAM(df, to_3x3=False, add_grid=True):
    """
    Aggregate a single-country SAM into a smaller set of accounts.

    The input is not modified; a transformed copy is returned. Steps, in order:

    1. Within "Factors", "Factor Tax" and "Factor Subsidity", merge the first three
       factor accounts into "Capital" and all remaining ones into "Labor".
    2. Merge the baseload/peakload variants of gas, hydro and oil power into "GasP",
       "HydroP" and "OilP", and rename "NuclearBL", "CoalBL", "OtherBL" and "WindBL"
       to "NuclearP", "CoalP", "OtherP" and "WindP".
    3. Fold the "Import Margin" and "Export Margin" groups into
       ("Trade", "Rest of World").
    4. Remove the "Regional Household" and "Capital Depreciation" accounts: factor
       income goes to the private household (renamed "Household"), tax revenue goes to
       the government, and savings are split between the two.
    5. If ``add_grid``: add a "Grid" institution and an "Electricity" factor and
       reroute the power purchases of activities and households through them.
    6. If ``to_3x3``: merge the non-power sectors into "Agriculture", "Manufacturing"
       and "Service".

    Parameters
    ----------
    df : pandas.DataFrame
        Square SAM whose rows and columns are both labelled by (group, value).
    to_3x3 : bool, default False
        Whether to apply step 6.
    add_grid : bool, default True
        Whether to apply step 5.

    Returns
    -------
    pandas.DataFrame
        The aggregated SAM.

    Notes
    -----
    Relies on names defined elsewhere in the notebook: the module-level ``code_dicts``
    (its "energy" entry, code -> name) and the helpers ``merge_values`` and
    ``merge_level_0``.
    """
    df = df.copy()

    # The split depends on the ORDER of the "Factors" accounts: the first three are taken
    # to be capital, land and natural resources, everything after them is labor.
    factors = df.loc["Factors"].index
    capital, land, res, *labor = factors
    for group in ["Factors", "Factor Tax", "Factor Subsidity"]:
        # Combine labor factors into a single factor
        df = merge_values(df, group, labor, "Labor")

        # Combine land and natural resources into capital
        df = merge_values(df, group, [capital, land, res], "Capital")

    # Merge similar power types. `merges` lists long names; the SAM is labelled with
    # codes, so invert the code -> name lookup once, outside the loop.
    source_to_code = {name: code for code, name in code_dicts["energy"].items()}
    merges = [
        ("GasP", ("Gas power baseload", "Gas power peakload")),
        ("HydroP", ("Hydro power baseload", "Hydro power peakload")),
        ("OilP", ("Oil power baseload", "Oil power peakload")),
    ]
    sector_account_groups = [
        "Domestic Commodities",
        "Imported Commodities",
        "Activities",
        "Domestic Sales Tax",
        "Import Sales Tax",
    ]
    for target, sources in merges:
        source_codes = [source_to_code.get(name) for name in sources]
        for group in sector_account_groups:
            df = merge_values(df, group, source_codes, target)

    # Rename other power suppliers
    power_rename = {
        "NuclearBL": "NuclearP",
        "CoalBL": "CoalP",
        "OtherBL": "OtherP",
        "WindBL": "WindP",
    }
    df = df.rename(index=power_rename, columns=power_rename)

    # Fold margins into the broader import/export accounts
    df = merge_level_0(df, ("Trade", "Rest of World"), ["Import Margin", "Export Margin"])

    # Eliminate the regional household.
    # The market clearing for the regional household is:
    # factor_income + tax_rev = Y_H + Y_G + S
    hh_income = df.loc[("Institution", "Regional Household"), ["Factors"]]
    C_H = df.loc[("Institution", "Private Household"), ("Institution", "Regional Household")].item()

    tax_idx = ["Domestic Sales Tax", "Import Sales Tax", "Tax", "Factor Tax", "Factor Subsidity"]
    tax_income = df.loc[("Institution", "Regional Household"), tax_idx]
    C_G = df.loc[("Institution", "Government"), ("Institution", "Regional Household")].item()

    S = df.loc[("Institution", "Investment"), ("Institution", "Regional Household")].item()
    K_d = df.loc[("Institution", "Regional Household"), ("Other", "Capital Depreciation")].item()

    # Drop the regional household
    df = df.drop(columns=[("Institution", "Regional Household")]).drop(
        index=[("Institution", "Regional Household")]
    )

    # Drop the capital depreciation account as well
    df = df.drop(columns=[("Other", "Capital Depreciation")]).drop(
        index=[("Other", "Capital Depreciation")]
    )

    # We want to split this into two equations:
    # factor_income = Y_H + S_H
    # tax_rev = Y_G + S_G
    # With S - K_d = S_H + S_G

    # Household income comes from the factors
    df.loc[("Institution", "Private Household"), ["Factors"]] = hh_income

    # Choose S_H so that the household has a balanced budget, S_H = factor_income - Y_H
    S_HH = hh_income.sum() - C_H
    df.loc[("Institution", "Investment"), ("Institution", "Private Household")] = S_HH

    # Tax revenue belongs to the government
    df.loc[("Institution", "Government"), tax_idx] = tax_income

    # Government savings are whatever respects S - K_d = S_H + S_G
    S_G = S - K_d - S_HH
    df.loc[("Institution", "Investment"), ("Institution", "Government")] = S_G

    household_rename = {"Private Household": "Household"}
    df = df.rename(index=household_rename, columns=household_rename)

    # Next I want to "chop off" the value chain matrix to construct the grid.
    # I need to sum up all electricity-related purchases by all agents, and add them to the "Grid"
    # TnD is Electricity transmission and distribution, plus all the power plants themselves
    power_suppliers = [
        "CoalP",
        "GasP",
        "HydroP",
        "NuclearP",
        "OilP",
        "OtherP",
        "SolarP",
        "WindP",
        "TnD",
    ]

    if add_grid:
        # Create a new institution, the "grid"
        df.loc[("Institution", "Grid"), :] = 0.0
        df.loc[:, ("Institution", "Grid")] = 0.0

        # Create a new factor, "electricity"
        df.loc[("Factors", "Electricity"), :] = 0.0
        df.loc[:, ("Factors", "Electricity")] = 0.0

        # Create a new factor tax account for electricity.
        # NOTE(review): no ("Factor Subsidity", "Electricity") account is created, although
        # the other factors have one -- confirm whether that is intended.
        df.loc[("Factor Tax", "Electricity"), :] = 0.0
        df.loc[:, ("Factor Tax", "Electricity")] = 0.0

        # Enlarging the frame leaves NaN in the cells that were not assigned explicitly.
        df = df.fillna(0)

        for group in ["Domestic", "Imported"]:
            power_slice = [(f"{group} Commodities",) + (x,) for x in power_suppliers]
            # "Imported" -> "Import", "Domestic" stays as it is
            tax_slice = [(f'{group.replace("ed", "")} Sales Tax',) + (x,) for x in power_suppliers]

            # Grid buys the power that activities and households used to buy
            firm_power_demand = df.loc[power_slice, "Activities"]
            hh_power_demand = df.loc[power_slice, ("Institution", "Household")]
            df.loc[power_slice, ("Institution", "Grid")] = (
                firm_power_demand.sum(axis=1) + hh_power_demand
            )

            # As a result, the grid pays the associated sales tax
            firm_power_tax = df.loc[tax_slice, "Activities"]
            hh_power_tax = df.loc[tax_slice, ("Institution", "Household")]
            df.loc[tax_slice, ("Institution", "Grid")] = firm_power_tax.sum(axis=1) + hh_power_tax

            # Spending previously done on power output is now on electricity
            df.loc[power_slice, "Activities"] = 0
            df.loc[power_slice, ("Institution", "Household")] = 0
            df.loc[tax_slice, "Activities"] = 0
            df.loc[tax_slice, ("Institution", "Household")] = 0

            # Each activity now pays the electricity factor what it used to pay for power,
            # tax included; the two passes of the loop accumulate.
            total_demand = firm_power_demand.sum(axis=0) + firm_power_tax.sum(axis=0)
            total_demand.name = ("Factors", "Electricity")
            old_demand = df.loc[("Factors", "Electricity"), "Activities"]
            new_demand = old_demand + total_demand

            df.loc[("Factors", "Electricity"), "Activities"] = new_demand.values
            df.loc[("Factors", "Electricity"), ("Institution", "Household")] += hh_power_demand.sum(
                axis=0
            ) + hh_power_tax.sum(axis=0)

        # The grid receives the revenues from supplying electricity
        df.loc[("Institution", "Grid"), ("Factors", "Electricity")] = df.loc[
            ("Factors", "Electricity")
        ].sum()

        # Only purchases by activities and households are rerouted above; power purchases
        # in every other column (e.g. the rest of the world) stay on the raw power
        # commodities, so there is no international trade in the electricity factor.

    if to_3x3:
        from cge_modeling.gams.gams_constants import AGR_CODES, MFG_CODES, SERV_CODES

        # "TnD" is taken out of the service sector list before merging; it is one of the
        # power suppliers above. Work on a copy so the imported list is not altered.
        service_codes = list(SERV_CODES)
        service_codes.remove("TnD")

        sector_groups = [
            ("Agriculture", AGR_CODES),
            ("Manufacturing", MFG_CODES),
            ("Service", service_codes),
        ]
        for group, sector_codes in sector_groups:
            df = merge_values(df, "Activities", sector_codes, group)
            df = merge_values(df, "Domestic Commodities", sector_codes, group)
            df = merge_values(df, "Imported Commodities", sector_codes, group)
            df = merge_values(df, "Import Sales Tax", sector_codes, group)
            df = merge_values(df, "Domestic Sales Tax", sector_codes, group)

    return df

In [18]:
# Aggregate the SRB SAM (sectors at full detail, no grid accounts), then keep and order
# the account groups listed in `reduced_group_order`.
sam_grid = aggregate_SAM(SRB, to_3x3=False, add_grid=False)
sam_grid = sam_grid.loc[reduced_group_order, reduced_group_order]

In [706]:
# Accounts from the preferred ordering that actually exist in `sam_grid`, in that order.
col_subset = [col for col in sorted_cols_reduced if col in sam_grid.columns]

In [707]:
# Reorder rows and columns of the SAM to follow `col_subset`.
# NOTE(review): `sam_grid` is overwritten in place, so this cell is only meaningful
# right after the cell that builds `col_subset`.
sam_grid = sam_grid.loc[col_subset, col_subset]  # .to_csv('SRB_3x3_no_grid_sorted.csv')

In [774]:
# Raw SAM entries for SRB in the ("Institution", "Investment") column, restricted to the
# "Domestic Commodities" rows (one value per commodity).
df.sel(
    country="SRB",
    col_group="Institution",
    col_value="Investment",
    row_group="Domestic Commodities",
).to_dataframe()["SAM"]

row_value
AFS       0.000081
ATP       0.013802
BPH       0.019303
B_T       0.007235
CHM       0.119554
            ...   
WHT       2.732310
WOL       0.018076
WTP       0.026676
WTR       0.202763
WindBL    0.000000
Name: SAM, Length: 76, dtype: float64

In [764]:
# Entries of the "Domestic Commodities" rows in the "Trade" columns. Zeros are turned
# into NaN first so that rows consisting only of zeros drop out of the display.
sam_grid.loc["Domestic Commodities", "Trade"].replace({0: np.nan}).dropna(how="all")

value,Rest of World
value,Unnamed: 1_level_1
AFS,1344.654754
ATP,206.880865
BPH,276.261625
B_T,855.228325
CHM,929.449889
...,...
WHT,67.518765
WOL,2.974511
WTP,35.886078
WTR,0.201111


In [716]:
# Save the aggregated SAM to CSV.
# NOTE(review): the file name says "3x3", but the visible cell that builds `sam_grid`
# calls aggregate_SAM with to_3x3=False -- confirm which version is meant to be saved.
sam_grid.to_csv("data/SRB_SAM_3x3.csv")

In [715]:
# NOTE(review): this list repeats the `power_suppliers` list defined together with the
# other constants near the top of the notebook.
power_suppliers = [
    "CoalP",
    "GasP",
    "HydroP",
    "NuclearP",
    "OilP",
    "OtherP",
    "SolarP",
    "WindP",
    "TnD",
]
# Sorted (group, supplier) keys of the domestic and imported power commodities.
# NOTE(review): `power_slice` is not used by the statement at the end of this cell.
power_slice = sorted(
    [
        (f"{group} Commodities",) + (x,)
        for x in power_suppliers
        for group in ["Domestic", "Imported"]
    ]
)
# Show the "Domestic Commodities" rows, hiding columns that contain only zeros.
# NOTE(review): `sam` is not defined in any cell visible here, so this line depends on
# leftover kernel state -- TODO confirm which frame it should refer to.
sam.loc["Domestic Commodities", :].replace({0: np.nan}).dropna(how="all", axis=1)

group,Activities,Activities,Activities,Activities,Activities,Activities,Activities,Activities,Activities,Activities,Activities,Institution,Institution,Institution,Trade
value,Agriculture,CoalP,GasP,HydroP,Manufacturing,OilP,OtherP,Service,SolarP,TnD,WindP,Government,Household,Investment,Rest of World
value,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2
Agriculture,4085.234754,0.419976,0.02849,0.095345,114.066025,0.019352,0.003265,1011.432132,0.000173,0.31094,0.000621,21.601966,5973.953733,92.730599,3743.191808
CoalP,134.19349,92.9909,15.7972,22.485,463.286079,3.88583,0.300829,407.686005,0.03256,48.0904,0.128874,,947.27,,205.843606
GasP,19.69817,13.65007,2.31886,3.300554,68.005475,0.570398,0.044158,59.843922,0.00478,7.05916,0.018917,,139.0489,,30.215652
HydroP,35.548366,24.6337,4.18474,5.95635,122.726321,1.02937,0.079691,107.997567,0.008625,12.7393,0.034139,,250.936,,54.528793
Manufacturing,667.181898,903.970868,25.838925,26.798062,4848.684766,140.017279,3.669861,2807.227116,0.048508,87.461633,0.174657,131.295299,1286.866232,493.97527,13183.58958
NuclearP,,,,,,,,,,,,,,,
OilP,14.710455,10.1938,1.731709,2.46483,50.785997,0.425969,0.032977,44.690999,0.003569,5.27173,0.014127,,103.841,,22.564836
OtherP,0.89397,0.619487,0.105238,0.14979,3.086321,0.025887,0.002004,2.71592,0.000217,0.320368,0.000859,,6.31052,,1.371289
Service,2444.494019,194.957388,71.289411,45.639626,3164.157432,9.263161,1.562946,15381.094547,0.082614,240.816256,0.297459,6126.875879,14334.88843,3831.679215,5377.064931
SolarP,0.059985,0.041567,0.007061,0.010051,0.207091,0.001737,0.000134,0.182237,1.5e-05,0.021497,5.8e-05,,0.423434,,0.092013


group,Domestic Commodities,Domestic Commodities,Domestic Commodities,Domestic Commodities,Domestic Commodities,Domestic Commodities,Domestic Commodities,Domestic Commodities,Domestic Commodities,Imported Commodities,Imported Commodities,Imported Commodities,Imported Commodities,Imported Commodities,Imported Commodities,Imported Commodities,Imported Commodities,Imported Commodities
value,CoalP,GasP,HydroP,NuclearP,OilP,OtherP,SolarP,TnD,WindP,CoalP,GasP,HydroP,NuclearP,OilP,OtherP,SolarP,TnD,WindP
value,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2
Rest of World,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,167.134454,79.269871,45.346441,63.812385,8.27672,25.207231,10.623319,211.661432,18.093118
