In [1]:
import pandas as pd
import xarray as xr
import openpyxl
import numpy as np

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [2]:
# Production sectors of the SAM, in the order they appear along each axis.
# Each label is later looked up in the "Commodities" code table, so the
# spelling has to match that sheet exactly.
activities = [
    "Paddy rice",
    "Wheat",
    "Cereal grains nec",
    "Vegetables, fruit, nuts",
    "Oil seeds",
    "Sugar cane, sugar beet",
    "Plant-based fibers",
    "Crops nec",
    "Bovine cattle, sheep and goats, horses",
    "Animal products nec",
    "Raw milk",
    "Wool, silk-worm cocoons",
    "Forestry",
    "Fishing",
    "Coal",
    "Oil",
    "Gas",
    "Other Extraction (formerly omn Minerals nec)",
    "Bovine meat products",
    "Meat products nec",
    "Vegetable oils and fats",
    "Dairy products",
    "Processed rice",
    "Sugar",
    "Food products nec",
    "Beverages and tobacco products",
    "Textiles",
    "Wearing apparel",
    "Leather products",
    "Wood products",
    "Paper products, publishing",
    "Petroleum, coal products",
    "Chemical products",
    "Basic pharmaceutical products",
    "Rubber and plastic products",
    "Mineral products nec",
    "Ferrous metals",
    "Metals nec",
    "Metal products",
    "Computer, electronic and optical products",
    "Electrical equipment",
    "Machinery and equipment nec",
    "Motor vehicles and parts",
    "Transport equipment nec",
    "Manufactures nec",
    "Electricity transmission and distribution",
    "Gas manufacture, distribution",
    "Water",
    "Construction",
    "Trade",
    "Accommodation, Food and service activities",
    "Transport nec",
    "Water transport",
    "Air transport",
    "Warehousing and support activities",
    "Communication",
    "Financial services nec",
    "Insurance (formerly isr)",
    "Real estate activities",
    "Business services nec",
    "Recreational and other services",
    "Public Administration and defense",
    "Education",
    "Human health and social work activities",
    "Dwellings",
]

# Factor accounts of the SAM.
factors = ["Capital", "Labor", "Natural resources", "Electricity"]

# Institution accounts of the SAM.
institutions = ["Household", "Grid", "Govt"]

# Electricity technologies with baseload and peakload listed separately.
energy_gtap = [
    "Nuclear power",
    "Coal power baseload",
    "Gas power baseload",
    "Wind power",
    "Hydro power baseload",
    "Oil power baseload",
    "Other baseload",
    "Gas power peakload",
    "Hydro power peakload",
    "Oil power peakload",
    "Solar power",
]

# Electricity technologies with the baseload/peakload distinction dropped.
energy_sam = [
    "Nuclear power",
    "Coal power",
    "Gas power",
    "Wind power",
    "Hydro power",
    "Oil power",
    "Solar power",
    "Other power",
]

# SAM axis labels as (account group, account) pairs.  The three sector-based
# groups each list the ordinary activities first, then the energy_sam entries.
index = (
    [("Factor", factor) for factor in factors]
    + [("Institution", institution) for institution in institutions]
    + [("Production", sector) for sector in activities + energy_sam]
    + [("Activities", sector) for sector in activities + energy_sam]
    + [("Income Tax", "Household")]
    + [("Sales Tax", sector) for sector in activities + energy_sam]
    + [("Use Tax", factor) for factor in factors]
    + [("Other", "Capital Accumulation")]
)

In [3]:
# Workbook holding every raw sheet read by this notebook.
path = "data/GTAP_raw_data.xlsx"

# Names of the worksheets in the workbook.  The purchase sheets follow the
# patterns "Domestic Purchases by <agent>, <price>" and
# "Import Purchases, by <agent>, <price>", so they are generated.
sheet_names = [
    "Country Codes",
    "Commodities",
    "Labor Types",
    "Factor Types",
    "Primary Factor Purchases, BP",
    "Primary Factor Purchases, PP",
    "Endowment Supply Value (Net)",
    "Factor Subsidy Payments (Gross)",
    "Factor Employment Tax Rev",
    "Make Matrix, After Tax",
    "Make Matrix, Before Tax",
    "Population",
    "Net output tax",
    "Net Saving",
    "Imports, CIF Price",
    "Capital Depreciation",
    *(
        f"Domestic Purchases by {agent}, {price}"
        for agent in ("Firms", "Gov", "Inv", "HH")
        for price in ("BP", "PP")
    ),
    "Capital Stock",
    *(
        f"Import Purchases, by {agent}, {price}"
        for agent in ("Firms", "Gov", "Inv", "HH")
        for price in ("BP", "PP")
    ),
]

In [4]:
def _read_code_table(sheet, label):
    """Read a header-less two-column sheet into a frame indexed by "code"."""
    return pd.read_excel(
        path, sheet_name=sheet, index_col=0, header=None, names=["code", label]
    )


country_codes = _read_code_table("Country Codes", "country")

# The "Commodities" sheet mixes ordinary sectors with the electricity
# technologies named in energy_gtap; keep the two in separate tables.
commodity_codes = _read_code_table("Commodities", "commodity")
energy_codes = commodity_codes.loc[lambda table: table["commodity"].isin(energy_gtap)]
commodity_codes = commodity_codes.loc[lambda table: ~table["commodity"].isin(energy_gtap)]

labor_codes = _read_code_table("Labor Types", "labor")
factor_codes = _read_code_table("Factor Types", "factor")

# Code tables that are not stored in the workbook.
agent_codes = {"Firms": "firms", "Gov": "government", "HH": "household", "Inv": "investment"}
price_codes = {"BP": "base price", "PP": "purchaser price"}
load_codes = {'BL':'base load', 'P':'peak load'}

names = ["country", "commodity", 'energy', 'load', "labor", "factor", "agent", "price"]
codes = [country_codes, commodity_codes, energy_codes, load_codes, labor_codes, factor_codes, agent_codes, price_codes]

# code -> label lookup per dimension; frames contribute their single column,
# plain dicts are used unchanged.
CODES = {
    label: (table.iloc[:, 0].to_dict() if isinstance(table, pd.DataFrame) else table)
    for label, table in zip(names, codes)
}

In [5]:
# Reverse lookups (label -> code) for ordinary commodities and for energy.
commodity_to_code = dict(zip(CODES["commodity"].values(), CODES["commodity"].keys()))
energy_to_code = dict(zip(CODES["energy"].values(), CODES["energy"].keys()))

# Codes listed in the same order as the `activities` / `energy_gtap` labels.
# Note that `energy_codes` is rebound here from a code table to a list of codes.
activity_codes = list(map(commodity_to_code.__getitem__, activities))
energy_codes = list(map(energy_to_code.__getitem__, energy_gtap))

In [6]:
# Layout of every data sheet:
#   "index"   - names of the leading index columns of the sheet,
#   "columns" - names of the header level(s), or None for a plain table,
#   "groups"  - dimensions whose value is encoded in the sheet name, or None.
# Only the "Firms" purchase sheets carry a second commodity index.
coords_by_sheet = {
    sheet: {"index": index_names, "columns": column_names, "groups": group_names}
    for sheet, index_names, column_names, group_names in [
        ("Primary Factor Purchases, BP", ("factor", "commodity"), ("country",), ("price",)),
        ("Primary Factor Purchases, PP", ("factor", "commodity"), ("country",), ("price",)),
        ("Endowment Supply Value (Net)", ("factor", "commodity"), ("country",), None),
        ("Factor Subsidy Payments (Gross)", ("factor", "commodity"), ("country",), None),
        ("Factor Employment Tax Rev", ("factor", "commodity"), ("country",), None),
        ("Make Matrix, After Tax", ("commodity", "commodity_bis"), ("country",), ("tax",)),
        ("Make Matrix, Before Tax", ("commodity", "commodity_bis"), ("country",), ("tax",)),
        ("Population", ("country",), None, None),
        ("Net output tax", ("commodity", "commodity_bis"), ("country",), None),
        ("Net Saving", ("country",), None, None),
        ("Imports, CIF Price", ("commodity", "country_bis"), ("country",), None),
        ("Capital Depreciation", ("country",), None, None),
        *[
            (
                f"Domestic Purchases by {agent}, {price}",
                ("commodity", "commodity_bis") if agent == "Firms" else ("commodity",),
                ("country",),
                ("agent", "price"),
            )
            for agent in ("Firms", "Gov", "Inv", "HH")
            for price in ("BP", "PP")
        ],
        ("Capital Stock", ("country",), None, None),
        *[
            (
                f"Import Purchases, by {agent}, {price}",
                ("commodity", "commodity_bis") if agent == "Firms" else ("commodity",),
                ("country",),
                ("agent", "price"),
            )
            for agent in ("Firms", "Gov", "Inv", "HH")
            for price in ("BP", "PP")
        ],
    ]
}

# Sheets that belong to the same variable are listed together; each group is
# concatenated into one series when the workbook is loaded.  The purchase
# groups enumerate every agent at both price bases.
sheet_groups = [
    ("Primary Factor Purchases, BP", "Primary Factor Purchases, PP"),
    ("Endowment Supply Value (Net)",),
    ("Factor Subsidy Payments (Gross)",),
    ("Factor Employment Tax Rev",),
    ("Make Matrix, After Tax", "Make Matrix, Before Tax"),
    ("Population",),
    ("Net output tax",),
    ("Net Saving",),
    ("Imports, CIF Price",),
    ("Capital Depreciation",),
    tuple(
        f"Domestic Purchases by {agent}, {price}"
        for agent in ("Firms", "Gov", "Inv", "HH")
        for price in ("BP", "PP")
    ),
    ("Capital Stock",),
    tuple(
        f"Import Purchases, by {agent}, {price}"
        for agent in ("Firms", "Gov", "Inv", "HH")
        for price in ("BP", "PP")
    ),
]

In [90]:
def repeat(f, x, n):
    """Return the result of applying ``f`` to ``x`` ``n`` times in a row.

    With ``n == 0`` the input is returned unchanged.
    """
    result = x
    for _ in range(n):
        result = f(result)
    return result

def determine_indices(coords):
    """Count the header and index levels described by a sheet layout.

    Returns ``(n_headers, n_index)``; ``n_headers`` is ``None`` when the
    layout has no named column level (``coords["columns"] is None``).
    """
    columns = coords["columns"]
    n_headers = len(columns) if columns is not None else None
    return n_headers, len(coords["index"])

def set_df_index_names(df, sheet, coords):
    """Label the index (and, if present, column) levels of ``df`` in place.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame as read from the sheet; its axes are renamed in place.
    sheet : str
        Sheet name.  Not used here; kept so the signature matches the other
        per-sheet helpers.
    coords : dict
        Sheet layout with an "index" tuple of level names and a "columns"
        entry that is either a tuple of level names or ``None``.

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in.
    """
    # Assigning ``names`` works for single-level and multi-level axes alike,
    # so no separate branch for the one-level case is needed.
    df.index.names = list(coords["index"])
    if coords["columns"] is not None:
        df.columns.names = list(coords["columns"])
    return df

def make_tokens_from_sheet_name(sheet, coords):
    """Split a sheet name into its comma- and "by"-separated parts.

    Parameters
    ----------
    sheet : str
        Sheet name such as "Domestic Purchases by Firms, BP".
    coords : dict
        Sheet layout; only ``coords["columns"]`` is inspected.

    Returns
    -------
    list of str or None
        The stripped, non-empty parts in order of appearance, or ``None``
        when the layout has no named column level.
    """
    import re

    if coords["columns"] is None:
        return None

    tokens = []
    for part in sheet.split(","):
        # Split on the separate word "by" only, so that names that merely
        # contain the letters (e.g. "Standby") stay in one piece.
        for piece in re.split(r"\bby\b", part):
            piece = piece.strip()
            if piece:
                tokens.append(piece)
    return tokens

def rebuild_df_index_cols(df, sheet, coords):
    """Flatten the index of ``df`` and add the dimensions named in the sheet title.

    Returns ``(df, index_cols, name, tokens)``:

    * when the sheet name yields no tokens, ``df`` is returned untouched with
      ``index_cols=None`` and ``name=sheet``;
    * when the layout has no "groups", the index is reset and ``name`` is all
      tokens joined by spaces;
    * otherwise the first token becomes ``name`` and each remaining token is
      written as a constant column named after the matching group, which is
      also appended to ``index_cols``.
    """
    tokens = make_tokens_from_sheet_name(sheet, coords)
    if tokens is None:
        return df, None, sheet, tokens

    index_cols = df.index.names
    flat = df.reset_index(drop=False)

    groups = coords["groups"]
    if groups is None:
        return flat, index_cols, " ".join(tokens), tokens

    # The leading token names the variable; the rest are group values.
    name, tokens = tokens[0], tokens[1:]
    for group, token in zip(groups, tokens):
        flat[group] = token
        index_cols = index_cols + [group]
    return flat, index_cols, name, tokens

def add_duplicate_indices(df, name, index_cols, names=None, index=None):
    """Add a ``*_bis`` copy of an index column for selected variables.

    When ``name.title()`` is one of ``names`` (default: "Domestic Purchases"
    and "Import Purchases") and ``df`` has no ``index`` column yet (default:
    "commodity_bis"), that column is created as a copy of the column named
    like ``index`` without its "_bis" suffix and ``index`` is appended to
    ``index_cols``.  ``df`` is modified in place.

    Returns ``(df, index_cols)``.
    """
    if index is None:
        index = "commodity_bis"
    else:
        assert index.endswith('bis')
    if names is None:
        names = ["Domestic Purchases", "Import Purchases"]

    wants_copy = name.title() in names and index not in df.columns
    if wants_copy:
        source = index.replace('_bis', '')
        df[index] = df[source]
        index_cols += [index]

    return df, index_cols

def load_sheet(path, sheet, coords):
    """Read one worksheet and reshape it according to its layout.

    The sheet is read with its leading ``len(coords["index"])`` columns as
    the index, the axes are named, the dimensions encoded in the sheet name
    are added, and - when index columns were collected - the frame is
    re-indexed on them (sorted by name) and its header level(s) are stacked
    into the index.

    Returns ``(data, name, tokens)`` where ``name`` is the variable name
    derived from the sheet title and ``tokens`` the remaining title parts.
    """
    n_headers, n_index = determine_indices(coords)
    raw = pd.read_excel(path, sheet_name=sheet, index_col=list(range(n_index)))
    named = set_df_index_names(raw, sheet, coords)
    df, index_cols, name, tokens = rebuild_df_index_cols(named, sheet, coords)
    df, index_cols = add_duplicate_indices(df, name, index_cols)

    if not index_cols:
        return df, name, tokens

    df = df.set_index(sorted(index_cols))
    stack_depth = 1 if n_headers is None else n_headers
    df = repeat(lambda frame: frame.stack(), df, stack_depth)
    return df, name, tokens

def get_load_type(x):
    """Return the load suffix of code ``x``: 'BL', 'P', or ``None`` if neither."""
    for suffix in ('BL', 'P'):
        if x.endswith(suffix):
            return suffix
    return None


def list_replace(l, sub_dict):
    """Return a copy of list ``l`` with selected entries substituted.

    For every ``old -> new`` pair in ``sub_dict`` the first position of
    ``old`` in the *original* list receives ``new``.  ``l`` itself is left
    untouched; a missing ``old`` raises ``ValueError``.
    """
    replaced = l.copy()
    for old, new in sub_dict.items():
        position = l.index(old)
        replaced[position] = new
    return replaced


def split_commodity_df(df, codes=None):
    """Split a series indexed by "commodity" into ordinary and energy parts.

    Parameters
    ----------
    df : pandas.Series
        Values with a named MultiIndex that contains a "commodity" level.
        The series is expected to be unnamed, so that its values land in
        column ``0`` after ``reset_index``.
    codes : list of str, optional
        Commodity codes that count as energy.  Defaults to the notebook-level
        ``energy_codes``.

    Returns
    -------
    (commodity_df, energy_df) : tuple of pandas.Series
        ``commodity_df`` keeps the original index levels and holds the rows
        whose commodity is not an energy code.  ``energy_df`` holds the
        remaining rows with "commodity" renamed to "energy", the trailing
        "BL"/"P" removed from the code, and that suffix stored in an extra
        trailing "load" level.
    """
    if codes is None:
        codes = energy_codes

    names = list(df.index.names)
    energy_names = ["energy" if level == "commodity" else level for level in names]
    energy_names.append("load")

    flat = df.reset_index()
    is_energy = flat["commodity"].isin(codes)

    commodity_df = flat.loc[~is_energy].set_index(names).loc[:, 0]

    energy_rows = flat.loc[is_energy].rename(columns={"commodity": "energy"})
    raw_code = energy_rows["energy"]
    energy_rows = energy_rows.assign(
        # Read the load type from the full code *before* the suffix is
        # removed; afterwards there would be nothing left to detect.
        load=raw_code.str.extract(r"(BL|P)$", expand=False),
        # Anchored at the end so letters inside the code are never removed.
        energy=raw_code.str.replace(r"(BL|P)$", "", regex=True),
    )
    energy_df = energy_rows.set_index(energy_names).loc[:, 0]

    return commodity_df, energy_df


def split_commodity_commodity_bis_df(df, codes=None):
    """Split a series indexed by two commodity levels into its four blocks.

    Parameters
    ----------
    df : pandas.Series
        Values with a named MultiIndex containing "commodity" and
        "commodity_bis".  The series is expected to be unnamed, so that its
        values land in column ``0`` after ``reset_index``.
    codes : list of str, optional
        Commodity codes that count as energy.  Defaults to the notebook-level
        ``energy_codes``.

    Returns
    -------
    tuple
        If "commodity" equals "commodity_bis" on every row, the two-element
        result of ``split_commodity_df``.  Otherwise four objects, in order:

        1. commodity x commodity - Series on the original index levels;
        2. commodity x energy - one-column frame, "commodity_bis" replaced
           by "energy" plus a trailing "load" level;
        3. energy x commodity - one-column frame, "commodity" replaced by
           "energy", "commodity_bis" by "commodity", plus "load";
        4. energy x energy - one-column frame with levels "energy",
           "energy_bis", "load" and "load_bis".

        In every "energy" level the trailing "BL"/"P" of the code is removed
        and kept in the matching "load" level.
    """
    if codes is None:
        codes = energy_codes
        same_axis_split = lambda: split_commodity_df(df)
    else:
        same_axis_split = lambda: split_commodity_df(df, codes)

    names = list(df.index.names)
    flat = df.reset_index()

    # Special case: commodity and commodity_bis are always the same
    if (flat["commodity"] == flat["commodity_bis"]).all():
        return same_axis_split()

    suffix = r"(BL|P)$"

    def _relabelled(mapping):
        # Index level names with some levels swapped for their new names.
        return [mapping.get(level, level) for level in names]

    def _with_load(frame, code_column, load_column):
        # Store the BL/P suffix of ``code_column`` in ``load_column``.
        load = frame[code_column].str.extract(suffix, expand=False)
        return frame.assign(**{load_column: load})

    def _without_suffix(frame, code_column):
        # Remove the BL/P suffix from ``code_column``.
        stripped = frame[code_column].str.replace(suffix, "", regex=True)
        return frame.assign(**{code_column: stripped})

    commodity_rows = flat.loc[~flat["commodity"].isin(codes)]
    bis_is_energy = commodity_rows["commodity_bis"].isin(codes)

    commodity_commodity_df = commodity_rows.loc[~bis_is_energy].set_index(names).loc[:, 0]

    commodity_energy_df = commodity_rows.loc[bis_is_energy].rename(columns={"commodity_bis": "energy"})
    # The load type has to be read before the suffix is removed.
    commodity_energy_df = _without_suffix(_with_load(commodity_energy_df, "energy", "load"), "energy")
    commodity_energy_df = commodity_energy_df.set_index(
        _relabelled({"commodity_bis": "energy"}) + ["load"]
    )

    energy_rows = flat.loc[flat["commodity"].isin(codes)].rename(columns={"commodity": "energy"})
    energy_rows = _without_suffix(_with_load(energy_rows, "energy", "load"), "energy")
    bis_is_energy = energy_rows["commodity_bis"].isin(codes)

    energy_commodity_df = (
        energy_rows.loc[~bis_is_energy]
        .rename(columns={"commodity_bis": "commodity"})
        .set_index(_relabelled({"commodity": "energy", "commodity_bis": "commodity"}) + ["load"])
    )

    # NOTE(review): "energy_bis" keeps its BL/P suffix here, unlike "energy";
    # confirm whether it should be stripped as well.
    energy_energy_df = energy_rows.loc[bis_is_energy].rename(columns={"commodity_bis": "energy_bis"})
    energy_energy_df = _with_load(energy_energy_df, "energy_bis", "load_bis").set_index(
        _relabelled({"commodity": "energy", "commodity_bis": "energy_bis"}) + ["load", "load_bis"]
    )

    return commodity_commodity_df, commodity_energy_df, energy_commodity_df, energy_energy_df
    

def check_df_for_splits(df, coords):
    """Route ``df`` to the energy/commodity splitter matching its layout.

    * no "commodity" in ``coords["index"]``: ``df`` is returned as is;
    * "commodity" only: result of ``split_commodity_df``;
    * "commodity" and "commodity_bis": result of
      ``split_commodity_commodity_bis_df``.

    NOTE(review): the decision is based on ``coords`` alone, not on the
    index levels actually present in ``df`` - confirm that callers always
    pass the layout that matches the data.
    """
    index_names = coords['index']
    if 'commodity' not in index_names:
        return df

    # Two commodity indices: the cross-terms have to be separated too.
    if 'commodity_bis' in index_names:
        return split_commodity_commodity_bis_df(df)

    # Only one commodity index: a plain split is enough.
    return split_commodity_df(df)

# Variable-name templates for the pieces returned by check_df_for_splits,
# keyed by the number of pieces and listed in the order they are returned:
#   1 - data without a commodity index (no split),
#   2 - (commodity, energy),
#   4 - (commodity x commodity, commodity x energy,
#        energy x commodity, energy x energy).
SPLIT_LABELS = {
    1: ("{name}",),
    2: ("{name}", "Energy {name}"),
    4: (
        "{name}",
        "{name} (commodity x energy)",
        "{name} (energy x commodity)",
        "{name} (energy x energy)",
    ),
}

# Load every sheet group, split energy from ordinary commodities and store
# each resulting piece as an xarray variable.
data_vars = {}
for sheets in sheet_groups:
    group_stack = []
    for sheet in sheets:
        coords = coords_by_sheet[sheet]
        df, name, tokens = load_sheet(path, sheet, coords)
        group_stack.append(df)

    if len(group_stack) > 1:
        data_df = pd.concat(group_stack)
    else:
        data_df = group_stack[0]
        if isinstance(data_df, pd.DataFrame):
            data_df = data_df.iloc[:, 0]

    # `coords` and `name` are those of the last sheet in the group.
    data_dfs = check_df_for_splits(data_df, coords)

    # An unsplit result comes back as the bare series, so normalise to a
    # tuple before counting the pieces.
    parts = data_dfs if isinstance(data_dfs, tuple) else (data_dfs,)
    for template, part in zip(SPLIT_LABELS[len(parts)], parts):
        if isinstance(part, pd.DataFrame):
            # Some split pieces are one-column frames; use that column.
            part = part.iloc[:, 0]
        if part.empty:
            continue
        data_vars[template.format(name=name.title())] = part.to_xarray()

In [89]:
# Inspect the split result left over from the last iteration of the loading loop.
data_dfs

2

In [64]:
# Assemble the variables collected in `data_vars` into a single xarray Dataset.
gtap = xr.Dataset(data_vars)

In [65]:
# Display the dataset built in the previous cell.
gtap

In [66]:
# Location the finished SAM is written to.
sam_path = "data/albania_sam.csv"

# Start from an all-zero square matrix whose rows and columns carry the same
# (account group, account) labels.
sam_labels = pd.MultiIndex.from_tuples(index)
df = pd.DataFrame(
    0.0,
    index=sam_labels,
    columns=sam_labels.copy(),
    dtype='float64',
)

In [113]:
# Albania at base prices (BP) and at purchaser prices (PP).
alb_bp = gtap.sel(country="ALB", price="BP")
alb_pp = gtap.sel(country="ALB", price="PP")
labor_factors = labor_codes.index.values.tolist()

# Primary factor purchases; the labor types are summed into a single factor.
factor_purchases_bp = alb_bp["Primary Factor Purchases"]
labor = factor_purchases_bp.sel(factor=labor_factors).sum(dim="factor")
capital = factor_purchases_bp.sel(factor="Capital")
resource = factor_purchases_bp.sel(factor="NatlRes")
land = factor_purchases_bp.sel(factor="Land")

# Domestic purchases by agent.
domestic_bp = alb_bp["Domestic Purchases"]
domestic_pp = alb_pp["Domestic Purchases"]

X = domestic_bp.sel(agent="Firms")
electricity = X.sel(commodity='TnD', commodity_bis=activity_codes)

C_bp = domestic_bp.sel(agent="HH")
C_pp = domestic_pp.sel(agent="HH")

I_bp = domestic_bp.sel(agent="Inv")
I_pp = domestic_pp.sel(agent="Inv")

C_G = domestic_bp.sel(agent="Gov")
S = gtap["Net Saving"].sel(country="ALB").values

# Factor employment tax revenue, again with the labor types summed.
factor_tax_rev = gtap["Factor Employment Tax Rev"].sel(country="ALB")
K_tax_rev = factor_tax_rev.sel(factor="Capital")
L_tax_rev = factor_tax_rev.sel(factor=labor_factors).sum(dim="factor")
NR_tax_rev = factor_tax_rev.sel(factor="NatlRes")

In [143]:
# Inspect the 'TnD' purchases by firms selected in the previous cell.
electricity

In [114]:
# Fill the SAM (`df`) from the Albania selections made in the previous cell.
#
# NOTE(review): the "Activities", "Production" and "Sales Tax" groups of the
# SAM axis were built from `activities` plus `energy_sam`, whereas the values
# assigned below are selected with `activity_codes` only (one code per entry
# of `activities`) - confirm that the shapes on both sides agree.

# Factor payments by activity.
df.loc[("Factor", "Labor"), "Activities"] = labor.sel(commodity=activity_codes).values
df.loc[("Factor", "Capital"), "Activities"] = capital.sel(commodity=activity_codes).values
df.loc[("Factor", "Natural resources"), "Activities"] = resource.sel(commodity=activity_codes).values
# Unlike the rows above, this assigns the xarray object itself rather than `.values`.
df.loc[("Factor", "Electricity"), "Activities"] = electricity

# Purchases of firms from each other.
df.loc["Production", "Activities"] = X.sel(
    commodity=activity_codes, commodity_bis=activity_codes
).values
# Household and government demand: np.diag is applied to the
# (commodity, commodity_bis) selection - presumably to take its diagonal; TODO confirm.
df.loc["Production", ("Institution", "Household")] = np.diag(
    C_bp.sel(commodity=activity_codes, commodity_bis=activity_codes).values
)
df.loc["Production", ("Institution", "Govt")] = np.diag(
    C_G.sel(commodity=activity_codes, commodity_bis=activity_codes).values
)
# Household net saving goes to capital accumulation.
df.loc[("Other", "Capital Accumulation"), ("Institution", "Household")] = S
# Factor employment tax revenue by sector.
df.loc[("Use Tax", "Capital"), "Production"] = K_tax_rev.sel(commodity=activity_codes).values
df.loc[("Use Tax", "Labor"), "Production"] = L_tax_rev.sel(commodity=activity_codes).values
df.loc[("Use Tax", "Natural resources"), "Production"] = NR_tax_rev.sel(commodity=activity_codes).values
# Sales tax paid by households: purchaser-price minus base-price purchases.
df.loc["Sales Tax", ("Institution", "Household")] = np.diag(
    (C_pp - C_bp).sel(commodity=activity_codes, commodity_bis=activity_codes)
)
# NOTE(review): unlike the demand rows above, I_pp is not restricted to
# `activity_codes` before np.diag is taken - confirm this is intended.
df.loc["Production", ("Other", "Capital Accumulation")] = np.diag(I_pp)

# Combine baseload types
# NOTE(review): `energy_types` is not defined in any of the cells shown in
# this notebook - confirm where it comes from.
for energy in energy_types:
    # Labels in `activities` whose text contains the energy name (case-insensitive).
    cols = [x for x in activities if energy.lower() in x.lower()]
    if len(cols) == 0:
        continue
    for category in ['Activities', 'Production', 'Sales Tax']:
        # col_sum adds up the selected rows (result indexed by column labels);
        # row_sum adds up the selected columns (result indexed by row labels).
        col_sum = df.loc[(category, cols), :].sum(axis=0)
        row_sum = df.loc[:, (category, cols)].sum(axis=1)
        # NOTE(review): the aggregated row receives row_sum and the aggregated
        # column receives col_sum, which looks transposed - confirm intent.
        df.loc[(category, energy), :] = row_sum
        df.loc[:, (category, energy)] = col_sum
    
# Change Electricity transmission and distribution to a factor
# TODO: not implemented yet.


# Totals
# Factor income received by institutions: households get labor, capital and
# natural-resource income, the grid gets the electricity factor income.
df.loc[('Institution', 'Household'), ('Factor', 'Labor')] = labor.sum()
df.loc[('Institution', 'Household'), ('Factor', 'Capital')] = capital.sum()
df.loc[('Institution', 'Household'), ('Factor', 'Natural resources')] = resource.sum()
df.loc[('Institution', 'Grid'), ('Factor', 'Electricity')] = electricity.sum()

In [116]:
# Scratch: stop at the first energy type that matches exactly two labels in
# `activities`, leaving `energy` and `cols` set for the cells below.
# NOTE(review): `energy_types` is not defined in any of the cells shown in
# this notebook - confirm where it comes from.
for energy in energy_types:
    cols = [x for x in activities if energy.lower() in x.lower()]
    if len(cols) == 2:
        break

Unnamed: 0_level_0,Unnamed: 1_level_0,Factor,Factor,Factor,Factor,Institution,Institution,Institution,Production,Production,Production,...,Sales Tax,Sales Tax,Sales Tax,Sales Tax,Sales Tax,Use Tax,Use Tax,Use Tax,Use Tax,Other
Unnamed: 0_level_1,Unnamed: 1_level_1,Capital,Labor,Natural resources,Electricity,Household,Grid,Govt,Paddy rice,Wheat,Cereal grains nec,...,Other baseload,Gas power peakload,Hydro power peakload,Oil power peakload,Solar power,Capital,Labor,Natural resources,Electricity,Capital Accumulation
Factor,Capital,0.000000,0.00000,0.00000,0.0,0.00,0.0,0.0,0.000000,0.000000,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Factor,Labor,0.000000,0.00000,0.00000,0.0,0.00,0.0,0.0,0.000000,0.000000,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Factor,Natural resources,0.000000,0.00000,0.00000,0.0,0.00,0.0,0.0,0.000000,0.000000,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Factor,Electricity,0.000000,0.00000,0.00000,0.0,0.00,0.0,0.0,0.000000,0.000000,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Institution,Household,5479.083167,4194.76711,129.20395,0.0,0.00,0.0,0.0,0.000000,0.000000,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Use Tax,Capital,0.000000,0.00000,0.00000,0.0,0.00,0.0,0.0,0.000317,0.066536,0.096063,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Use Tax,Labor,0.000000,0.00000,0.00000,0.0,0.00,0.0,0.0,0.023887,5.008802,7.231582,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Use Tax,Natural resources,0.000000,0.00000,0.00000,0.0,0.00,0.0,0.0,,,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Use Tax,Electricity,0.000000,0.00000,0.00000,0.0,0.00,0.0,0.0,0.000000,0.000000,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [142]:
# Display the SAM in its current state.
df

Unnamed: 0_level_0,Unnamed: 1_level_0,Factor,Factor,Factor,Factor,Institution,Institution,Institution,Production,Production,Production,...,Sales Tax,Sales Tax,Sales Tax,Sales Tax,Sales Tax,Use Tax,Use Tax,Use Tax,Use Tax,Other
Unnamed: 0_level_1,Unnamed: 1_level_1,Capital,Labor,Natural resources,Electricity,Household,Grid,Govt,Paddy rice,Wheat,Cereal grains nec,...,Other baseload,Gas power peakload,Hydro power peakload,Oil power peakload,Solar power,Capital,Labor,Natural resources,Electricity,Capital Accumulation
Factor,Capital,0.000000,0.00000,0.00000,0.0,0.00,0.0,0.0,0.000000,0.000000,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Factor,Labor,0.000000,0.00000,0.00000,0.0,0.00,0.0,0.0,0.000000,0.000000,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Factor,Natural resources,0.000000,0.00000,0.00000,0.0,0.00,0.0,0.0,0.000000,0.000000,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Factor,Electricity,0.000000,0.00000,0.00000,0.0,0.00,0.0,0.0,0.000000,0.000000,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Institution,Household,5479.083167,4194.76711,129.20395,0.0,0.00,0.0,0.0,0.000000,0.000000,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Use Tax,Capital,0.000000,0.00000,0.00000,0.0,0.00,0.0,0.0,0.000317,0.066536,0.096063,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Use Tax,Labor,0.000000,0.00000,0.00000,0.0,0.00,0.0,0.0,0.023887,5.008802,7.231582,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Use Tax,Natural resources,0.000000,0.00000,0.00000,0.0,0.00,0.0,0.0,,,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Use Tax,Electricity,0.000000,0.00000,0.00000,0.0,0.00,0.0,0.0,0.000000,0.000000,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [132]:
# Scratch: merge the row cross-sections (second index level) of the labels in
# `cols`. `cols` is whatever an earlier loop cell left behind; the call only
# makes sense when it holds exactly two labels, since they are unpacked into
# pd.merge's positional arguments.
pd.merge(*[df.xs(axis=0, level=1, key=key) for key in cols])

Unnamed: 0_level_0,Factor,Factor,Factor,Factor,Institution,Institution,Institution,Production,Production,Production,...,Sales Tax,Sales Tax,Sales Tax,Sales Tax,Sales Tax,Use Tax,Use Tax,Use Tax,Use Tax,Other
Unnamed: 0_level_1,Capital,Labor,Natural resources,Electricity,Household,Grid,Govt,Paddy rice,Wheat,Cereal grains nec,...,Other baseload,Gas power peakload,Hydro power peakload,Oil power peakload,Solar power,Capital,Labor,Natural resources,Electricity,Capital Accumulation
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [128]:
# Scratch: column totals of the merged row cross-sections for the labels in
# `cols` (left over from an earlier loop cell) ...
col_sum = pd.merge(*[df.xs(axis=0, level=1, key=key) for key in cols]).sum(axis=0)
# ... and the matching column cross-sections joined on the row index for display.
pd.merge(*[df.xs(axis=1, level=1, key=key) for key in cols], left_index=True, right_index=True)

Unnamed: 0,Unnamed: 1,Production_x,Activities_x,Sales Tax_x,Production_y,Activities_y,Sales Tax_y
Factor,Capital,0.0,,0.0,0.0,,0.0
Factor,Labor,0.0,0.0,0.0,0.0,0.0,0.0
Factor,Natural resources,0.0,,0.0,0.0,,0.0
Factor,Electricity,0.0,,0.0,0.0,,0.0
Institution,Household,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...
Use Tax,Capital,,0.0,0.0,,0.0,0.0
Use Tax,Labor,0.0,0.0,0.0,0.0,0.0,0.0
Use Tax,Natural resources,,0.0,0.0,,0.0,0.0
Use Tax,Electricity,0.0,0.0,0.0,0.0,0.0,0.0


In [124]:
# Inspect the most recently computed `col_sum`.
col_sum

Factor       Capital                 0.0
             Labor                   0.0
             Natural resources       0.0
             Electricity             0.0
Institution  Household               0.0
                                    ... 
Use Tax      Capital                 0.0
             Labor                   0.0
             Natural resources       0.0
             Electricity             0.0
Other        Capital Accumulation    0.0
Length: 241, dtype: float64

In [93]:
# Write the SAM to the CSV file at `sam_path`.
df.to_csv(sam_path)

In [109]:
# Inspect `row_sum` as left behind by the aggregation loop.
row_sum

Factor       Capital              0.0
             Labor                0.0
             Natural resources    0.0
             Electricity          0.0
Institution  Household            0.0
                                 ... 
Production   Hydro power          0.0
Sales Tax    Hydro power          0.0
Activities   Oil power            0.0
Production   Oil power            0.0
Sales Tax    Oil power            0.0
Length: 253, dtype: float64

In [108]:
# Inspect `col_sum` as left behind by the aggregation loop.
col_sum

Factor       Capital              0.000000
             Labor                0.000000
             Natural resources    0.000000
             Electricity          0.000000
Institution  Household            0.042843
                                    ...   
Production   Hydro power          0.000000
Sales Tax    Hydro power          0.000000
Activities   Oil power            0.000000
Production   Oil power            0.000000
Sales Tax    Oil power            0.000000
Length: 253, dtype: float64

In [84]:
# Inspect one aggregated SAM row; `category` and `energy` are loop variables
# left over from the aggregation loop, so the row shown depends on where
# that loop last stopped.
df.loc[(category, energy), :]

Factor       Capital                 0.0
             Labor                   0.0
             Natural resources       0.0
             Electricity             0.0
Institution  Household               0.0
                                    ... 
Use Tax      Electricity             0.0
Other        Capital Accumulation    0.0
Activities   Coal power              0.0
Production   Coal power              0.0
Sales Tax    Coal power              0.0
Name: (Activities, Gas power), Length: 244, dtype: float64