In [1]:
import os, os.path
import numpy as np
import pandas as pd
import model_attributes as ma
from attribute_table import AttributeTable
import setup_analysis as sa
import support_functions as sf
import importlib
import time
import warnings
import matplotlib.pyplot as plt
import model_afolu as mafl
from typing import *


  for desig, df in df_by_designation:


In [251]:

def split_stocks(
    df_stocks: pd.DataFrame,
    dict_fracs: Union[Dict[str, Dict[str, float]], None],
    dict_to_sisepuede_cat: Union[Dict[str, Dict[str, float]], None],
    item_code_start: int, 
    attribute_cw_fao_cats: AttributeTable,
    dict_item_to_code: Union[Dict, None] = None,
    field_stock: str = "stocks",
    round_stocks: bool = True,
    update_attribute: bool = True,
) -> pd.DataFrame:
    """
    Split stocks using fractions specified
    
    Function Arugments
    -----------------
    - df_stocks: stocks_0 data frame
    - dict_fracs: dicationary mapping Item to new dictionary giving
        new item and fraction
    - dict_to_sisepuede_cat: dictionary mapping Item to sisepuede category
    - item_code_start: new item code starting point
    - attribute_cw_fao_cats: attribute table to refresh
    
    Keyword Arugments
    -----------------
    - dict_item_to_code: dctionary mapping new item to item code
    - round_stocks: round to integer?
    - update_attribute: set to false to leave alone
    """
    
    # check stocks specified
    all_items_base = sorted(list(df_stocks["Item"].unique()))
    items_iter = (
        [x for x in list(dict_fracs.keys()) if x in all_items_base]
        if isinstance(dict_fracs, dict)
        else None
    )
    if items_iter is not None:
        items_iter = None if (len(items_iter) == 0) else items_iter
    
    if items_iter is None: 
        return df_stocks
    
    dict_fracs = dict((k, v) for k, v in dict_fracs.items() if k in items_iter)

    # otherwise split
    df_out = [
        df_stocks[
            ~df_stocks["Item"].isin(items_iter)
        ].reset_index(drop = True)
    ]
    
    # some attribute information
    
    # keys to drop from table (will be added)
    attr_keys_drop = set(
        sum(
            [
                list(v.keys()) for v in dict_fracs_to_split.values() 
                 if isinstance(v, dict)
            ], 
            []
        )
    )
    
    # new rows
    attr_new = {
        attribute_cw_fao_cats.key: [],
        "sisepuede_demand_subsector": [],
        "sisepuede_demand_category": [],
        "agg_to_fao_category": []
    }
    
    
    # iteration
    
    dict_item_to_item_code_out = {}
    item_code_new = item_code_start
    
    for k, v in dict_fracs.items():
        
        df_stocks_splitting = sf.subset_df(
            df_stocks,
            {
                "Item": [k]
            }
        )
        
        
        # get attribute information
        subsec = attribute_cw_fao_cats.field_maps.get(f"{attribute_cw_fao_cats.key}_to_sisepuede_demand_subsector").get(k)
        dem_cat = attribute_cw_fao_cats.field_maps.get(f"{attribute_cw_fao_cats.key}_to_sisepuede_demand_category").get(k)
        agg_to_fao_cat = attribute_cw_fao_cats.field_maps.get(f"{attribute_cw_fao_cats.key}_to_agg_to_fao_category").get(k)
        
        df_split_out = []
        
        for new_item, frac in v.items():
            
            # copy and overwrite item info
            df_cur = df_stocks_splitting.copy()
            df_cur["Item"] = new_item
            
            # get code
            ic = (
                dict_item_to_code.get(new_item)
                if isinstance(dict_item_to_code, dict)
                else item_code_new
            )
            ic = item_code_new if (ic is None) else item_code_new
            dict_item_to_item_code_out.update({new_item: ic})
            df_cur["Item Code"] = ic
            
            # update count
            vec_new = frac*np.array(df_cur[field_stock])
            vec_new = (
                np.round(vec_new)
                if round_stocks
                else vec_new
            )
            df_cur[field_stock] = vec_new
            
            # add to output
            df_split_out.append(df_cur)
            
            
            # update attributeand move to next iteration
            dem_cat_new = dict_to_sisepuede_cat.get(new_item, dem_cat)
            attr_new[attribute_cw_fao_cats.key].append(new_item)
            attr_new["sisepuede_demand_subsector"].append(subsec)
            attr_new["sisepuede_demand_category"].append(dem_cat_new)
            attr_new["agg_to_fao_category"].append(agg_to_fao_cat)
            
            item_code_new += 1
            
        df_split_out = pd.concat(df_split_out, axis = 0)
        df_out.append(df_split_out)
        
        
    # build new attribute table
    attr_tab_out = attribute_cw_fao_cats
    
    if update_attribute:
        
        df_attr_new = attr_tab_out.table
        df_attr_new = (
            df_attr_new[
                ~df_attr_new[attr_tab_out.key].isin(attr_keys_drop)
            ]
            .reset_index(drop = True)
        )
    
        attr_new = (
            pd.concat(
                [
                    df_attr_new, 
                    pd.DataFrame(attr_new)
                ],
                axis = 0
            )
            .sort_values(by = ["sisepuede_demand_subsector", "sisepuede_demand_category", attribute_cw_fao_cats.key])
            .reset_index(drop = True)
        )
        attr_tab_out = AttributeTable(attr_new, attribute_cw_fao_cats.key)
    
    # concat output dataframe
    df_out = pd.concat(df_out, axis = 0)
    
    return df_out, attr_tab_out, dict_item_to_item_code_out

In [339]:
# build composite data frame for estimation
def build_df_est(
    df_exports: pd.DataFrame,
    df_imports: pd.DataFrame,  
    df_production: pd.DataFrame,
    df_stocks: pd.DataFrame,
    dict_items_repl: dict,
    fields_dat: list,
    fields_grp: list, 
    agg_func: str = "sum",
    dict_map_fao_cat_to_new_fao_category: Union[Dict[str, str], None] = None,
) -> pd.DataFrame:
    """
    Keyword Arguments
    -----------------
    - dict_map_fao_cat_to_new_fao_category: dictionary to use to map some fao cats to others
        during aggregation
    """
    
    # collect and fill na
    df_est = pd.merge(df_imports, df_exports, how = "outer")
    df_est = pd.merge(df_est, df_production, how = "outer")
    df_est = pd.merge(df_est, df_stocks, how = "outer")
    df_est.fillna(0.0, inplace = True)
    
    if isinstance(dict_map_fao_cat_to_new_fao_category, dict):
        # get map of items to item code
        dict_item_to_item_code = sf.build_dict(
            df_est[["Item", "Item Code"]]
        )

        # replace items
        df_est["Item"].replace(dict_map_fao_cat_to_new_fao_category, inplace = True)

        # aggregate
        df_est = sf.simple_df_agg(
            df_est,
            [x for x in fields_grp if x != "Item Code"],
            dict((x, agg_func) for x in fields_dat)
        )

        # add ideam code
        df_est["Item Code"] = df_est["Item"].replace(dict_item_to_item_code)
        
    
    # drop rows where exports exceed production (here, we have 1:1 connection for items)
    df_est = df_est[
        (df_est["exports"] <= df_est["production"])
        #(df_est["imports"] <= df_est["production"])
    ].reset_index(drop = True)
    
    # calculate demand
    E = np.array(df_est["exports"])
    I = np.array(df_est["imports"])
    P = np.array(df_est["production"])
    M = P + I - E
    df_est["demand"] = M
    fields_dat.append("demand")
    
    df_est_out = df_est.copy()
    df_est["Item"] = df_est["Item"].replace(dict_items_repl)
    dict_agg = dict([(x, "first") for x in fields_grp])
    dict_agg.update(dict([(x, agg_func) for x in fields_dat]))
    fields_drop = [x for x in df_est.columns if (x not in fields_dat + fields_grp)]
    
    df_agg = df_est.drop(fields_drop, axis = 1).groupby(fields_grp).agg(dict_agg).reset_index(drop = True)
    
    return df_agg, df_est_out



# function to clean country names
def clean_region_name(x: str, clean_type = "for_data") -> str:
    nm = x.split("(")[0].strip()
    
    if clean_type == "for_data":
        nm = nm.lower().replace(" ", "_")
    elif clean_type == "for_key":
        None
    
    return nm



# filter and order data frames
def filter_dfs(
    df: pd.DataFrame, 
    dict_subset: dict,
    fields_keep: list
) -> pd.DataFrame:
    
    df_out = sf.subset_df(df, dict_subset)
    fields_drop = [x for x in df_out.columns if x not in fields_keep]
    df_out.drop(fields_drop, axis = 1, inplace = True)
    df_out.reset_index(drop = True, inplace = True)
    
    return df_out



# read in FAO data
def get_faostat_data(
    dir_faostat: str, 
    nm: str,
    encode: str = "ISO-8859-1"
) -> pd.DataFrame:
    
    fp_read = os.path.join(dir_faostat, nm, f"{nm}.csv")
    sf.check_path(fp_read)
    
    df_ret = pd.read_csv(
        fp_read, 
        encoding = encode
    )
    
    return df_ret



def split_stocks(
    df_stocks: pd.DataFrame,
    dict_fracs: Union[Dict[str, Dict[str, float]], None],
    dict_to_sisepuede_cat: Union[Dict[str, Dict[str, float]], None],
    item_code_start: int, 
    attribute_cw_fao_cats: AttributeTable,
    dict_item_to_code: Union[Dict, None] = None,
    field_stock: str = "stocks",
    round_stocks: bool = True,
    update_attribute: bool = True,
) -> pd.DataFrame:
    """
    Split stocks using fractions specified
    
    Function Arugments
    -----------------
    - df_stocks: stocks_0 data frame
    - dict_fracs: dicationary mapping Item to new dictionary giving
        new item and fraction
    - dict_to_sisepuede_cat: dictionary mapping Item to sisepuede category
    - item_code_start: new item code starting point
    - attribute_cw_fao_cats: attribute table to refresh
    
    Keyword Arugments
    -----------------
    - dict_item_to_code: dctionary mapping new item to item code
    - round_stocks: round to integer?
    - update_attribute: set to false to leave alone
    """
    
    # check stocks specified
    all_items_base = sorted(list(df_stocks["Item"].unique()))
    items_iter = (
        [x for x in list(dict_fracs.keys()) if x in all_items_base]
        if isinstance(dict_fracs, dict)
        else None
    )
    if items_iter is not None:
        items_iter = None if (len(items_iter) == 0) else items_iter
    
    if items_iter is None: 
        return df_stocks
    
    dict_fracs = dict((k, v) for k, v in dict_fracs.items() if k in items_iter)

    # otherwise split
    df_out = [
        df_stocks[
            ~df_stocks["Item"].isin(items_iter)
        ].reset_index(drop = True)
    ]
    
    # some attribute information
    
    # keys to drop from table (will be added)
    attr_keys_drop = set(
        sum(
            [
                list(v.keys()) for v in dict_fracs_to_split.values() 
                 if isinstance(v, dict)
            ], 
            []
        )
    )
    
    # new rows
    attr_new = {
        attribute_cw_fao_cats.key: [],
        "sisepuede_demand_subsector": [],
        "sisepuede_demand_category": [],
        "agg_to_fao_category": []
    }
    
    
    # iteration
    
    dict_item_to_item_code_out = {}
    item_code_new = item_code_start
    
    for k, v in dict_fracs.items():
        
        df_stocks_splitting = sf.subset_df(
            df_stocks,
            {
                "Item": [k]
            }
        )
        
        
        # get attribute information
        subsec = attribute_cw_fao_cats.field_maps.get(f"{attribute_cw_fao_cats.key}_to_sisepuede_demand_subsector").get(k)
        dem_cat = attribute_cw_fao_cats.field_maps.get(f"{attribute_cw_fao_cats.key}_to_sisepuede_demand_category").get(k)
        agg_to_fao_cat = attribute_cw_fao_cats.field_maps.get(f"{attribute_cw_fao_cats.key}_to_agg_to_fao_category").get(k)
        
        df_split_out = []
        
        for new_item, frac in v.items():
            
            # copy and overwrite item info
            df_cur = df_stocks_splitting.copy()
            df_cur["Item"] = new_item
            
            # get code
            ic = (
                dict_item_to_code.get(new_item)
                if isinstance(dict_item_to_code, dict)
                else item_code_new
            )
            ic = item_code_new if (ic is None) else item_code_new
            dict_item_to_item_code_out.update({new_item: ic})
            df_cur["Item Code"] = ic
            
            # update count
            vec_new = frac*np.array(df_cur[field_stock])
            vec_new = (
                np.round(vec_new)
                if round_stocks
                else vec_new
            )
            df_cur[field_stock] = vec_new
            
            # add to output
            df_split_out.append(df_cur)
            
            
            # update attributeand move to next iteration
            dem_cat_new = dict_to_sisepuede_cat.get(new_item, dem_cat)
            attr_new[attribute_cw_fao_cats.key].append(new_item)
            attr_new["sisepuede_demand_subsector"].append(subsec)
            attr_new["sisepuede_demand_category"].append(dem_cat_new)
            attr_new["agg_to_fao_category"].append(agg_to_fao_cat)
            
            item_code_new += 1
            
        df_split_out = pd.concat(df_split_out, axis = 0)
        df_out.append(df_split_out)
        
        
    # build new attribute table
    
    attr_tab_out = None
    
    if update_attribute:
        
        df_attr_new = attribute_cw_fao_cats.table
        df_attr_new = (
            df_attr_new[
                ~df_attr_new[attribute_cw_fao_cats.key].isin(attr_keys_drop)
            ]
            .reset_index(drop = True)
        )
    
        attr_new = (
            pd.concat(
                [
                    df_attr_new, 
                    pd.DataFrame(attr_new)
                ],
                axis = 0
            )
            .sort_values(by = ["sisepuede_demand_subsector", "sisepuede_demand_category", attribute_cw_fao_cats.key])
            .reset_index(drop = True)
        )
        attr_tab_out = AttributeTable(attr_new, attribute_cw_fao_cats.key)
    
    # concat output dataframe
    df_out = pd.concat(df_out, axis = 0)
    
    return df_out, attr_tab_out, dict_item_to_item_code_out




# set some fields for working with crosswalk
flag_none = "NONE"
field_cat = "fao_category"
field_subsec = "sisepuede_demand_subsector"
field_demand_cat = "sisepuede_demand_category"

# load data
dir_faostat = "/Users/jsyme/Documents/Projects/FY21/SWCHE131_1000/Data/ingestion/FAOSTAT/"
df_ag_production = get_faostat_data(dir_faostat, "Production_Crops_Livestock_E_All_Data_(Normalized)")
df_ag_trade = get_faostat_data(dir_faostat, "Trade_Crops_Livestock_E_All_Data_(Normalized)")
df_cw_fao = pd.read_csv(sa.fp_csv_cw_fao_crops)
attribute_cw_fao_cats = AttributeTable(
    sa.fp_csv_cw_fao_product_demand_categories_for_ie,
    field_cat, 
    []
)



# set some attributes
attr_region = sa.model_attributes.dict_attributes.get("region")
attr_time_period = sa.model_attributes.dict_attributes.get("dim_time_period")
model_afolu = mafl.AFOLU(sa.model_attributes)

# get region codes
region_codes_all = attr_region.key_values
dict_area_codes = attr_region.field_maps.get("region_to_fao_area_code")
region_codes_all = [dict_area_codes.get(x, None) for x in region_codes_all]



##  SPLIT CATTLE INTO DAIRY/NON-DAIRY

# use brazil as basdline fraction assumption
# https://apps.fas.usda.gov/newgainapi/api/Report/DownloadReportByFileName?fileName=Livestock%20and%20Products%20Annual_Brasilia_Brazil_08-15-2021.pdf
num_dairy = 43750
num_nondairy = 58100
denom = num_dairy + num_nondairy
frac_dairy = num_dairy/denom
frac_nondairy = num_nondairy/denom


# iterate to split trade/production based on assumed fractions
item_code_start = max(list(df_ag_production["Item Code"])) + 1       
dict_fracs_cattle_split = {
    "Cattle": {
        "Dairy Cattle": frac_dairy,
        "Non Dairy Cattle": frac_nondairy
    }
}
dict_cattle_new_to_ssp_cat = {
    "Dairy Cattle": "cattle_dairy",
    "Non Dairy Cattle": "cattle_nondairy"
}

# update production & attribute table
df_ag_production, attribute_cw_fao_cats, dict_item_to_item_code_new = split_stocks(
    df_ag_production,
    dict_fracs_cattle_split,
    dict_cattle_new_to_ssp_cat,
    item_code_start,
    attribute_cw_fao_cats,
    field_stock = "Value",
)

# update trade using new attribute (attr_pass is None)
df_ag_trade, attr_pass, dict_ignore = split_stocks(
    df_ag_trade,
    dict_fracs_cattle_split,
    dict_cattle_new_to_ssp_cat,
    item_code_start,
    attribute_cw_fao_cats,
    dict_item_to_code = dict_item_to_item_code_new,
    field_stock = "Value",
    update_attribute = False,
)

# dictionary to map some categories to others
dict_map_fao_cat_to_new_fao_category = sf.build_dict(
    attribute_cw_fao_cats.table[[attribute_cw_fao_cats.key, "agg_to_fao_category"]].dropna()
)




##  FAO Items to drop and keep - rely on attribute_cw_fao_cats

df_cw_product_cats = attribute_cw_fao_cats.table
items_drop = list(
    df_cw_product_cats[
        df_cw_product_cats[field_demand_cat].isin([flag_none])
    ][field_cat]
)
items_keep = list(
    df_cw_product_cats[
        ~df_cw_product_cats[field_demand_cat].isin([flag_none])
    ][field_cat]
)

# get years to keep
years_keep = list(range(2011, 2021))

# filtering dictionaries
dict_filt = {
    "Element": ["Production"],
    "Year": years_keep,
    "Area Code": region_codes_all,
    "Item": list(df_cw_fao["fao_crop"])
}
fields_keep = ["Area", "Item", "Item Code", "Year", "Unit", "Value"]





##  reduce

dict_repl_stocks_0 = {
    "Asses": "mules", 
    #"Cattle": "cattle_nondairy",
    "Dairy Cattle": "cattle_dairy",
    "Non Dairy Cattle": "cattle_nondairy",
    "Chickens": "chickens",
    "Ducks": "chickens",
    "Goats": "goats",
    "Horses": "horses",
    "Mules": "mules",
    "Pigs": "pigs",
    "Sheep": "sheep",
    "Turkeys": "chickens",
    "Camelids, other": "horses",
    "Buffaloes": "buffalo"
}
# stocks/yield - two parts
df_yields = filter_dfs(
    df_ag_production,
    dict_filt,
    fields_keep
).rename(columns =  {"Value": "stocks"})
dict_filt.update({"Item": items_keep, "Element": ["Stocks"]})
df_stocks_0 = filter_dfs(
    df_ag_production,
    dict_filt,
    fields_keep
).rename(columns =  {"Value": "stocks"})
df_stocks = pd.concat([
    df_yields,
    df_stocks_0
], axis = 0).reset_index(drop = True)

# production
dict_filt.update({"Element": ["Production"]})
df_prod = filter_dfs(
    df_ag_production,
    dict_filt,
    fields_keep
).rename(columns =  {"Value": "production"})

# append stocks that are unaccounted for
df_prod_app = df_stocks[
    df_stocks["Item"].isin(set(df_stocks["Item"]) - set(df_prod["Item"]))
].copy().rename(columns = {"stocks": "production"})
df_prod = pd.concat([
    df_prod,
    df_prod_app
], axis = 0).reset_index(drop = True)

# imports
dict_filt.update({"Element": ["Import Quantity"]})
df_imports = filter_dfs(
    df_ag_trade,
    dict_filt,
    fields_keep
).rename(columns =  {"Value": "imports"})

# exports
dict_filt.update({"Element": ["Export Quantity"]})
df_exports = filter_dfs(
    df_ag_trade,
    dict_filt,
    fields_keep
).rename(columns =  {"Value": "exports"})


# convert Heads units to 1000 Heads where present and clean items
dict_dfs = {
    "exports": df_exports,
    "imports": df_imports,
    "production": df_prod,
    "stocks": df_stocks
}
for key in dict_dfs.keys():
    # 
    df = dict_dfs[key]
    vec_adj = np.ones(len(df))
    vec_unit = np.array(df["Unit"])
    vec_vals = np.array(df[key])
    
    w = np.where(vec_unit == "Head")[0]
    if len(w) > 0:
        np.put(vec_adj, w, 0.001)
        np.put(vec_unit, w, "1000 Head")
    df["Unit"] = vec_unit
    df[key] = vec_vals*vec_adj
    
    # clean items
    df["Item"] = [x.strip() for x in list(df["Item"])]
    
    dict_dfs.update({key: df})

    
    
# split out between items associated with stock and derivative goods 
items_stock = list(df_stocks["Item Code"])
dict_dfs_split = {"stocks": {}, "derivatives": {}}

for key in dict_dfs.keys():
    df = dict_dfs[key]
    df_st = df[df["Item Code"].isin(items_stock)].copy().reset_index(drop = True)
    df_dr = df[~df["Item Code"].isin(items_stock)].copy().reset_index(drop = True)
    
    dict_dfs_split["stocks"].update({key: df_st})
    dict_dfs_split["derivatives"].update({key: df_dr})
    

# get composite
dict_repl = attribute_cw_fao_cats.field_maps[f"{field_cat}_to_{field_demand_cat}"]





# get dfs - stock goods
df_agg_stock, df_est_stock = build_df_est(
    dict_dfs_split["stocks"]["exports"],
    dict_dfs_split["stocks"]["imports"], 
    dict_dfs_split["stocks"]["production"], 
    dict_dfs_split["stocks"]["stocks"],
    dict_repl,
    ["exports", "imports", "production", "stocks"],
    ["Area", "Item", "Year", "Unit"],
    dict_map_fao_cat_to_new_fao_category = dict_map_fao_cat_to_new_fao_category,
)

# get data for derivative products
df_agg_deriv, df_est_deriv_0 = build_df_est(
    dict_dfs_split["derivatives"]["exports"],
    dict_dfs_split["derivatives"]["imports"], 
    dict_dfs_split["derivatives"]["production"], 
    dict_dfs_split["derivatives"]["stocks"],
    dict_repl,
    ["exports", "imports", "production", "stocks"],
    ["Area", "Item", "Year", "Unit"], 
    dict_map_fao_cat_to_new_fao_category = dict_map_fao_cat_to_new_fao_category,
)



Unnamed: 0,Area,Item Code,Item,Year,Unit,exports
0,Brazil,2030,Dairy Cattle,2019,1000 Head,214.634
1,Brazil,2031,Non Dairy Cattle,2019,1000 Head,285.034


In [343]:
# 1. calculate demands for stocks (done, available in df_agg_stock, df_est_stock)
# 2. caclculate production ratios for derivative products in dict_dfs_split["derivatives"]
#    - merge derivative production to df_agg_stock *after* replacing item names
#    - calculate demadns for each derivative good
#    - using this ratio, calculate demands in terms of 
# get data for derivative products

df_est_deriv_0["Item_merge"] = df_est_deriv_0["Item"].replace(dict_repl)
df_est_deriv_0.drop(["stocks"], axis = 1, inplace = True) if ("stocks" in df_est_deriv_0.columns) else None

# initialize the data frame for estimating imports/exports
df_est_ie = pd.merge(
    df_est_deriv_0, 
    df_agg_stock[["Area", "Year", "Item", "demand"]].rename(columns = {"demand": "stocks", "Item": "Item_merge"}), 
    how = "left"
)
# drop no production
df_est_ie = df_est_ie[df_est_ie["production"] > 0.0].reset_index(drop = True)



##  MAKE SURE ALL PRODUCTION/EXPORT/IMPORT/DEMANDS ARE IN SAME UNIT (tonnes)

# use "small" value as average, 43g/egg: https://www.dineachook.com.au/blog/how-to-get-bigger-eggs-from-your-chickens-egg-weight-and-size/
# 43g/egg = 0.043 tonne/1000 egg
factor_eggs = 0.043
vec_adj = np.ones(len(df_est_ie))
vec_unit = np.array(df_est_ie["Unit"])

for key in ["exports", "imports", "production", "demand"]:
    
    vec_vals = np.array(df_est_ie[key])
    w = np.where(vec_unit == "1000 No")[0]
    if len(w) > 0:
        np.put(vec_adj, w, factor_eggs)
        np.put(vec_unit, w, "tonnes")
    df_est_ie["Unit"] = vec_unit
    df_est_ie[key] = vec_vals*vec_adj


##  THEN, ADD THE PRODUCTION RATIO, GENERATE IMP/EXP ESTIMATES IN TERMS OF STOCKS, AND TAKE TONNE-WEIGHTED AVERAGE

# production ratio and import/exports as stock equiv
field_ratio = "domestic_production_ratio" # prod/stocks
df_est_ie[field_ratio] = np.nan_to_num(np.array(df_est_ie["production"])/np.array(df_est_ie["stocks"]), 0.0, posinf = 0.0)
for key in ["imports", "exports"]:
    field_new = f"{key}_stock_equivalent"
    df_est_ie[field_new] = np.nan_to_num(np.array(df_est_ie[key])/np.array(df_est_ie[field_ratio]), 0.0, posinf = 0.0)

# get production weights - start with total derivative production
fields_grp = ["Area", "Year", "Item_merge"]
fields_sum = ["production"]
dict_agg = dict(zip(fields_grp, ["first" for x in fields_grp]))
dict_agg.update(dict(zip(fields_sum, ["sum" for x in fields_sum])))
df_est_ie_total_deriv_prod = df_est_ie[
    ~df_est_ie["Item"].isin(list(df_stocks["Item"]))
][fields_grp + fields_sum].groupby(fields_grp).agg(dict_agg).reset_index(drop = True).rename(columns = {"production": "total_derivative_production"})


##  

# merge in to get production weights
df_est_ie = pd.merge(df_est_ie, df_est_ie_total_deriv_prod, how = "left")
df_est_ie["production_weight"] = np.array(df_est_ie["production"])/np.array(df_est_ie["total_derivative_production"])

# now, estimate weighted imports/exports
for key in ["imports", "exports"]:
    field_new = f"weighted_est_{key}_equiv"
    field_stock_equiv = f"{key}_stock_equivalent"
    df_est_ie[field_new] = np.array(df_est_ie[field_stock_equiv])*np.array(df_est_ie["production_weight"])

# aggregate 
fields_grp = ["Area", "Year", "Item_merge"]
fields_sum = ["weighted_est_imports_equiv", "weighted_est_exports_equiv"]
dict_agg = dict(zip(fields_grp, ["first" for x in fields_grp]))
dict_agg.update(dict(zip(fields_sum, ["sum" for x in fields_sum])))
df_est_ie_est_ie_equiv = df_est_ie[fields_grp + fields_sum].groupby(fields_grp).agg(dict_agg).reset_index(drop = True)
df_est_ie_est_ie_equiv.rename(
    columns = {
        "weighted_est_imports_equiv": "est_imports_equiv",
        "weighted_est_exports_equiv": "est_exports_equiv"
    }
)

df_out = pd.merge(
    df_est_ie_est_ie_equiv.rename(columns = {"Item_merge": "Item"}),
    df_agg_stock[["Area", "Item", "Year", "exports", "imports", "stocks"]], 
    how = "outer"
)

df_out.fillna(0, inplace = True)


##  CHECK FRACTIONS, OVERWRITE WHERE ESTIMATES ARE UNREASONABLE

# get some fractions
df_out["exports_fin_est"] = np.array(df_out["exports"]) + np.array(df_out["weighted_est_exports_equiv"])
df_out["imports_fin_est"] = np.array(df_out["imports"]) + np.array(df_out["weighted_est_imports_equiv"])
df_out["domestic_demand_fin_est"] = np.array(df_out["imports_fin_est"]) + np.array(df_out["stocks"]) - np.array(df_out["exports_fin_est"])
df_out["import_frac_of_demand"] = np.nan_to_num(np.array(df_out["imports_fin_est"])/np.array(df_out["domestic_demand_fin_est"]), 0.0)
df_out["export_frac_of_prod"] = np.nan_to_num(np.array(df_out["exports_fin_est"])/np.array(df_out["stocks"]), 0.0)

# set a threshold for acceptable exports; if exceeding the threshold, revert to stock exports
thresh = 0.75
vec_old = np.array(df_out["export_frac_of_prod"])
w = np.where(vec_old > thresh)[0]
if len(w) > 0:
    vec_new = np.nan_to_num(np.array(df_out["exports"])/np.array(df_out["stocks"]), 0, posinf = 0.0)
    vec_repl = vec_new[w]
    np.put(vec_old, w, vec_new[w])
    df_out["export_frac_of_prod"] = vec_old
    
df_out["exports_est"] = np.array(df_out["export_frac_of_prod"])*np.array(df_out["stocks"])

# set a threshold for acceptable imports;
thresh = 0.75
vec_old = np.array(df_out["import_frac_of_demand"])
w = np.where(vec_old > thresh)[0]
if len(w) > 0:
    vec_new = np.nan_to_num(np.array(df_out["imports"])/(np.array(df_out["stocks"]) + np.array(df_out["imports"]) - np.array(df_out["exports"])), 0, posinf = 0.0)
    vec_repl = vec_new[w]
    np.put(vec_old, w, vec_new[w])
    df_out["import_frac_of_demand"] = vec_old


    
# components used to build input fields
attr_agrc = sa.model_attributes.get_attribute_table(sa.model_attributes.subsec_name_agrc)
attr_lvst = sa.model_attributes.get_attribute_table(sa.model_attributes.subsec_name_lvst)
dict_subsec_to_subsec_abv = sa.model_attributes.dict_attributes.get("abbreviation_subsector").field_maps.get("subsector_to_abbreviation_subsector")
dict_subsec_abv_to_subsec = sa.model_attributes.dict_attributes.get("abbreviation_subsector").field_maps.get("abbreviation_subsector_to_subsector")

dict_repl_subsecs = dict([(x, dict_subsec_to_subsec_abv.get(sa.model_attributes.subsec_name_agrc)) for x in attr_agrc.key_values])
dict_repl_subsecs.update(dict([(x, dict_subsec_to_subsec_abv.get(sa.model_attributes.subsec_name_lvst)) for x in attr_lvst.key_values]))





  df_est_ie[field_ratio] = np.nan_to_num(np.array(df_est_ie["production"])/np.array(df_est_ie["stocks"]), 0.0, posinf = 0.0)
  df_est_ie[field_new] = np.nan_to_num(np.array(df_est_ie[key])/np.array(df_est_ie[field_ratio]), 0.0, posinf = 0.0)
  df_est_ie[field_new] = np.nan_to_num(np.array(df_est_ie[key])/np.array(df_est_ie[field_ratio]), 0.0, posinf = 0.0)
  df_est_ie[field_new] = np.nan_to_num(np.array(df_est_ie[key])/np.array(df_est_ie[field_ratio]), 0.0, posinf = 0.0)
  df_est_ie[field_new] = np.nan_to_num(np.array(df_est_ie[key])/np.array(df_est_ie[field_ratio]), 0.0, posinf = 0.0)
  df_out["import_frac_of_demand"] = np.nan_to_num(np.array(df_out["imports_fin_est"])/np.array(df_out["domestic_demand_fin_est"]), 0.0)
  df_out["export_frac_of_prod"] = np.nan_to_num(np.array(df_out["exports_fin_est"])/np.array(df_out["stocks"]), 0.0)
  df_out["export_frac_of_prod"] = np.nan_to_num(np.array(df_out["exports_fin_est"])/np.array(df_out["stocks"]), 0.0)
  vec_new = np.nan_to_num(np.array(df

In [344]:
df_est_ie[
    (df_est_ie["Area"] == "Brazil") & 
    (df_est_ie["Year"] == 2015)
].sort_values(by = ["exports"], ascending = False).iloc[0:20]

Unnamed: 0,exports,imports,production,Area,Item,Year,Unit,Item Code,demand,Item_merge,stocks,domestic_production_ratio,imports_stock_equivalent,exports_stock_equivalent,total_derivative_production,production_weight,weighted_est_imports_equiv,weighted_est_exports_equiv
8411,18927791.0,8.0,35194000.0,Brazil,Sugar Raw Centrifugal,2015,tonnes,162,16266217.0,sugar_cane,750290300.0,0.046907,170.549588,403515900.0,49194000.0,0.715412,122.013299,288680300.0
8110,8455613.0,8111.0,27273632.0,Brazil,"Meat, Poultry",2015,tonnes,1808,18826130.0,chickens,1343852.0,20.295116,399.652806,416632.9,34397010.0,0.792907,316.887575,330351.2
8119,2852512.0,102511.0,18850000.0,Brazil,"Meat, cattle",2015,tonnes,867,16099999.0,cattle_nondairy,122651.8,153.687092,667.011124,18560.52,21312040.0,0.884477,589.955683,16416.34
8306,1669949.0,25284.0,8075000.0,Brazil,"Oil, soybean",2015,tonnes,237,6430335.0,other_annual,43966070.0,0.183664,137664.159578,9092395.0,136200800.0,0.059287,8161.758638,539065.0
8366,883978.0,348513.0,8204901.0,Brazil,"Rice, paddy (rice milled equivalent)",2015,tonnes,30,7669436.0,rice,12130690.0,0.676375,515265.718564,1306934.0,8204901.0,1.0,515265.718564,1306934.0
7981,834253.0,2148.0,1626716.0,Brazil,Cotton lint,2015,tonnes,767,794611.0,fibers,4021447.0,0.40451,5310.126756,2062379.0,1626716.0,1.0,5310.126756,2062379.0
8146,569816.0,501.0,3430734.0,Brazil,"Meat, pig",2015,tonnes,1035,2861419.0,pigs,39792.05,86.216562,5.810948,6609.125,4508469.0,0.760953,4.42186,5029.235
7939,145205.0,59001.0,13857500.0,Brazil,Beer of barley,2015,tonnes,51,13771296.0,cereals,69025310.0,0.20076,293888.687881,723277.7,119980900.0,0.115498,33943.427674,83536.81
8282,131674.0,209069.0,400000.0,Brazil,"Oil, palm",2015,tonnes,257,477395.0,other_woody_perennial,1892834.0,0.211323,989332.278865,623092.6,80356500.0,0.004978,4924.715569,3101.641
8198,127617.0,2894.0,1211786.0,Brazil,"Offals, edible, cattle",2015,tonnes,868,1087063.0,cattle_nondairy,122651.8,9.879887,292.918336,12916.85,21312040.0,0.056859,16.655108,734.4419


In [345]:
# format items as fields
def build_field(item: str, field_type: str) -> str:
    
    subsec_abv = dict_repl_subsecs.get(item)
    subsec = dict_subsec_abv_to_subsec.get(subsec_abv)
    
    if subsec_abv == "agrc":
        modvar = model_afolu.modvar_agrc_frac_demand_imported if (field_type == "imports") else model_afolu.modvar_agrc_equivalent_exports
    elif subsec_abv == "lvst":
        modvar = model_afolu.modvar_lvst_frac_demand_imported if (field_type == "imports") else model_afolu.modvar_lvst_equivalent_exports
    
    out = sa.model_attributes.build_varlist(subsec, modvar, restrict_to_category_values = [item])
    
    return out[0]

df_out["field_imports"] = df_out["Item"].apply(build_field, field_type = "imports")
df_out["field_exports"] = df_out["Item"].apply(build_field, field_type = "exports")


In [348]:
df_out[df_out["Area"].isin(["Brazil"]) & df_out["Item"].isin(["cattle_dairy"])]

Unnamed: 0,Area,Year,Item,weighted_est_imports_equiv,weighted_est_exports_equiv,exports,imports,stocks,exports_fin_est,imports_fin_est,domestic_demand_fin_est,import_frac_of_demand,export_frac_of_prod,exports_est,field_imports,field_exports
3052,Brazil,2011,cattle_dairy,318.788238,94.067724,173.906,2.26,91415.512,267.973724,321.048238,91468.586514,0.00351,0.002931,267.973724,frac_lvst_livestock_demand_imported_cattle_dairy,exports_lvst_cattle_dairy
3071,Brazil,2012,cattle_dairy,351.868979,96.04985,216.537,0.117,90755.619,312.58685,351.985979,90795.018129,0.003877,0.003444,312.58685,frac_lvst_livestock_demand_imported_cattle_dairy,exports_lvst_cattle_dairy
3090,Brazil,2013,cattle_dairy,292.848862,90.738692,293.788,0.081,90964.043,384.526692,292.929862,90872.44617,0.003224,0.004227,384.526692,frac_lvst_livestock_demand_imported_cattle_dairy,exports_lvst_cattle_dairy
3109,Brazil,2014,cattle_dairy,170.642387,193.209996,277.825,4.377,91222.565,471.034996,175.019387,90926.54939,0.001925,0.005164,471.034996,frac_lvst_livestock_demand_imported_cattle_dairy,exports_lvst_cattle_dairy
3128,Brazil,2015,cattle_dairy,264.61868,177.272392,91.144,0.763,92448.672,268.416392,265.38168,92445.637288,0.002871,0.002903,268.416392,frac_lvst_livestock_demand_imported_cattle_dairy,exports_lvst_cattle_dairy
3147,Brazil,2016,cattle_dairy,476.144089,129.149565,125.651,1.325,93724.557,254.800565,477.469089,93947.225523,0.005082,0.002719,254.800565,frac_lvst_livestock_demand_imported_cattle_dairy,exports_lvst_cattle_dairy
3166,Brazil,2017,cattle_dairy,302.41918,84.497175,174.985,0.103,92355.489,259.482175,302.52218,92398.529005,0.003274,0.00281,259.482175,frac_lvst_livestock_demand_imported_cattle_dairy,exports_lvst_cattle_dairy
3185,Brazil,2018,cattle_dairy,273.988198,47.423837,280.623,0.014,91842.545,328.046837,274.002198,91788.50036,0.002985,0.003572,328.046837,frac_lvst_livestock_demand_imported_cattle_dairy,exports_lvst_cattle_dairy
3204,Brazil,2019,cattle_dairy,0.322961,5.085347,214.634,0.015,92207.835,219.719347,0.337961,91988.453613,4e-06,0.002383,219.719347,frac_lvst_livestock_demand_imported_cattle_dairy,exports_lvst_cattle_dairy


In [349]:
df_out_wide = pd.concat([
    df_out[["Area", "Year", "import_frac_of_demand", "field_imports"]].rename(columns = {"import_frac_of_demand": "value", "field_imports": "field"}),
    df_out[["Area", "Year", "export_frac_of_prod", "field_exports"]].rename(columns = {"export_frac_of_prod": "value", "field_exports": "field"})
], axis = 0).reset_index(drop = True)

df_out_wide = pd.pivot(
    df_out_wide,
    ["Area", "Year"],
    ["field"],
    ["value"]
).reset_index()
df_out_wide.columns = df_out_wide.columns.to_flat_index()

# rename
cols_old = list(df_out_wide.columns)
cols = []

for c in cols_old:
    if c[0] == "value":
        cols.append(c[1])
    else:
        cols.append(c[0])
dict_rnm = dict(zip(cols_old, cols))
df_out_wide.rename(columns = dict_rnm, inplace = True)
df_out_wide.rename(columns = {"Year": "year", "Area": "Nation"}, inplace = True)


  df_out_wide = pd.pivot(


In [703]:
# read path to formatted input data
df_input_data = pd.read_csv("/Users/jsyme/Downloads/data_complete_future_2022_08_24_test1-4.csv")

In [355]:
df_out_wide.to_csv(sa.fp_csv_afolu_import_exports, index = None, encoding = "UTF-8")

In [17]:
df_out_wide.columns

Index(['Nation', 'year', 'exports_agrc_bevs_and_spices_tonne',
       'exports_agrc_cereals_tonne', 'exports_agrc_fibers_tonne',
       'exports_agrc_fruits_tonne',
       'exports_agrc_herbs_and_other_perennial_crops_tonne',
       'exports_agrc_nuts_tonne', 'exports_agrc_other_annual_tonne',
       'exports_agrc_other_woody_perennial_tonne', 'exports_agrc_pulses_tonne',
       'exports_agrc_rice_tonne', 'exports_agrc_sugar_cane_tonne',
       'exports_agrc_tubers_tonne', 'exports_agrc_vegetables_and_vines_tonne',
       'exports_lvst_buffalo', 'exports_lvst_cattle_dairy',
       'exports_lvst_cattle_nondairy', 'exports_lvst_chickens',
       'exports_lvst_goats', 'exports_lvst_horses', 'exports_lvst_mules',
       'exports_lvst_pigs', 'exports_lvst_sheep',
       'frac_agrc_crop_demand_imported_bevs_and_spices',
       'frac_agrc_crop_demand_imported_cereals',
       'frac_agrc_crop_demand_imported_fibers',
       'frac_agrc_crop_demand_imported_fruits',
       'frac_agrc_crop_demand