In [7]:
import os, os.path
import numpy as np
import pandas as pd
#from model_attributes import *
import model_attributes as ma
import model_afolu as mafl
import model_ippu as mi
import model_circular_economy as mc
import model_electricity as ml
import model_energy as me
import model_socioeconomic as se
import setup_analysis as sa
import sisepuede_data_api as api
import sisepuede_models as sm
import support_classes as sc
import support_functions as sf
import importlib
import time
import warnings
import matplotlib.pyplot as plt
from typing import Union
import inspect
import ingestion as ing
import logging
import re

warnings.filterwarnings("ignore")

# Pull some data from IPCC for $F_{MG}$ (land use management factor)
- For factors, see Volume 4 (AFOLU)

    - Chapter 5, Table 5.5 (Croplands)
    
    - Chapter 6, Table 6.2 (Grasslands)

# Pull land use conservation agriculture fractions from 2018_kassam_et_al
- Table 2 gives average fractions by region
- Tables 3-8 give areas by select countries per region

In [141]:
importlib.reload(sc)

# map SISEPUEDE regions to kassam regions
def regions_to_kassam_region(
    return_type: str = "region_to_kassam_region",
) -> dict:
    """
    Map regions from SISEPUDE to regions in kassam
    
    Keyword Arguments
    -----------------
    - return_type: string mapping desired output:
        * "region_to_kassam_region": map SISEPUEDE region to Kassam et al. region
    """
    
    
    dict_wb_region_to_region_kassam = {
        "East Asia & Pacific": "Asia",
        "Europe & Central Asia": "Europe",
        "Latin America & Caribbean": "South America",
        "Middle East & North Africa": "Asia", # THIS IS MOSTLY MIDDLE EAST, THOUGH INCLUDES ALGERIA AND TUNISIA
        "North America": "North America",
        "South Asia": "Asia",
        "Sub-Saharan Africa": "Africa",
    }
    
    dict_region_to_kassam_region = dict(
        (k, dict_wb_region_to_region_kassam.get(v))
        for k, v in regions.dict_region_to_wb_region.items()
    )
    
    # do some updates
    dict_region_to_kassam_region.update({
        "australia": "Australia & NZ",
        "new_zealand": "Australia & NZ",
        "russia": "Russia & Ukraine",
        "ukraine": "Russia & Ukraine",
    })
    
    return dict_region_to_kassam_region



##  COMPONENTS FOR READING TABLES

# some directories

dir_data_afolu = "/Users/jsyme/Documents/Projects/FY21/SWCHE131_1000/Data/AFOLU/"
dir_read = "/Users/jsyme/Documents/Projects/FY21/SWCHE131_1000/Data/AFOLU/2018_kassam_et_al"
dir_repo_data = "/Users/jsyme/Documents/Projects/git_jbus/sisepuede_data"


# some regular expressions

regex_tables = re.compile("T000(\d)-10.1080%2F00207233.2018.1494927")

# some file paths

fns_read_kassam = [x for x in os.listdir(dir_read) if regex_tables.match(x) is not None]
fp_kcc_cw = os.path.join(dir_data_afolu, "values_info_with_cw_kc_1984_2013.csv")


# some derivative classes

model_afolu = mafl.AFOLU(sa.model_attributes)
model_socioeconomic = model_afolu.model_socioeconomic

regions = sc.Regions(sa.model_attributes)
time_periods = sc.TimePeriods(sa.model_attributes)
repo = api.SISEPUEDEBatchDataRepository(
    dir_repo_data,
    sa.model_attributes
)


# some fields

field_country = "Country"
field_crop_area = "crop_area_ha"
field_crop_area_ca = "crop_area_ha_cons_agrc"
field_frac_ca = "frac_crops_cons_agrc"
field_region_kassam = "region_kasssam"


# some lists

years_hist = list(range(2010, 2021))
years_proj = [x for x in time_periods.all_years if x not in years_hist]





# read tables

# climate classification counts and crosswalk to type
df_climate = pd.read_csv(sa.fp_csv_kcc_cell_counts_by_country_kcc)
df_climate.rename(columns = {"ISO_A3": regions.field_iso}, inplace = True)
df_kcc_cw = pd.read_csv(fp_kcc_cw, sep = "|")
df_kcc_cw.rename(columns = {"code_num": "kcc"}, inplace = True)

# SISEPUEDE base data read
df_inputs = repo.read(
    [
        model_afolu.modvar_agrc_area_prop_init,
        model_afolu.modvar_lndu_initial_frac,
        model_socioeconomic.modvar_gnrl_area
    ],
    add_time_periods = True
);

# some field replacement
dict_repl = {
    "CA area2008/09": 2008,
    "CA area2013/14": 2013,
    "CA area2015/16": 2015,
}
all_fields_year = sorted(list(dict_repl.values()))


# input tables from Kassam et al. 2018
dict_tables_kassam = {}
for fn in fns_read_kassam:
    # 
    fp = os.path.join(dir_read, fn)
    key = regex_tables.match(fn)
    key = int(key.groups()[0])
    
    # import and clean
    df = pd.read_csv(fp)
    df.rename(columns = dict_repl, inplace = True)
    
    # replace
    for k in all_fields_year:
        if k not in df.columns:
            continue
            
        vec = list(df[k])
        
        for i, x in enumerate(vec):
            x = str(x).replace("#", "").replace(",", "").strip()
            try: 
                x = float(x)
            except:
                x = np.nan

            vec[i] = x*1000
            
        df[k] = vec

    
    # clean up
    df = (
        (
            pd.melt(
                df,
                [field_country],
                all_fields_year,
                var_name = time_periods.field_year,
                value_name = field_crop_area_ca,
            )
            .rename(columns = {field_country: regions.key})
        )
        if (field_country in df.columns)
        else df
    )
    
    # add iso
    flag_drop = "DROP"
    df = regions.data_func_try_isos_from_countries(
        df, 
        missing_iso_flag = flag_drop,
        return_modified_df = True
    )
    
    df = (
        (
            df[
                ~df[regions.field_iso].isin([flag_drop])
            ]
            .drop(regions.key, axis = 1)
            .reset_index(drop = True)
            .dropna()
        )
        if regions.field_iso in df.columns
        else df
    )
    
    dict_tables_kassam.update({key: df})



##  BUILD SOME DATA 

df_crop_area_ca_kassam = pd.concat(
    [
        x for x in dict_tables_kassam.values()
        if field_crop_area_ca in x.columns
    ]
)


dict_region_kassam_to_regional_default = sf.build_dict(
    dict_tables_kassam.get(2)[["Region", "Per cent ofCroplandarea in the region"]]
)




# get some fields
field_area = sa.model_attributes.build_varlist(
    None,
    model_socioeconomic.modvar_gnrl_area
)[0]

field_crop_frac = sa.model_attributes.build_varlist(
    None,
    model_afolu.modvar_lndu_initial_frac,
    restrict_to_category_values = [model_afolu.cat_lndu_crop]
)[0]


field_iso = "iso_code3"
df_area_crops = (
    df_inputs[[field_iso, time_periods.field_year, field_area, field_crop_frac]]
    .dropna()
    .rename(columns = {field_iso: regions.field_iso})
    .reset_index(drop = True)
)

df_area_crops[field_crop_area] = np.array(df_area_crops[field_area])*np.array(df_area_crops[field_crop_frac])




##  COMBINE AND GET FRACTIONS WHERE OBSERVATIONS ARE AVAILABLE

df_fracs_ca_obs = (
    pd.merge(
        df_area_crops,
        df_crop_area_ca_kassam
    )
)

df_fracs_ca_obs[field_frac_ca] = sf.vec_bounds(
    np.array(df_fracs_ca_obs[field_crop_area_ca])/np.array(df_fracs_ca_obs[field_crop_area]),
    (0, 1)
)

df_fracs_ca_obs.drop([field_area, field_crop_frac, field_crop_area, field_crop_area_ca], axis = 1, inplace = True)
df_fracs_ca_obs.drop_duplicates(inplace = True)

df_fracs_ca_obs0 = df_fracs_ca_obs.copy()

##  BUILD AND INTERPOLATE OBSERVATIONS TO MATCH HISTORICAL DATA

def expand_and_interpolate_obs(
    df_obs: pd.DataFrame, #df_fracs_ca_obs
) -> pd.DataFrame:
    
    df_out = sf.explode_merge(
        regions.get_regions_df(include_iso = True),
        pd.DataFrame({time_periods.field_year: years_hist})
    )
    
    df_out = pd.merge(
        df_out,
        df_obs,
        how = "left"
    )
    
    df_out = df_out[
        df_out[regions.field_iso].isin(
            set(df_obs[regions.field_iso])
        )
    ]

    df_grp = df_out.groupby([regions.field_iso])
    df_ret = []
    
    for iso, df in df_grp:
        
        try:
            df.interpolate(method = "polynomial", order = 2, limit = 20, limit_direction = "both", inplace = True)
        except:
            None
        df.interpolate(method = "bfill", inplace = True)
        df.interpolate(method = "ffill", inplace = True)
        
        df_ret.append(df)
    
    df_ret = pd.concat(df_ret, axis = 0).reset_index(drop = True)
    
    return df_ret

df_fracs_ca_obs = (
    expand_and_interpolate_obs(df_fracs_ca_obs)
    .drop([regions.key], axis = 1)
)




###  COMBINE AVERAGES + OBSERVATIONS TO BUILD AGGREGATE DF

# initialize all isos
df_fracs_ca = regions.get_regions_df(include_iso = True)

# get values for those that need regional averages
df_fracs_ca_region_avgs = (
    df_fracs_ca[
        ~df_fracs_ca[regions.field_iso].isin(
            set(df_fracs_ca_obs[regions.field_iso])
        )
    ]
    .reset_index(drop = True)
)

df_fracs_ca_region_avgs[field_region_kassam] = (
    df_fracs_ca_region_avgs[regions.key]
    .replace(
        regions_to_kassam_region()
    )
)

df_fracs_ca_region_avgs[field_frac_ca] = (
    df_fracs_ca_region_avgs[field_region_kassam]
    .replace(
        dict_region_kassam_to_regional_default
    )
)

df_fracs_ca_region_avgs[field_frac_ca] = np.array(df_fracs_ca_region_avgs[field_frac_ca]).astype(float)/100
df_fracs_ca_region_avgs = df_fracs_ca_region_avgs[[regions.field_iso, field_frac_ca]]
df_fracs_ca_region_avgs[time_periods.field_year] = 2015 # regional averages from 2015/2016

df_fracs_ca = pd.concat(
    [
        df_fracs_ca_obs,
        df_fracs_ca_region_avgs[df_fracs_ca_obs.columns]
    ],
    axis = 0
)



##  EXPAND AND INTERPOLATE FORWARD/BACKWARD


def expand_and_interpolate_all_ca_fracs(
    df_fracs: pd.DataFrame, #df_fracs_ca_obs
) -> pd.DataFrame:
    
    year_min = min(min(years_hist), min(time_periods.all_years))
    year_max = max(max(years_hist), max(time_periods.all_years))
    
    # initialize only as years (all regions accounted for in construction)
    df_out = sf.explode_merge(
        regions.get_regions_df(include_iso = True).drop([regions.key], axis = 1),
        pd.DataFrame({time_periods.field_year: list(range(year_min, year_max + 1))})
    )

    
    df_out = pd.merge(
        df_out,
        df_fracs,
        how = "left"
    )

    df_grp = df_out.groupby([regions.field_iso])
    df_ret = []
    
    for iso, df in df_grp:

        df.interpolate(method = "bfill", inplace = True)
        df.interpolate(method = "ffill", inplace = True)
        
        df_ret.append(df)
    
    df_ret = (
        pd.concat(df_ret, axis = 0)
        .sort_values(by = [regions.field_iso, time_periods.field_year])
        .reset_index(drop = True)
    )
    
    return df_ret

"""
df_fracs_ca = (
    df_fracs_ca[
        df_fracs_ca[time_periods.field_year].isin([2015])
    ]
    .sort_values(by = [regions.field_iso])
    .reset_index(drop = True)
)
""";

df_fracs_ca = expand_and_interpolate_all_ca_fracs(df_fracs_ca)

# TEMPORARY: assign uniform fraction to certain crops and ignore others
- Needs to be modified to 
    1. get fraction of crops associated with conservation + no-till
    2. find fraction of those crops that corresponds with the specified fraction from Kassam et al.
- Kassam et al. (2018) note that the fractions they provide are for *all cropland*

In [146]:
# BUILD CRUDELY FOR NOW (THIS IS WRONG AND AN UNDERESTIMATE)
# 

df_fracs_no_till = df_fracs_ca.copy()

fields_copy = sa.model_attributes.build_varlist(
    None,
    model_afolu.modvar_agrc_frac_no_till
)

for field in fields_copy:
    df_fracs_no_till[field] = df_fracs_no_till[field_frac_ca]

df_fracs_no_till.drop([field_frac_ca], axis = 1, inplace = True)

df_fracs_no_till.to_csv(
    sa.fp_csv_frac_no_till, 
    index = None,
    encoding = "UTF-8"
)

In [163]:
# write to repository
dicts_write = repo.write_from_rbd(
    sa.dir_ref_batch_data,
    years_hist,
    fps_include = [sa.fp_csv_frac_no_till]
)



DataFrame successfully written to '/Users/jsyme/Documents/Projects/git_jbus/sisepuede_data/AFOLU/frac_agrc_no_till_cereals/input_to_sisepuede/historical/frac_agrc_no_till_cereals.csv'
DataFrame successfully written to '/Users/jsyme/Documents/Projects/git_jbus/sisepuede_data/AFOLU/frac_agrc_no_till_cereals/input_to_sisepuede/projected/frac_agrc_no_till_cereals.csv'
DataFrame successfully written to '/Users/jsyme/Documents/Projects/git_jbus/sisepuede_data/AFOLU/frac_agrc_no_till_fibers/input_to_sisepuede/historical/frac_agrc_no_till_fibers.csv'
DataFrame successfully written to '/Users/jsyme/Documents/Projects/git_jbus/sisepuede_data/AFOLU/frac_agrc_no_till_fibers/input_to_sisepuede/projected/frac_agrc_no_till_fibers.csv'
DataFrame successfully written to '/Users/jsyme/Documents/Projects/git_jbus/sisepuede_data/AFOLU/frac_agrc_no_till_other_annual/input_to_sisepuede/historical/frac_agrc_no_till_other_annual.csv'
DataFrame successfully written to '/Users/jsyme/Documents/Projects/git_jbus/

({'frac_agrc_no_till_cereals': {'historical':       Year iso_code3  frac_agrc_no_till_cereals
   0     2010       ABW                   0.632000
   1     2011       ABW                   0.632000
   2     2012       ABW                   0.632000
   3     2013       ABW                   0.632000
   4     2014       ABW                   0.632000
   ...    ...       ...                        ...
   2360  2016       ZWE                   0.035932
   2361  2017       ZWE                   0.035932
   2362  2018       ZWE                   0.035932
   2363  2019       ZWE                   0.035932
   2364  2020       ZWE                   0.035932
   
   [2365 rows x 3 columns],
   'projected':       Year iso_code3  frac_agrc_no_till_cereals
   0     2021       ABW                   0.632000
   1     2022       ABW                   0.632000
   2     2023       ABW                   0.632000
   3     2024       ABW                   0.632000
   4     2025       ABW                   0.6

# Generate land management and input suprema by country

In [174]:
low = np.mean([1.17*0.91, 1.16*0.6])
high = np.mean([1.17*1.09, 1.16*1.4])
nom = 1.165

np.round(low/nom, de

0.755665236051502

In [210]:
field_boreal = "boreal_cat"
field_count = "count"
field_factor = "factor"
field_frac = "frac"
field_kcc = "kcc"
field_temp_trop = "temperate_tropical_cat"
field_wd = "wet_dry_cat"



df_kcc_cw[field_boreal] = [
    ("boreal" if ("boreal" in x.lower()) else "not_boreal")
    for x in list(df_kcc_cw["name"])
]


##  SET F_MG FOR GRASSLANDS

#
dict_grassland_fmg_by_factor_name = {
    "temperate": 1.14, #+/- 0.11),
    "tropical": 1.165 #+/- 0.25)
}
dict_filt_grassland_fmg_codes_by_factor_name = {
    "temperate": {
        field_temp_trop: ["temperate"]
    },
    "tropical": {
        field_temp_trop: ["tropical"]
    }
}

df_climate[field_kcc] = np.array(df_climate[field_kcc]).astype(int)
df_grp = df_climate.groupby([regions.field_iso])
df_climate_by_country_grassland = []

for iso, df in df_grp:
    df = pd.merge(df, df_kcc_cw, how = "left")
    
    df_agg = sf.simple_df_agg(
        df[[field_temp_trop, field_count]],
        [field_temp_trop],
        {
            field_count: "sum"
        }
    )

    vec = np.array(df_agg[field_count])
    vec = vec/vec.sum()

    df_agg[field_frac] = vec
    df_agg[field_factor] = df_agg[field_temp_trop].replace(dict_grassland_fmg_by_factor_name)
    
    total = np.dot(np.array(df_agg[field_factor]), np.array(df_agg[field_frac]))
    
    df_climate_by_country_grassland.append((iso, total))
    
df_climate_by_country_grassland = pd.DataFrame(df_climate_by_country_grassland, columns = [regions.field_iso_3, ])

In [211]:
#df[[field_temp_trop, field_count]].groupby([field_temp])
df_climate_by_country_grassland

[('ABW', 1.14),
 ('AFG', 1.14),
 ('AGO', 1.1496043736214656),
 ('ALB', 1.14),
 ('AND', 1.14),
 ('ARE', 1.14),
 ('ARG', 1.14),
 ('ARM', 1.14),
 ('ASM', 1.165),
 ('ATG', 1.165),
 ('AUS', 1.1420802079192045),
 ('AUT', 1.14),
 ('AZE', 1.14),
 ('BDI', 1.1582059129776292),
 ('BEL', 1.14),
 ('BEN', 1.164983811470937),
 ('BFA', 1.1599475465572164),
 ('BGD', 1.1561250781344083),
 ('BGR', 1.14),
 ('BHR', 1.14),
 ('BHS', 1.163385240135576),
 ('BIH', 1.14),
 ('BLR', 1.14),
 ('BLZ', 1.165),
 ('BMU', 1.14),
 ('BOL', 1.1547160997322548),
 ('BRA', 1.1611880735882474),
 ('BRB', 1.165),
 ('BRN', 1.165),
 ('BTN', 1.14),
 ('BWA', 1.14),
 ('CAF', 1.165),
 ('CAN', 1.14),
 ('CHE', 1.14),
 ('CHL', 1.140002723074242),
 ('CHN', 1.140067254516088),
 ('CIV', 1.165),
 ('CMR', 1.1642342878413914),
 ('COD', 1.1636432617313395),
 ('COG', 1.165),
 ('COL', 1.16200619729948),
 ('COM', 1.161889342172361),
 ('CPV', 1.14),
 ('CRI', 1.1619743145028079),
 ('CUB', 1.1648864255860438),
 ('CUW', 1.141763110307414),
 ('CYM', 1.1

In [402]:
def format_strategy_as_wide_by_component_transformations(
    model_attributes: ma.ModelAttributes,
    delim: str = ":",
    field_baseline: str = "baseline_strategy_id",
    field_code: str = "strategy_code",
    field_description: str = "description",
    field_strategy: str = "strategy",
    flags_ignore: list = ["ALL", "BUNDLE", "_PLUR", "_REP"],
) -> Union[pd.DataFrame, None]:
    """
    Format the strategy attribute table as wide by components
    
    Function Arguments
    ------------------
    - model_attributes: ma.ModelAttributes object used to access strategies and subsectors

    Keyword Arguments
    -----------------
    - delim: delimiter used in strategies and transformations
    - field_baseline: field containing the baseline strategy binary
    - field_code: field in strategy attribute table containing the strategy code
    - field_description: field containing strategy description
    - field_strategy: field containing the strategy name
    - flags_ignore: substrings in strategy codes to use to ignore
    """
    
    # some components
    attr_strat = sa.model_attributes.dict_attributes.get(f"dim_{sa.model_attributes.dim_strategy_id}")
    attr_subsec = sa.model_attributes.dict_attributes.get(f"abbreviation_subsector")
    
    
    df = attr_strat.table.copy()
    fields_req = [
        field_code
    ]
    
    if not sf.check_fields(df, fields_req):
        return None
    
    # codes to keep
    codes_keep = [
        x for x in list(attr_strat.table[field_code])
        if str(x).split(delim)[0].lower() in attr_subsec.key_values
        and not any([y in x for y in flags_ignore if isinstance(y, str)])
    ]
    codes_keep += [
        "LNDU:PLUR"
    ]
    codes_keep.sort()
    
    # add additional
    prepend_tx = f"TX{delim}"
    transformations_keep = [f"{prepend_tx}{x}" for x in codes_keep]
    transformations_add = [x for x in transformations_keep if x not in df.columns]
    df[transformations_add] = 0
    
    for i in range(len(df)):
        code = str(df[field_code].iloc[i])
        tx = f"{prepend_tx}{code}"
        if tx in df.columns:
            df[tx].iloc[i] = 1
    
    df.drop(
        [
            model_attributes.dim_strategy_id,
            field_baseline,
            field_description,
            field_strategy
        ],
        axis = 1,
        inplace = True,
    )
    
    return df
    
df_out = format_strategy_as_wide_by_component_transformations(sa.model_attributes)   