In [11]:
import os, os.path
import numpy as np
import pandas as pd
import model_attributes as ma
from attribute_table import AttributeTable
import setup_analysis as sa
import support_functions as sf
import importlib
import time
import warnings
import matplotlib.pyplot as plt



In [12]:
# load the IFA plant-nutrition (fertilizer) query export, skipping the first two rows of the sheet
df_data = pd.read_excel(
    "/Users/jsyme/Documents/Projects/FY21/SWCHE131_1000/Data/AFOLU/IFADATA Plant Nutrition query - 06-Aug-2022_01.30.xlsx",
    skiprows = 2
)

# load the normalized FAOSTAT crops & livestock trade table, read with ISO-8859-1 encoding
# NOTE(review): dir_faostat is not defined in the visible cells - confirm it is set upstream
encode = "ISO-8859-1"
df_trade = pd.read_csv(
    os.path.join(
        dir_faostat,
        "Trade_CropsLivestock_E_All_Data_(Normalized)",
        "Trade_Crops_Livestock_E_All_Data_(Normalized).csv"
    ),
    encoding = encode
)

In [15]:

# re-read the FAOSTAT trade table, then keep only the 2018 and 2019 records
df_trade = pd.read_csv(
    os.path.join(
        dir_faostat,
        "Trade_CropsLivestock_E_All_Data_(Normalized)",
        "Trade_Crops_Livestock_E_All_Data_(Normalized).csv"
    ),
    encoding = encode
)
#df_trade["Element"].unique()
df_trade = df_trade.loc[df_trade["Year"].isin([2018, 2019])].reset_index(drop = True)


array(['Export Quantity', 'Export Value', 'Import Quantity',
       'Import Value'], dtype=object)

In [165]:


##  format the regional data
def format_regional_data(
    df_data_by_region: pd.DataFrame,
    fields_ind: list = ["Country", "Year"],
    fields_values: list = ["Consumption"],
    fields_pivot: list = ["Product"],
    dict_rnm_out: dict = None
) -> pd.DataFrame:
    """
    Pivot long-format data to wide format and clean up the column names.

    The input is pivoted so that each combination of `fields_ind` is one row
    and each value found in `fields_pivot` becomes its own column. Column
    names are then flattened and normalized:

        * index fields are lower-cased ("Country" -> "country")
        * pivoted labels are lower-cased, parentheses are dropped, and spaces
          become underscores ("Urea (N)" -> "urea_n")

    The column names are derived from the pivoted data itself, so the function
    does not depend on any notebook-level variable.

    Function Arguments
    ------------------
    - df_data_by_region: long-format DataFrame containing `fields_ind`,
        `fields_pivot`, and `fields_values`

    Keyword Arguments
    -----------------
    - fields_ind: fields that identify a row in the wide output
    - fields_values: fields holding the values to spread across columns
    - fields_pivot: fields whose values become the new columns
    - dict_rnm_out: optional dictionary mapping cleaned column names to final
        output names; skipped if None

    Returns
    -------
    Wide DataFrame with flat, cleaned string column names. If a "year" column
    is present, it is cast to int.

    Raises
    ------
    ValueError (from pandas) if a combination of `fields_ind` and
    `fields_pivot` occurs more than once in the input.
    """

    def _clean_label(label) -> str:
        # lower-case, drop parentheses, and swap spaces for underscores
        return str(label).lower().replace("(", "").replace(")", "").replace(" ", "_")

    def _flat_name(col) -> str:
        # after pivot + reset_index, index fields look like ("Country", "")
        # while pivoted columns look like ("Consumption", "Urea (N)")
        if not isinstance(col, tuple):
            return str(col).lower() if col in fields_ind else _clean_label(col)
        trailing = col[1:]
        if all(str(level) == "" for level in trailing):
            return str(col[0]).lower()
        return "_".join(_clean_label(level) for level in trailing)

    # pivot the data, using the requested value fields
    df_data_by_region_wide = pd.pivot(
        df_data_by_region,
        index = list(fields_ind),
        columns = list(fields_pivot),
        values = list(fields_values)
    ).reset_index()

    # flatten the column MultiIndex into clean string names
    df_data_by_region_wide.columns = [
        _flat_name(col) for col in df_data_by_region_wide.columns.to_flat_index()
    ]

    # clean some columns (only when a year field is part of the index)
    if "year" in df_data_by_region_wide.columns:
        df_data_by_region_wide["year"] = df_data_by_region_wide["year"].astype(int)

    # apply the caller's output names only if they were provided
    if dict_rnm_out is not None:
        df_data_by_region_wide = df_data_by_region_wide.rename(columns = dict_rnm_out)

    return df_data_by_region_wide




# some sets
# - keep_products: IFA "Product" labels retained for the analysis
# - regions_all: category names from the model's region attribute table
# - regions_available: countries that appear directly in the IFA data
# - regions_to_allocate: model regions with no direct IFA row; they receive a share of the aggregate
# - region_to_allocate: label of the IFA aggregate row that is split across those regions
keep_products = set(["Grand Total N", "Urea (N)"])
regions_all = set(sa.model_attributes.dict_attributes["region"].table["category_name"])
regions_available = set(df_data["Country"])
regions_to_allocate = regions_all - regions_available
region_to_allocate = "Others Latin America"

# reduce dataset - piece out to data available by country and values to allocate proportionally to ag production
# NOTE(review): df_data is overwritten with its filtered version here, so the unfiltered frame is no longer available afterwards
df_data = df_data[df_data["Product"].isin(keep_products)].reset_index(drop = True)
df_data_by_region = df_data[df_data["Country"].isin(regions_all)].copy().reset_index(drop = True)
df_data_to_allocate = df_data[df_data["Country"].isin([region_to_allocate])].copy().reset_index(drop = True)

# format fields
# dict_rnm maps the cleaned pivot column names to the output field names; the urea column initially
# holds a quantity and is converted to a fraction of the total below
field_qty = "qtyinit_soil_synthetic_fertilizer_kt"
field_frac = "frac_soil_synthetic_fertilizer_urea"
dict_rnm = {
    "grand_total_n": field_qty,
    "urea_n": field_frac
}
# get data for which information is available per region
df_data_by_region_wide = format_regional_data(df_data_by_region, dict_rnm_out = dict_rnm)
# convert the urea column from a quantity to a fraction of the total (urea / grand total)
df_data_by_region_wide[field_frac] = np.array(df_data_by_region_wide[field_frac])/np.array(df_data_by_region_wide[field_qty])


##  do the allocation to countries for which fertilizer data is not available

# get production area by country to allocate fertilizer
# keeps only "Area harvested" records for the years 2015 through 2019 (range end is exclusive)
# NOTE(review): df_ag_production is not defined in the visible cells - confirm it is loaded upstream
df_ag_production_agg = df_ag_production[
    df_ag_production["Element"].isin(["Area harvested"]) &
    df_ag_production["Year"].isin(range(2015, 2020))
][["Area", "Item", "Year", "Value"]];
# grouping fields
# the group keys are aggregated with "first" so they survive as columns once the group index is dropped
fields_grp = ["Area", "Year"]
fields_sum = ["Value"]
dict_agg = dict(zip(fields_grp, ["first" for x in fields_grp]))
dict_agg.update(dict(zip(fields_sum, ["sum" for x in fields_sum])))
# aggregation: total "Value" per (Area, Year), summed over all items
df_ag_production_agg = df_ag_production_agg.groupby(fields_grp).agg(dict_agg).reset_index(drop = True)
# rename the Bolivia area label to "Bolivia" before filtering against regions_to_allocate
df_ag_production_agg["Area"] = df_ag_production_agg["Area"].replace({"Bolivia (Plurinational State of)": "Bolivia"})
df_ag_production_agg = df_ag_production_agg[df_ag_production_agg["Area"].isin(regions_to_allocate)].reset_index(drop = True)
# get total area and merge back in, then get fractional allocation
# no `on` is passed to pd.merge, so the join uses the columns shared by both frames ("Year" here)
df_ag_production_agg_total = df_ag_production_agg[["Year", "Value"]].groupby(["Year"]).agg({"Year": "first", "Value": "sum"}).reset_index(drop = True).rename(columns = {"Value": "Value_Total"})
df_ag_production_agg = pd.merge(df_ag_production_agg, df_ag_production_agg_total)
# each area's share of the yearly total across the regions to allocate
df_ag_production_agg["frac_allocation"] = np.array(df_ag_production_agg["Value"])/np.array(df_ag_production_agg["Value_Total"])

# setup the allocation
# the aggregate row gets the same wide formatting and urea-fraction conversion as the per-region data;
# its "country" column is dropped so the allocated country names can be merged in
df_data_to_allocate_wide = format_regional_data(df_data_to_allocate, dict_rnm_out = dict_rnm)
df_data_to_allocate_wide[field_frac] = np.array(df_data_to_allocate_wide[field_frac])/np.array(df_data_to_allocate_wide[field_qty])
df_data_to_allocate_wide.drop(["country"], axis = 1, inplace = True)
# merge in production data
# no `on` is passed; after the rename the shared column is "year", so every allocated country is paired
# with the aggregate's values for the matching year
# NOTE(review): this is a default (inner) merge, so allocated countries only get rows for years present in
# both frames (the production data was limited to 2015-2019 above) - confirm this coverage is intended
df_ag_production_agg = df_ag_production_agg[["Area", "Year", "frac_allocation"]].rename(columns = {"Area": "country", "Year": "year"})
df_data_to_allocate_wide = pd.merge(df_data_to_allocate_wide, df_ag_production_agg)
# scale the aggregate quantity by each country's share; the urea fraction of the aggregate is carried over unchanged
df_data_to_allocate_wide[field_qty] = np.array(df_data_to_allocate_wide[field_qty])*np.array(df_data_to_allocate_wide["frac_allocation"])
# align the columns (and their order) with the per-region table before stacking
df_data_to_allocate_wide = df_data_to_allocate_wide[df_data_by_region_wide.columns]

# final dataset: per-region rows stacked with the allocated rows, ordered by country and year
df_data_out = pd.concat([df_data_by_region_wide, df_data_to_allocate_wide], axis = 0).sort_values(by = ["country", "year"]).reset_index(drop = True)
# function to reformat the country name for integration
def format_country_name(country: str) -> str:
    """
    Normalize a country name: keep only the text before the first "(",
    trim surrounding whitespace, lower-case it, and replace each space
    with an underscore.
    """
    base_name = country.partition("(")[0]
    normalized = base_name.strip().lower()
    return normalized.replace(" ", "_")
# normalize the country names, then write the final table to CSV without the row index
df_data_out["country"] = df_data_out["country"].map(format_country_name)
fp_out = "/Users/jsyme/Documents/Projects/FY21/SWCHE131_1000/Data/AFOLU/ifa_data_fertilizer_input_estimates_by_country_by_year.csv"
df_data_out.to_csv(
    fp_out,
    index = None,
    encoding = "UTF-8"
)

In [167]:
# number of distinct values in the "country" column of the final output table
len(set(df_data_out["country"]))

26

In [161]:
("(")[0].strip().lower().replace(" ", "_")
    return country_out


In [178]:
# list the distinct "Item" values in df_frst_emission
# NOTE(review): df_frst_emission is not defined in the visible cells - confirm it is loaded upstream
df_frst_emission["Item"].unique()

array(['Forestland', 'Net Forest conversion',
       'Carbon stock change in forests'], dtype=object)

In [182]:
# distinct "Item" values in df_frst_emission, in sorted order
sorted(df_frst_emission["Item"].unique())

['AFOLU',
 'Agricultural Soils',
 'Burning - Crop residues',
 'Crop Residues',
 'Drained organic soils',
 'Drained organic soils (CO2)',
 'Drained organic soils (N2O)',
 'Emissions on agricultural land',
 'Enteric Fermentation',
 'Farm-gate emissions',
 'Fires in humid tropical forests',
 'Fires in organic soils',
 'Forest fires',
 'Forestland',
 'IPCC Agriculture',
 'LULUCF',
 'Manure Management',
 'Manure applied to Soils',
 'Manure left on Pasture',
 'Net Forest conversion',
 'On-farm energy use',
 'Rice Cultivation',
 'Savanna fires',
 'Synthetic Fertilizers']