In [275]:
import os, os.path
import numpy as np
import pandas as pd
import data_structures as ds
import setup_analysis as sa
import support_functions as sf
import importlib
import time
import warnings
import matplotlib.pyplot as plt
import geopandas as gpd
import rioxarray as rx
import itertools
import model_afolu as ma

In [277]:
# Root directory holding the AFOLU input data. The default is the original
# machine-specific path; set the AFOLU_DATA_DIR environment variable to point
# the notebook at another location without editing this cell.
dir_data = os.environ.get(
    "AFOLU_DATA_DIR",
    "/Users/jsyme/Documents/Projects/FY21/SWCHE131_1000/Data/AFOLU"
)

# set names 
fn_climates = "kc_1984_2013.tif"          # climate raster, opened with rioxarray below
fn_countries = "WB_countries_Admin0_10m"  # both the directory and the basename of the .shp file
fn_cw = "values_info_with_cw_kc_1984_2013.csv"  # pipe-delimited table of climate categories

# full paths built from the data directory
fp_climates = os.path.join(dir_data, fn_climates)
fp_countries = os.path.join(dir_data, fn_countries, f"{fn_countries}.shp")
fp_cw = os.path.join(dir_data, fn_cw)

# AFOLU model object; later cells read its modvar_* attributes
model_afolu = ma.AFOLU(sa.model_attributes)

In [8]:
# open the climate GeoTIFF and turn its first layer into a 2-d table
rx_array = rx.open_rasterio(fp_climates)
rx_first_layer = rx_array[0]
df_climates = rx_first_layer.to_pandas()

# table describing the climate categories (pipe-delimited file)
df_climate_cats = pd.read_csv(
    fp_cw,
    sep = "|"
)

In [47]:
# Reshape the raster table to long format: one row per grid cell with the
# columns "y", "x" and "value".
# The reset is guarded so that re-running this cell does not reset the index a
# second time (which would add an "index" column and corrupt the melt).
if "y" not in df_climates.columns:
    df_climates = df_climates.reset_index()
dfc = pd.melt(df_climates, ["y"])

# The value of the first melted cell is treated as the background code and all
# cells carrying it are removed.
# NOTE(review): presumably the first cell is a no-data/ocean cell - confirm.
val = float(dfc["value"].iloc[0])
dfc = dfc[dfc["value"] != val].reset_index(drop = True)

# set some bounds
# http://bboxfinder.com/#-55.776573,-126.386719,33.431441,-33.398438
# (the limits below are not identical to the box in the link)
x_max = -34
x_min = -119
y_max = 33
y_min = -58
# filter out cells outside of the box; both ends are inclusive
in_bounds = dfc["x"].between(x_min, x_max) & dfc["y"].between(y_min, y_max)
dfc2 = dfc[in_bounds].reset_index(drop = True)

# create points with climates
gdf_climates = gpd.GeoDataFrame(
    dfc2, 
    geometry = gpd.points_from_xy(dfc2["x"], dfc2["y"]),
    crs = "EPSG:4326"
)
# working copy of the points
gdf_climates_cut = gdf_climates.copy()

In [87]:
# names of the regions defined in the model attributes, and the world country shapes
attr_region = sa.model_attributes.dict_attributes.get("region")
countries_keep = list(attr_region.table["category_name"])
gdf_world = gpd.read_file(fp_countries)

# align shapefile names with the attribute table before matching
field_en = "NAME_EN"
dict_repl = {"The Bahamas": "Bahamas"}
gdf_world[field_en] = gdf_world[field_en].replace(dict_repl)

# keep only the countries named in the attribute table, with name and shape
mask_keep = gdf_world[field_en].isin(countries_keep)
gdf_world_red = gdf_world[mask_keep]
gdf_lac = gdf_world_red[[field_en, "geometry"]]

# bounding box (minx, miny, maxx, maxy) of each retained country
gdf_lac_bounds = gdf_lac.bounds



In [118]:
# Trial: join all climate points against all country polygons in one call.
# how = "right" keeps every climate point, whether or not it matches a country.
# The recorded run of this cell ended with a KeyboardInterrupt.
gdf_points_kcc = gdf_climates[["value", "geometry"]].rename(columns = {"value": "kcc"})
gdf_join_test = gpd.sjoin(gdf_lac, gdf_points_kcc, how = "right")

KeyboardInterrupt: 

In [206]:
# Scratch check: rows of the working copy whose y coordinate lies in [miny, maxy].
# NOTE(review): in the visible notebook, maxy and miny are only assigned inside the
# per-country loop in a later cell, so this cell fails on a fresh kernel that is
# run top to bottom.
gdf_climates_cut[
    (gdf_climates_cut["y"] <= maxy) &
    (gdf_climates_cut["y"] >= miny)
]

# last expression of the cell, so this is the value that gets displayed
miny

-55.918504222591764

In [215]:

# Assign climate grid points to countries, one country at a time.
#
# For each country: (1) pre-filter the remaining points to the country's
# bounding box, (2) spatially join them to the country polygon, (3) count the
# matched points by climate code, and (4) remove the matched points from the
# working copy so that later countries search a smaller set and no point is
# assigned twice.
#
# Results: df_out (list of point-level joins) and df_agg_out (list of counts by
# country and climate code), one element per country.

sep = "---"*25
print("Copying climate gdf...\n\n")
# work on a copy; gdf_climates itself is left untouched
gdf_climates_cut = gdf_climates.copy()


df_out = []
df_agg_out = []
t0 = time.time()
list_times = [t0]
n = len(gdf_lac_bounds)

for i in range(n):
    
    # country: list-style iloc keeps a one-row frame, which sjoin needs
    gdf_country = gdf_lac.iloc[[i]]
    bounds = gdf_lac_bounds.iloc[[i]]

    country = str(gdf_country["NAME_EN"].iloc[0])
    maxx = float(bounds["maxx"].iloc[0])
    minx = float(bounds["minx"].iloc[0])
    maxy = float(bounds["maxy"].iloc[0])
    miny = float(bounds["miny"].iloc[0])
    
    pos = i + 1
    print(f"Starting country {country}\n{sep} ({pos}/{n})\n")
    
    # get points inside the bounding box (inclusive on every side)
    print("Filtering points...\n")
    in_bbox = (
        gdf_climates_cut["x"].between(minx, maxx) &
        gdf_climates_cut["y"].between(miny, maxy)
    )
    gdf_points = gdf_climates_cut[in_bbox]
    
    print(f"Merging points within bbox x = ({minx}, {maxx}), y = ({miny}, {maxy}) to country...\n")
    # inner join keeps only matched points; "index_right" holds the row label
    # of each matched point in gdf_climates_cut
    gdf_join = gpd.sjoin(
        gdf_country, 
        gdf_points[["value", "geometry"]].rename(columns = {"value": "kcc"}),
        how = "inner"
    ).drop("geometry", axis = 1)
    
    
    print("Aggregating counts by climate...\n")
    # one row per (country, climate code) with the number of matched points
    gdf_agg = gdf_join[["NAME_EN", "kcc"]].copy()
    gdf_agg["count"] = 1
    gdf_agg = gdf_agg.groupby(["NAME_EN", "kcc"], as_index = False)["count"].sum()

    
    print("Appending output...")
    df_out.append(gdf_join)
    df_agg_out.append(gdf_agg)
    
    
    print("Dropping indices...")
    # matched points are removed so they cannot be assigned to a later country
    indices_to_drop = list(gdf_join["index_right"])
    gdf_climates_cut = gdf_climates_cut.drop(indices_to_drop, axis = 0)
    
    
    # timing: the last two entries of list_times bracket this country
    list_times.append(time.time())
    t_delta = np.round(list_times[-1] - list_times[-2], 2)
    t_elapsed = np.round(list_times[-1] - list_times[0])
    
    print(f"\n{sep}\n\nCountry {country} complete in {t_delta} seconds ({t_elapsed} seconds overall)\n\n{sep}\n\n\n")
    

Copying climate gdf...


Starting country Chile
---------------------------------------------------------------------------

Filtering points...

Merging points within bbox x = (-109.45372473890552, -66.42080644356247), y = (-55.918504222591764, -17.50658819768711) to country...

Aggregating counts by climate...

Appending output...
Dropping indices...

---------------------------------------------------------------------------

Country Chile complete in 103.47 sounds (103.0 seconds overall)

---------------------------------------------------------------------------



Starting country Bolivia
---------------------------------------------------------------------------

Filtering points...

Merging points within bbox x = (-69.66649226960254, -57.46566076733052), y = (-22.897257587567594, -9.679821471783441) to country...

Aggregating counts by climate...

Appending output...
Dropping indices...

---------------------------------------------------------------------------

Country Bolivi

In [219]:
# stack the per-country results into single tables with a fresh 0..n-1 index
df_out_full = pd.concat(df_out, axis = 0, ignore_index = True)
df_out_agg = pd.concat(df_agg_out, axis = 0, ignore_index = True)

# export output (no index column, UTF-8)
kwargs_csv = {"index": None, "encoding": "UTF-8"}
df_out_full.to_csv(sa.fp_csv_kcc_cells_merged_to_country, **kwargs_csv)
df_out_agg.to_csv(sa.fp_csv_kcc_cell_counts_by_country_kcc, **kwargs_csv)

In [337]:

def clean_countries(country):
    """
    Normalize a country name for use as a key: surrounding whitespace is
    removed, every remaining space becomes an underscore, and the result is
    lower-cased.
    """
    trimmed = country.strip()
    underscored = "_".join(trimmed.split(" "))
    return underscored.lower()


##
##  BUILD FRACTIONS WET/DRY AND TEMPERATE/TROPICAL BY COUNTRY
##

# counts are stored as integers
df_out_agg["count"] = df_out_agg["count"].astype(int)

# total number of cells per country, over all climate codes
dict_agg_totals = {"NAME_EN": "first", "count": "sum"}
df_out_agg_totals = (
    df_out_agg[["NAME_EN", "count"]]
    .groupby(["NAME_EN"])
    .agg(dict_agg_totals)
    .reset_index(drop = True)
    .rename(columns = {"count": "total_count"})
)

# attach the wet/dry and temperate/tropical category of every climate code;
# the code column is renamed so that both tables share the "kcc" field
df_kcc_cats = df_climate_cats[["code_num", "wet_dry_cat", "temperate_tropical_cat"]].rename(
    columns = {"code_num": "kcc"}
)
df_climate_by_country = pd.merge(df_out_agg, df_kcc_cats)

# one view per split, each without the other split's category column
df_climate_by_country_temptrop = df_climate_by_country.drop(columns = ["wet_dry_cat"])
df_climate_by_country_wetdry = df_climate_by_country.drop(columns = ["temperate_tropical_cat"])

# sorted, de-duplicated country names present after the merge
all_countries = sorted(set(df_climate_by_country["NAME_EN"]))

# For each split (wet/dry, temperate/tropical) build a wide table with one row
# per country and one column per category holding that category's share of the
# country's cells. Tables are stored in dict_df_climate_aggs, keyed by split field.
fields_loop = ["wet_dry_cat", "temperate_tropical_cat"]
dict_df_climate_aggs = {}
for field in fields_loop:
    
    fields_grp = ["NAME_EN", field]
    fields_dat = ["count"]
    
    # category labels observed for this split
    all_vals = sorted(set(df_climate_by_country[field]))
    
    # drop the other split's column, then sum counts by country and category
    fields_other = [x for x in fields_loop if (x != field)]
    df_tmp = df_climate_by_country.drop(columns = fields_other)
    dict_agg = {fld_grp: "first" for fld_grp in fields_grp}
    dict_agg.update({fld_dat: "sum" for fld_dat in fields_dat})
    df_tmp = (
        df_tmp[fields_grp + fields_dat]
        .groupby(fields_grp)
        .agg(dict_agg)
        .reset_index(drop = True)
    )
    
    # every country x category combination, with the country's total cell count
    df_expand = pd.DataFrame(
        itertools.product(all_countries, all_vals),
        columns = ["NAME_EN", field]
    )
    df_expand = pd.merge(df_expand, df_out_agg_totals, how = "left").fillna(0)
    
    # combinations with no cells get a count of 0 before the share is computed
    df_tmp = pd.merge(df_expand, df_tmp, how = "left").fillna(0)
    df_tmp["frac"] = np.array(df_tmp["count"])/np.array(df_tmp["total_count"])
    df_tmp = df_tmp.rename(columns = {"NAME_EN": "country"})
    df_tmp["country"] = df_tmp["country"].apply(clean_countries)

    
    # get pivot: countries in rows, categories in columns
    df_piv = pd.pivot(
        df_tmp[["country", field, "frac"]],
        index = ["country"],
        columns = [field], 
        values = ["frac"]
    ).reset_index()
    
    # flatten the two-level column labels to tuples, then name each value
    # column after its category (second tuple element) and restore "country"
    df_piv.columns = df_piv.columns.to_flat_index()
    dict_rnm = {
        col: col[1].replace(")", "").strip()
        for col in df_piv.columns
        if (col[1] != "")
    }
    dict_rnm[("country", "")] = "country"
    df_piv = df_piv.rename(columns = dict_rnm)
    
    dict_df_climate_aggs[field] = df_piv


In [343]:
##  TEMPERATE/TROPICAL SPLITS

# (subsector, model variable) pairs, in the order their variables are appended
modvars_tt = [
    # ag temp/trop vars
    (sa.model_attributes.subsec_name_agrc, model_afolu.modvar_agrc_frac_temperate),
    (sa.model_attributes.subsec_name_agrc, model_afolu.modvar_agrc_frac_tropical),
    # frst temp/trop vars
    (sa.model_attributes.subsec_name_frst, model_afolu.modvar_frst_frac_temperate_nutrient_rich),
    (sa.model_attributes.subsec_name_frst, model_afolu.modvar_frst_frac_temperate_nutrient_poor),
    (sa.model_attributes.subsec_name_frst, model_afolu.modvar_frst_frac_tropical),
    # lndu temp/trop vars
    (sa.model_attributes.subsec_name_lndu, model_afolu.modvar_lndu_frac_temperate),
    (sa.model_attributes.subsec_name_lndu, model_afolu.modvar_lndu_frac_tropical),
]

varlist_tt = []
for subsec, modvar in modvars_tt:
    varlist_tt += sa.model_attributes.build_varlist(subsec, modvar)


##  WET/DRY SPLITS

modvars_wd = [
    # ag wet/dry vars
    (sa.model_attributes.subsec_name_agrc, model_afolu.modvar_agrc_frac_wet),
    (sa.model_attributes.subsec_name_agrc, model_afolu.modvar_agrc_frac_dry),
    # lndu wet/dry vars
    (sa.model_attributes.subsec_name_lndu, model_afolu.modvar_lndu_frac_wet),
    (sa.model_attributes.subsec_name_lndu, model_afolu.modvar_lndu_frac_dry),
]

varlist_wd = []
for subsec, modvar in modvars_wd:
    varlist_wd += sa.model_attributes.build_varlist(subsec, modvar)



In [376]:

# Build the final table: one row per country and time period, one column per
# climate-fraction variable, then write it to CSV.

attr_tp = sa.model_attributes.dict_attributes.get("dim_time_period")
df_tp = attr_tp.table[[attr_tp.key]].copy()
# constant key used further down to pair every country with every time period
df_tp["merge"] = -1

# Seed the output with the country column of a stored pivot table. The key is
# read from the dictionary itself rather than from a loop variable left over
# from another cell, so this cell does not depend on leaked state.
field_seed = list(dict_df_climate_aggs.keys())[-1]
df_ret = dict_df_climate_aggs[field_seed][["country"]].copy()

for field in dict_df_climate_aggs.keys():
    
    is_temptrop = (field == "temperate_tropical_cat")
    loop = varlist_tt if is_temptrop else varlist_wd
    for var in loop:
        # pick the pivot column that supplies this variable's value
        if is_temptrop:
            fld = "temperate" if ("temperate" in var) else ("tropical")
        else:
            # Match on the trailing "_wet" instead of a substring test: a
            # variable whose category name contains "wet" would otherwise be
            # given the wet fraction even when it is the dry variable.
            fld = "wet" if var.endswith("_wet") else "dry"
        # the temperate share is split equally between the nutrient-poor and
        # nutrient-rich variables
        div = 2 if (("nutrient_poor" in var) or ("nutrient_rich" in var)) else 1
        df_merge = dict_df_climate_aggs[field][["country", fld]].copy().rename(columns = {fld: var})
        df_ret = pd.merge(df_ret, df_merge, on = "country")
        df_ret[var] = np.array(df_ret[var])/div

# merge in time period: joining on the constant key repeats each country for
# every time period
df_ret["merge"] = -1
fields_sort = ["country", attr_tp.key]
df_ret = pd.merge(df_ret, df_tp).drop("merge", axis = 1).sort_values(by = fields_sort).reset_index(drop = True)
# key fields first, then the variable columns in alphabetical order
df_ret = df_ret[fields_sort + sorted([x for x in df_ret.columns if (x not in fields_sort)])]

df_ret.to_csv(
    sa.fp_csv_climate_fields_by_country_simple, index = None, encoding = "UTF-8"
)

In [378]:
# display the country-by-time-period table that the previous cell wrote to CSV
df_ret

Unnamed: 0,country,time_period,frac_agrc_bevs_and_spices_cl1_temperate,frac_agrc_bevs_and_spices_cl1_tropical,frac_agrc_bevs_and_spices_cl2_dry,frac_agrc_bevs_and_spices_cl2_wet,frac_agrc_cereals_cl1_temperate,frac_agrc_cereals_cl1_tropical,frac_agrc_cereals_cl2_dry,frac_agrc_cereals_cl2_wet,...,frac_frst_secondary_cl1_temperate_nutrient_rich,frac_frst_secondary_cl1_tropical,frac_lndu_grasslands_cl1_temperate,frac_lndu_grasslands_cl1_tropical,frac_lndu_grasslands_cl2_dry,frac_lndu_grasslands_cl2_wet,frac_lndu_other_cl2_dry,frac_lndu_other_cl2_wet,frac_lndu_settlements_cl2_dry,frac_lndu_settlements_cl2_wet
0,argentina,0,1.000000,0.000000,0.685063,0.314937,1.000000,0.000000,0.685063,0.314937,...,0.500000,0.000000,1.000000,0.000000,0.685063,0.314937,0.685063,0.314937,0.685063,0.314937
1,argentina,1,1.000000,0.000000,0.685063,0.314937,1.000000,0.000000,0.685063,0.314937,...,0.500000,0.000000,1.000000,0.000000,0.685063,0.314937,0.685063,0.314937,0.685063,0.314937
2,argentina,2,1.000000,0.000000,0.685063,0.314937,1.000000,0.000000,0.685063,0.314937,...,0.500000,0.000000,1.000000,0.000000,0.685063,0.314937,0.685063,0.314937,0.685063,0.314937
3,argentina,3,1.000000,0.000000,0.685063,0.314937,1.000000,0.000000,0.685063,0.314937,...,0.500000,0.000000,1.000000,0.000000,0.685063,0.314937,0.685063,0.314937,0.685063,0.314937
4,argentina,4,1.000000,0.000000,0.685063,0.314937,1.000000,0.000000,0.685063,0.314937,...,0.500000,0.000000,1.000000,0.000000,0.685063,0.314937,0.685063,0.314937,0.685063,0.314937
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
931,venezuela,31,0.050095,0.949905,0.031173,0.968827,0.050095,0.949905,0.031173,0.968827,...,0.025048,0.949905,0.050095,0.949905,0.031173,0.968827,0.031173,0.968827,0.031173,0.968827
932,venezuela,32,0.050095,0.949905,0.031173,0.968827,0.050095,0.949905,0.031173,0.968827,...,0.025048,0.949905,0.050095,0.949905,0.031173,0.968827,0.031173,0.968827,0.031173,0.968827
933,venezuela,33,0.050095,0.949905,0.031173,0.968827,0.050095,0.949905,0.031173,0.968827,...,0.025048,0.949905,0.050095,0.949905,0.031173,0.968827,0.031173,0.968827,0.031173,0.968827
934,venezuela,34,0.050095,0.949905,0.031173,0.968827,0.050095,0.949905,0.031173,0.968827,...,0.025048,0.949905,0.050095,0.949905,0.031173,0.968827,0.031173,0.968827,0.031173,0.968827
