In [1]:
import os, os.path
import numpy as np
from time import time
import pandas as pd
import math as m
import setup_runs as sr
import importlib
import matplotlib.pyplot as plt
import itertools



In [2]:
def compare_params(df_old, df_new, fields_merge, fields_data, tol = 0.0001):
    """
    Merge two parameter tables on their shared key fields and report which
    parameters differ between them.

    Parameters
    ----------
    df_old, df_new : pd.DataFrame
        Tables to compare. "parameter" must be one of the fields that ends
        up being merged on (present in both tables and in fields_merge).
    fields_merge : list
        Candidate key fields. Only those found in both tables are used,
        in the order given here.
    fields_data : list
        Candidate data fields. Only fields found in both tables are
        compared; fields missing from either table are reported.
    tol : float, optional
        Absolute difference above which two values count as different.
        Defaults to 0.0001.

    Returns
    -------
    dict
        Always contains:
          "dict_rnm_old", "dict_rnm_new": rename maps applied to data fields
          "df_comp": inner merge of the renamed tables, sorted on the merge
              fields, with missing values replaced by 0
          "params_diff": set of parameters with at least one compared field
              differing by more than tol
        Contains only when non-empty (each a sorted list):
          "fields_dat_compl_old", "fields_dat_compl_new": requested data
              fields missing from df_old / df_new
          "parameters_dat_compl_old", "parameters_dat_compl_new": parameters
              of df_old / df_new that did not survive the inner merge

    Raises
    ------
    KeyError
        If "parameter" is not among the fields merged on.
    """

    dict_out = {}

    #data fields available in each table, kept in the order of fields_data
    fields_dat_old = [x for x in fields_data if x in df_old.columns]
    fields_dat_new = [x for x in fields_data if x in df_new.columns]
    #data fields available in both tables; these are the ones compared
    fields_dat_red = [x for x in fields_dat_old if x in fields_dat_new]

    #requested data fields that each table is missing
    fields_dat_compl_old = sorted(set(fields_data) - set(fields_dat_old))
    fields_dat_compl_new = sorted(set(fields_data) - set(fields_dat_new))

    if len(fields_dat_compl_old) > 0:
        sr.print_list_output(fields_dat_compl_old, "Dropping data fields from df_old")
        dict_out.update({"fields_dat_compl_old": fields_dat_compl_old})

    if len(fields_dat_compl_new) > 0:
        sr.print_list_output(fields_dat_compl_new, "Dropping data fields from df_new")
        dict_out.update({"fields_dat_compl_new": fields_dat_compl_new})

    #merge keys: fields_merge entries present in both tables, in fields_merge order
    fm = [x for x in fields_merge if (x in df_old.columns) and (x in df_new.columns)]
    sr.print_list_output(fm, "Merging on")

    #everything below identifies rows by their "parameter" key
    if "parameter" not in fm:
        raise KeyError("'parameter' must be a merge field present in both df_old and df_new")

    #prefix the data fields so both versions can sit side by side after the merge
    dict_rnm_new = dict([[x, "newp_" + x] for x in fields_dat_new])
    dict_rnm_old = dict([[x, "oldp_" + x] for x in fields_dat_old])
    dict_out.update({"dict_rnm_new": dict_rnm_new, "dict_rnm_old": dict_rnm_old})
    df_new = df_new[fm + fields_dat_new].rename(columns = dict_rnm_new)
    df_old = df_old[fm + fields_dat_old].rename(columns = dict_rnm_old)

    #inner merge keeps only key combinations found in both tables
    df_comp = pd.merge(df_old, df_new, how = "inner", on = fm).sort_values(by = fm).reset_index(drop = True).fillna(0)
    dict_out.update({"df_comp": df_comp})

    #parameters lost in the merge: present in an input table but absent from df_comp
    #(df_comp can only hold a subset of each input, so the difference is taken in this direction)
    params_kept = set(df_comp["parameter"])
    parameters_dat_compl_old = sorted(set(df_old["parameter"]) - params_kept)
    parameters_dat_compl_new = sorted(set(df_new["parameter"]) - params_kept)

    if len(parameters_dat_compl_old) > 0:
        sr.print_list_output(parameters_dat_compl_old, "The following parameters from df_old were not kept")
        dict_out.update({"parameters_dat_compl_old": parameters_dat_compl_old})

    if len(parameters_dat_compl_new) > 0:
        sr.print_list_output(parameters_dat_compl_new, "The following parameters from df_new were not kept")
        dict_out.update({"parameters_dat_compl_new": parameters_dat_compl_new})

    #compare the shared data fields row by row
    array_new = np.array(df_comp[[("newp_" + x) for x in fields_dat_red]])
    array_old = np.array(df_comp[[("oldp_" + x) for x in fields_dat_red]])
    array_diff = abs(array_new - array_old)

    #a row differs when any compared field exceeds the tolerance
    rows_diff = np.asarray((array_diff > tol).any(axis = 1), dtype = bool)
    params_diff = set(df_comp.loc[rows_diff, "parameter"])

    dict_out.update({"params_diff": params_diff})

    return dict_out
    
    
    

In [5]:
#load the current parameter ranges
#(an archived file, e.g. os.path.join(sr.dir_ref, "archive", "parameter_ranges_20210201.csv"), can be read here instead)
df_params = pd.read_csv(sr.fp_csv_parameter_ranges)

#split the table by strategy; the strategy column itself is left out of each half
fields_no_strategy = [x for x in df_params.columns if x != "strategy_id"]
df_0 = df_params.loc[df_params["strategy_id"] == 0, fields_no_strategy].copy()
df_1 = df_params.loc[df_params["strategy_id"] == 1, fields_no_strategy].copy()

#keys to merge on, followed by the data fields to compare (attribute fields plus one field per year 2015-2050)
fields_merge = ["sector", "time_series_id", "parameter"]
fields_data = [
    "normalize_group",
    "trajgroup_no_vary_q",
    "parameter_constant_q",
    "min_2050",
    "max_2050"
] + list(map(str, range(2015, 2051)))

#compare strategy 0 against strategy 1
dict_compare = compare_params(df_0, df_1, fields_merge, fields_data)
    





##############################
Merging on:
	sector
	time_series_id
	parameter
##############################





In [13]:
#unique (parameter, sector) pairs for the parameters flagged as different
#(to export instead of display, chain .to_csv(<path>, index = None, encoding = "UTF-8"))
(
    df_params.loc[
        df_params["parameter"].isin(dict_compare["params_diff"]),
        ["parameter", "sector"]
    ]
    .drop_duplicates()
    .copy()
    .reset_index(drop = True)
)



Unnamed: 0,parameter,sector
0,manejo_holistico_de_gando,Analytica_agriculture
1,medida_cambio_dieta_nacional,Analytica_agriculture
2,medida_captura_c_suelos,Analytica_agriculture
3,medida_algas_pardas,Analytica_forestry
4,medida_aumento_parques_reservas,Analytica_forestry
5,medida_forestacion_aumentada,Analytica_forestry
6,medida_manejo_aumentada,Analytica_forestry
7,medida_pmr_menos_papel,Analytica_forestry
8,nueva_cap_recuperacion,Analytica_ippu
9,fp_compost,Analytica_waste


In [8]:


#keys to merge on, followed by the data fields to compare (attribute fields plus one field per year 2015-2050)
fields_merge = ["sector", "time_series_id", "strategy_id", "parameter"]
fields_data = [
    "normalize_group",
    "trajgroup_no_vary_q",
    "parameter_constant_q",
    "min_2050",
    "max_2050"
] + list(map(str, range(2015, 2051)))

#existing parameter ranges (the "_design_id-1" file) and the file with the revised non-Analytica sectors
df_params_old = pd.read_csv(sr.fp_csv_parameter_ranges.replace(".csv", "_design_id-1.csv"))
df_params_car_ndc = pd.read_csv(os.path.join(sr.dir_ref, "ndc_plus_to_integrate", "parameter_ranges_carlos_ndc+.csv"))

#sectors whose name contains "Analytica" (plus "all") versus every other sector
secs_anaytica = {x for x in df_params_old["sector"] if "Analytica" in x} | {"all"}
secs_carlos = set(df_params_old["sector"]) - secs_anaytica

#additional revised files for the Analytica sectors
df_params_ag_ndc = pd.read_excel(os.path.join(sr.dir_model, "analytica", "agriculture", "parameter_ranges-Rev-NDC+Agriculture.xlsx"))
df_params_fo_ndc = pd.read_excel(os.path.join(sr.dir_model, "analytica", "forestry", "parameter_ranges_Rev-NDC+Forestry.xlsx"))
df_params_ipwa_ndc = pd.read_csv(os.path.join(sr.dir_ref, "20210119-parameter_ranges -WasteIPPU.csv"))

#the existing ranges are the baseline that revised files are integrated into
df_params_base = df_params_old

#revised rows to integrate, keyed by sector group
dict_new_params = {
    "carlos": df_params_car_ndc,
    "agriculture": df_params_ag_ndc[df_params_ag_ndc["sector"] == "Analytica_agriculture"],
    "forestry": df_params_fo_ndc[df_params_fo_ndc["sector"] == "Analytica_forestry"],
    "ippu": df_params_ipwa_ndc[df_params_ipwa_ndc["sector"] == "Analytica_ippu"],
    "waste": df_params_ipwa_ndc[df_params_ipwa_ndc["sector"] == "Analytica_waste"]
}

#rows shared across sectors are carried over unchanged, and their strategy 1 rows are duplicated as strategy 2
df_params_shared = df_params_base[df_params_base["sector"].isin(["Analytica_all", "all"])]
df_params_init_strat = df_params_shared[df_params_shared["strategy_id"] == 1].copy()
df_params_init_strat["strategy_id"] = df_params_init_strat["strategy_id"].replace({1: 2})

#output starts as a list of frames; each integrated sector group is appended to it
df_params_out = [
    pd.concat([df_params_shared, df_params_init_strat], axis = 0).reset_index(drop = True)
]

#integrate the revised rows of each sector group into the baseline and collect the result in df_params_out
for sec in ["carlos"]:#, "agriculture", "forestry", "ippu", "waste"]:

    #baseline rows and candidate replacement rows for this sector group
    if sec != "carlos":
        sec_an = "Analytica_" + sec
        df_base_an = df_params_base[df_params_base["sector"] == sec_an]
        df_new_an = dict_new_params[sec]
    else:
        df_base_an = df_params_base[df_params_base["sector"].isin(secs_carlos)]
        df_new_an = dict_new_params[sec][dict_new_params[sec]["sector"].isin(secs_carlos)]

    #make every numeric column label a string so both frames use the same labels;
    #labels that are neither integers nor strings are left untouched instead of raising
    dict_rnm_base = dict([
        [x, str(x)] for x in df_base_an.columns
        if isinstance(x, (int, np.integer)) or (isinstance(x, str) and x.isnumeric())
    ])
    dict_rnm_new = dict([
        [x, str(x)] for x in df_new_an.columns
        if isinstance(x, (int, np.integer)) or (isinstance(x, str) and x.isnumeric())
    ])

    df_base_an = df_base_an.rename(columns = dict_rnm_base)
    df_new_an = df_new_an.rename(columns = dict_rnm_new)

    #strategies in both
    strat_both = set(df_base_an["strategy_id"]) & set(df_new_an["strategy_id"])
    #strategies that only exist in the new file
    strat_comp_new = set(df_new_an["strategy_id"]) - strat_both
    #compare the shared strategies to find parameters whose values were changed in the new file
    dict_compare = compare_params(
        df_base_an[df_base_an["strategy_id"].isin(strat_both)],
        df_new_an[df_new_an["strategy_id"].isin(strat_both)],
        fields_merge,
        fields_data
    )
    #parameters that have changed
    change_params = dict_compare["params_diff"]
    #parameters that only exist in the new file
    params_new = set(df_new_an["parameter"]) - set(df_base_an["parameter"])

    #drop changed parameters from the base; copy so that columns can be added below
    #without writing to a slice of the source frame
    df_base_an = df_base_an[~df_base_an["parameter"].isin(change_params)].copy()
    #from the new file keep changed parameters, new parameters, and all rows of new strategies
    df_new_an = df_new_an[
        df_new_an["parameter"].isin(change_params)
        | df_new_an["parameter"].isin(params_new)
        | df_new_an["strategy_id"].isin(strat_comp_new)
    ].copy()

    #fields shared by both frames
    fields_both = set(df_base_an.columns) & set(df_new_an.columns)
    #fields missing from either frame, taken in column order so the output layout is deterministic
    add_base = [x for x in df_new_an.columns if x not in df_base_an.columns]
    add_new = [x for x in df_base_an.columns if x not in df_new_an.columns]

    #fill the missing fields with NaN so both frames carry the same columns
    for field in add_base:
        df_base_an[field] = np.nan

    for field in add_new:
        df_new_an[field] = np.nan

    #stack base rows over new rows, using the base column order
    df_comp = pd.concat([df_base_an, df_new_an[list(df_base_an.columns)]], axis = 0, ignore_index = True)
    #add to the output
    df_params_out.append(df_comp)

#stack the collected frames into a single table
df_params_out = pd.concat(df_params_out, axis = 0)

#parameter names to replace with their lower-case/underscore spelling
dict_preps = {
    "Manejo_Holistico_de_gando": "manejo_holistico_de_gando",
    "Medida_Cambio_Dieta_Nacional": "medida_cambio_dieta_nacional",
    "fpCompost": "fp_compost"
}
df_params_out["parameter"] = df_params_out["parameter"].replace(dict_preps)

#keep only strategies above 0, then renumber them (1 -> 0, 2 -> 1)
df_params_out = df_params_out[df_params_out["strategy_id"] > 0].copy()
df_params_out["strategy_id"] = df_params_out["strategy_id"].replace({1: 0, 2: 1})

#order rows for output
fields_sort = ["sector", "type", "parameter", "time_series_id", "strategy_id"]
df_params_out = df_params_out.sort_values(by = fields_sort).reset_index(drop = True)
    




##############################
Dropping data fields from df_new:
	trajgroup_no_vary_q
##############################






##############################
Merging on:
	sector
	time_series_id
	strategy_id
	parameter
##############################





In [27]:
cp = list(change_params)

#note: df_params_old is re-bound here to the current parameter ranges file
df_params_old = pd.read_csv(sr.fp_csv_parameter_ranges)
strs = ["trajmax", "trajmin", "trajmix"]

#for each changed parameter whose name contains one of the tags in strs,
#also flag the names obtained by substituting each of the other tags
updates = {
    c.replace(s, x)
    for c in cp
    for s in strs if s in c
    for x in strs if x != s
}
change_params = change_params | updates

fields_sort = ["sector", "parameter", "time_series_id", "strategy_id"]
df_carlos = dict_new_params["carlos"]
rows_changed_old = df_params_old["parameter"].isin(change_params)

#flagged parameters as they appear in the revised file, renumbering strategies 1 -> 0 and 2 -> 1
#NOTE(review): rows with strategy_id 0 are not dropped before the renumbering, so original
#strategies 0 and 1 both end up as 0 in this file -- confirm that is intended
df_out1 = df_carlos[df_carlos["parameter"].isin(change_params)].reset_index(drop = True).sort_values(by = fields_sort)
df_out1["strategy_id"] = df_out1["strategy_id"].replace({1: 0, 2: 1})
#current file without the flagged parameters
df_out2 = df_params_old[~rows_changed_old].reset_index(drop = True).sort_values(by = fields_sort)
#current file restricted to the flagged parameters
df_out3 = df_params_old[rows_changed_old].reset_index(drop = True).sort_values(by = fields_sort)

#write each table next to the parameter ranges file, distinguished by suffix
for suffix, df_write in [("_CARCOMP.csv", df_out1), ("_D1-REV.csv", df_out2), ("_D1-OLD.csv", df_out3)]:
    df_write.to_csv(sr.fp_csv_parameter_ranges.replace(".csv", suffix), index = None, encoding = "UTF-8")






In [195]:
#number of rows each parameter is expected to have within a sector after filtering
n_rows_expected = 4

#per sector: print the shape of the strategy > 0 rows and list parameters with too few rows
for sec in list(df_params_out["sector"].unique()):

    print("SECTOR " + sec + "\n")
    dfo = df_params_out[(df_params_out["sector"] == sec) & (df_params_out["strategy_id"] > 0)]
    s = dfo.shape

    print("Sec shape: ")
    print(s)
    print("")

    #rows per parameter, counted in a single pass
    counts = dfo["parameter"].value_counts(dropna = False)

    #the check passes when the row count equals (number of parameters) x (expected rows per parameter)
    if len(counts)*n_rows_expected != s[0]:
        #parameters that are undercounted, sorted so the report is reproducible
        list_print = sorted(counts[counts < n_rows_expected].index, key = str)

        sr.print_list_output(list_print, "incomplete parameters in sector '" + sec + "'")

    print("\n\n")

SECTOR Analytica_all

Sec shape: 
(32, 46)




SECTOR all

Sec shape: 
(8, 46)




SECTOR commercial

Sec shape: 
(308, 46)




SECTOR electricity_generation

Sec shape: 
(112, 46)




SECTOR industry_and_mining

Sec shape: 
(1760, 46)




SECTOR public

Sec shape: 
(44, 46)




SECTOR residential

Sec shape: 
(588, 46)




SECTOR transport

Sec shape: 
(404, 46)




SECTOR Analytica_agriculture

Sec shape: 
(48, 46)




SECTOR Analytica_forestry

Sec shape: 
(84, 46)




SECTOR Analytica_ippu

Sec shape: 
(28, 46)




SECTOR Analytica_waste

Sec shape: 
(68, 46)






In [196]:
#alphabetical list of every distinct parameter in the output table
ll = sorted(df_params_out["parameter"].unique())#[df_params_out["strategy_id"] != 0]

#name replacements; defined here but not applied anywhere in this cell
rep = {
    "Manejo_Holistico_de_gando": "manejo_holistico_de_gando",
    "Medida_Cambio_Dieta_Nacional": "medida_cambio_dieta_nacional",
    "fpCompost": "fp_compost"
}

sr.print_list_output(ll, "params")








##############################
params:
	cement_emission_fact_coal
	cement_emission_fact_diesel
	cement_emission_fact_kerosene
	cement_emission_fact_natural_gas
	cement_frac_biomass
	cement_frac_coal
	cement_frac_diesel
	cement_frac_electric
	cement_frac_hydrogen
	cement_frac_kerosene
	cement_frac_natural_gas
	cement_frac_solar
	cement_frac_thermal_solar
	cement_intensity
	cement_production
	commercial_acs_diesel
	commercial_acs_natural_gas
	commercial_acs_pliqgas
	commercial_activity_acs
	commercial_activity_heating
	commercial_activity_motive
	commercial_activity_other
	commercial_dem_acs
	commercial_dem_heating
	commercial_dem_motive
	commercial_dem_other
	commercial_efficiency_acs_diesel
	commercial_efficiency_acs_natural_gas
	commercial_efficiency_acs_pliqgas
	commercial_efficiency_heating_diesel
	commercial_efficiency_heating_electric
	commercial_efficiency_heating_natural_gas
	commercial_efficiency_heating_pliqgas
	commercial_efficiency_motive_electric
	commercial_efficiency_o

In [94]:
# NOTE(review): none of the cells visible in this part of the notebook assigns p_diff_carlos;
# this display presumably relies on a value left in the kernel by a cell that is not shown
# (or was removed) -- confirm where it is defined so the notebook runs on a fresh kernel.
p_diff_carlos

['incertidumbre_var_demanda_legna',
 'industry_and_mining_fuel_price_coal',
 'industry_and_mining_fuel_price_coke',
 'industry_and_mining_fuel_price_diesel',
 'industry_and_mining_fuel_price_electric',
 'industry_and_mining_fuel_price_fuel_oil',
 'industry_and_mining_fuel_price_hydrogen',
 'industry_and_mining_fuel_price_natural_gas',
 'industry_and_mining_fuel_price_pliqgas',
 'iron_efficiency_heat_hydrogen',
 'iron_efficiency_heat_solar',
 'medida_biodigestores',
 'medida_cambio_dieta_bovina',
 'medida_captura_c_suelos',
 'medida_uso_eficiente_fertilizante',
 'other_industries_efficiency_heat_hydrogen',
 'other_industries_efficiency_heat_solar',
 'other_industries_efficiency_motor_diesel',
 'other_industries_elasticity',
 'other_industries_motor_diesel',
 'other_industries_motor_electric',
 'other_industries_motor_pliqgas',
 'other_industries_rate_useful_energy',
 'other_industries_share_heat',
 'other_industries_share_motor',
 'other_industries_share_other',
 'other_mining_efficienc