In [1]:
# -------------------------------------------------
# 
# get batch results from profile data 
# 
# Uses the `postproc_profs` subdir to get the 
# soil profile inputs.
# 
# -------------------------------------------------
import fnmatch
import glob
import os
import pickle
import re
from typing import Tuple

import cmocean.cm as cmo
import fsspec         # for AWS integration
import s3fs
from matplotlib.cm import ScalarMappable
from matplotlib.colors import TwoSlopeNorm
from matplotlib.colors import Normalize
from matplotlib.lines import Line2D  # for custom legend entries (needed for contour plot)
from matplotlib.gridspec import GridSpec
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import xarray as xr

import cdr_fxns_postproc as cfp

## Calculation overview
This notebook takes a list of profile file names, merges the files of the same name across every run in the batch, and then saves each merged file.

### accepted file names (equivalent to files that could exist in the scepter output)

"adsorbed_percCEC", "adsorbed_ppm", "adsorbed", "aqueous_total", 
"aqueous", "bulksoil", "exchange_total", "gas", "rate", "soil_ph", 
"solid_sp_saturation", "solid_volumePercent", "solid_weightPercent", 
"solid", "specific_surface_area", "surface_area"

In [2]:
# --- postproc profile file names:
# 
# ["adsorbed_percCEC", "adsorbed_ppm", "adsorbed", "aqueous_total", 
#  "aqueous", "bulksoil", "exchange_total", "gas", "rate", "soil_ph", 
#  "solid_sp_saturation", "solid_volumePercent", "solid_weightPercent", 
#  "solid", "specific_surface_area", "surface_area"]
# 

### make a dictionary denoting which files to run

In [3]:
# Switchboard of postproc profile files: the key is the profile file name and
# the value flags whether that file is to be run (True) or not (False).
# Passed as `proc_dict` to the batch-processing calls further down.
proc_dict = dict(
    adsorbed=False,
    adsorbed_percCEC=True,
    adsorbed_ppm=False,
    aqueous=True,
    aqueous_total=False,
    bulksoil=True,
    exchange_total=False,
    gas=True,
    rate=False,
    soil_ph=True,
    solid=True,
    solid_sp_saturation=True,
    solid_volumePercent=True,
    solid_weightPercent=True,
    specific_surface_area=True,
    surface_area=False,
)

### denote which dimensions to include in batch processing

In [4]:
# dimensions of the batch to include in processing
# (handed to the batch-processing calls below as `batch_axes`)
batch_axes = [
    "dustrad",
    "dustrate_ton_ha_yr",
]

## read in the batch csv files

In [5]:
# ----------------------------------------------------------------------------
# Batch configuration: everything needed to locate the batch input csv files,
# the control-run name patterns, the model output and the save destination.
# ----------------------------------------------------------------------------

# --- which batch to process
runtype = "field"        # "field" or "lab"
fertlevel = "hi"         # one of "no", "low", "mid", "hi"
dustsp = "gbas"          # silicate species: "gbas" or "wls"
sitename = "311a"        # "311a" or "311b"
multiyear_sil = False    # look for multi-year composites in the silicate case?
multiyear_cc = False     # look for multi-year composites in the calcite (cc) case?

# --- optional extra tag in the casenames, something like "wet" (use "" for none)
tag_sil = "AWS-TEST1"
tag_cc = "AWS-TEST1"

# --- version string appended to the batch-input csv file names
version_sil_csv = "v0"
version_cc_csv = "v0"

# --- where things live
# outdir = "/home/tykukla/SCEPTER/scepter_output"
outdir = "s3://carbonplan-carbon-removal/SCEPTER/scepter_output_scratch/"   # model output location
csv_loc = "/home/tykukla/aglime-swap-cdr/scepter/batch-inputs"              # batch-input csv location
savepath = "/home/tykukla/aglime-swap-cdr/scepter/process/runs/batch_postprocResults/cc-sil_psize_apprate"

# --- save details
# NOTE(review): `save_results` is not consulted by the save cells visible in this notebook
save_results = True
# prefix for the save directory (which will be created upon save);
# "_multiyear" is appended when either case uses multi-year composites
savedir_pref = (
    f"meanAnn_shortRun_{fertlevel}Fert_{tag_sil}_multiyear"
    if (multiyear_sil or multiyear_cc)
    else f"meanAnn_shortRun_{fertlevel}Fert_{tag_sil}"
)

# --- [SILICATE RUNS] control-run name pattern(s) (fnmatch-style, `*` = any psize) and batch csv
if not multiyear_sil:
    runname_ctrl_sil = [f"{fertlevel}Fert_{dustsp}_{tag_sil}_site_{sitename}_app_0p0_psize_*_{dustsp}_{runtype}_tau15p0"]
    csv_fn_sil = f"meanAnn_{dustsp}_shortRun_{tag_sil}_{fertlevel}Fert_gs+apprate_{version_sil_csv}.csv"
else:
    runname_ctrl_sil = [f"{fertlevel}Fert_{dustsp}_multiyear_site_{sitename}_app_0p0_psize_*_composite_{runtype}"]
    csv_fn_sil = f"meanAnn_{dustsp}_shortRun_multiyear_{fertlevel}Fert_gs+apprate_{version_sil_csv}.csv"

# --- [CALCITE RUNS] same as above, with the species fixed to "cc"
if not multiyear_cc:
    runname_ctrl_cc = [f"{fertlevel}Fert_cc_{tag_cc}_site_{sitename}_app_0p0_psize_*_cc_{runtype}_tau15p0"]
    csv_fn_cc = f"meanAnn_cc_shortRun_{tag_cc}_{fertlevel}Fert_gs+apprate_{version_cc_csv}.csv"
else:
    runname_ctrl_cc = [f"{fertlevel}Fert_cc_multiyear_site_{sitename}_app_0p0_psize_*_composite_{runtype}"]
    csv_fn_cc = f"meanAnn_cc_shortRun_multiyear_{fertlevel}Fert_gs+apprate_{version_cc_csv}.csv"

In [6]:
# --- read in the batch .csv files and add the derived columns used downstream
def _prep_batch_df(dfin, multiyear, ctrl_patterns, runtype, sitename):
    """
    Add the full run id, control-run flag and application-rate columns to a
    batch input table, and restrict it to a single site when several are present.

    Parameters
    ----------
    dfin : pd.DataFrame
        Batch input table as read from the batch .csv. Must contain the columns
        'newrun_id' and 'site', plus 'dustsp' and 'duration' when `multiyear`
        is False. A 'dustrate' column is optional.
    multiyear : bool
        If True the full run id is `newrun_id` + "_composite_<runtype>";
        otherwise it is `newrun_id` + "_<dustsp>_<runtype>_tau<duration>",
        with the decimal point of the duration written as "p".
    ctrl_patterns : list of str
        fnmatch-style patterns; a run is flagged as a control run when its
        full run id matches any of them.
    runtype : str
        Run type string embedded in the full run id.
    sitename : str
        Site name without the "site_" prefix; used to filter the 'site' column.

    Returns
    -------
    pd.DataFrame
        A new dataframe (the input is left untouched) with the added columns
        'newrun_id_full', 'ctrl_run' and, if 'dustrate' exists,
        'dustrate_ton_ha_yr'.
    """
    df = dfin.copy()

    # add column for the full run id
    if multiyear:
        df["newrun_id_full"] = df["newrun_id"] + f"_composite_{runtype}"
    else:
        # duration has to be cast to float first, otherwise an integer duration
        # loses its decimal point (15 -> "15" instead of "15p0").
        # regex=False is explicit so "." is always a literal dot: the default
        # for `regex` differs between pandas 1.x and 2.x.
        tau = df["duration"].astype(float).astype(str).str.replace(".", "p", regex=False)
        df["newrun_id_full"] = df["newrun_id"] + "_" + df["dustsp"] + "_" + runtype + "_tau" + tau

    # identify the control run(s)
    df["ctrl_run"] = df["newrun_id_full"].apply(
        lambda run_id: any(fnmatch.fnmatch(run_id, pattern) for pattern in ctrl_patterns)
    )

    # eliminate other sites if necessary
    if len(df["site"].unique()) > 1:
        df = df[df["site"] == f"site_{sitename}"].copy()

    # add a column for the dustrate in ton_ha_yr
    # (presumably 'dustrate' is in g m-2 yr-1, hence the factor of 100 -- TODO confirm)
    if "dustrate" in df.columns:
        df["dustrate_ton_ha_yr"] = df["dustrate"] / 100

    return df


# [CALCITE]
dfin_cc = _prep_batch_df(
    pd.read_csv(os.path.join(csv_loc, csv_fn_cc)),
    multiyear=multiyear_cc,
    ctrl_patterns=runname_ctrl_cc,
    runtype=runtype,
    sitename=sitename,
)

# [SILICATE]
dfin_sil = _prep_batch_df(
    pd.read_csv(os.path.join(csv_loc, csv_fn_sil)),
    multiyear=multiyear_sil,
    ctrl_patterns=runname_ctrl_sil,
    runtype=runtype,
    sitename=sitename,
)


In [7]:
# # --- OPTIONAL: Filter out part of CSV

# # [1] by climatefile name (corresponds with site name)
# site = "site_311a"
# dfin_cc = dfin_cc[dfin_cc['climatefiles'] == 'site_311a']
# dfin_sil = dfin_sil[dfin_sil['climatefiles'] == 'site_311a']

# # [2] ... TK

### check that the batch csv inputs look right

In [8]:
# dfin_cc

In [9]:
# dfin_sil

## compile all the data
This returns a dictionary where each key is one of the filenames in proc_dict, and each value is the batch-merged dataset for that key.

In [10]:
# --- calcite batch
# Result is a dictionary: one key per file name in proc_dict, each value being
# the batch-merged dataset for that key.
dsdict_cc = cfp.prof_batchprocess_allvars(
    outdir,
    dfin=dfin_cc,
    dustsp="cc",
    proc_dict=proc_dict,
    batch_axes=batch_axes,
)

adsorbed
adsorbed_percCEC
adsorbed_ppm
aqueous
aqueous_total
bulksoil
exchange_total
gas
rate
soil_ph
solid
solid_sp_saturation
solid_volumePercent
solid_weightPercent
specific_surface_area
surface_area


In [11]:
# --- silicate batch (same processing as the calcite batch, on the silicate inputs)
# Result is a dictionary: one key per file name in proc_dict, each value being
# the batch-merged dataset for that key.
# NOTE(review): the species is hard-coded to 'gbas' here -- confirm it matches
# the `dustsp` chosen in the configuration cell.
dsdict_sil = cfp.prof_batchprocess_allvars(
    outdir,
    dfin=dfin_sil,
    dustsp="gbas",
    proc_dict=proc_dict,
    batch_axes=batch_axes,
)

adsorbed
adsorbed_percCEC
adsorbed_ppm
aqueous
aqueous_total
bulksoil
exchange_total
gas
rate
soil_ph
solid
solid_sp_saturation
solid_volumePercent
solid_weightPercent
specific_surface_area
surface_area


## save the results

In [12]:
# --- calcite
# The save directory is generated by this call (from `savepath` + `savedir_pref`)
# and whatever the call returns is kept in `saved_here`, which the silicate save
# in the next cell passes as its `save_directory` -- so this cell must run first (!!)
#
# The suffix is written out literally instead of reassigning the notebook-level
# `dustsp` configuration variable, so the silicate species chosen in the
# configuration cell is not clobbered by this cell.
saved_here = cfp.save_batch_postproc_profOnly(
    dsdict = dsdict_cc,
    filename_suffix = "_cc_batch",
    save_directory = None,
    base_path = savepath,
    base_dir_name = savedir_pref,
)

Directory created: /home/tykukla/aglime-swap-cdr/scepter/process/runs/batch_postprocResults/cc-sil_psize_apprate/meanAnn_shortRun_hiFert_AWS-TEST1_004


In [15]:
# --- silicate
# Saved into the directory produced by the calcite save (`saved_here`), so no
# new directory is requested here (base_path / base_dir_name are None).
# FIX: pass the silicate results (`dsdict_sil`); this cell previously passed
# `dsdict_cc`, which wrote the calcite data a second time under the silicate suffix.
# NOTE(review): the species is hard-coded to 'gbas' -- confirm it matches the
# silicate species used to build `dsdict_sil`.
dustsp = 'gbas'
_ = cfp.save_batch_postproc_profOnly(
    dsdict = dsdict_sil,
    filename_suffix = f"_{dustsp}_batch",
    save_directory = saved_here,
    base_path = None,
    base_dir_name = None,
)

In [None]:
# -------------------------