In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from air2waterpy import air2water
from air2waterpy.metrics import calc_mse, calc_nse
import os
os.chdir("/work/pi_kandread_umass_edu/lake_temp_bias/satbias_model/satlswt")
from p_tqdm import p_map

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Global configuration: simulation window, input/output directories and the
# shared input tables that the functions below read as module-level globals.
simulation_period = pd.date_range("2000-01-01", "2023-12-31")
param_dir = "/nas/cee-hydro/laketemp_bias/params/a2w_param_cloud"
sim_dir = "/nas/cee-hydro/laketemp_bias/simulations/a2w_cloud_sim"

# Air temperature table (first CSV column is the parsed date index).
# load_data() selects one column per lake using str(lake_id).
df_airtemp = pd.read_csv("/nas/cee-hydro/laketemp_bias/era5land/air_temp.csv", 
                         index_col=0, parse_dates=True)
# Observed water temperature table, read with the same layout; load_data()
# also selects its column by str(lake_id).
df_tw = pd.read_csv("/nas/cee-hydro/laketemp_bias/era5land/water_temp_cloud.csv", 
                    index_col=0, parse_dates=True)
# HydroLAKES depth table, indexed by its first column. The path is relative,
# so it depends on the working directory set by os.chdir() in the import cell.
hydrolake = pd.read_csv("data/cci_lakes_hydrolake_depth.csv", index_col = 0)
# Lake ids to simulate: the hydrolake index cast to int64.
# NOTE(review): the original note says lakes without ERA5-Land data are
# excluded, but no filtering happens here -- presumably the CSV is already
# restricted to lakes that have ERA5-Land data; confirm.
cci_lake_list = hydrolake.index.to_numpy().astype(np.int64)

In [3]:
def load_data(lake_id,
             ):
    """
    Assemble the air temperature and observed water temperature of one lake.

    Parameters
    ----------
    lake_id : int or str
        Lake identifier. It is converted with ``str()`` and used as the column
        label in both ``df_airtemp`` and ``df_tw``.

    Returns
    -------
    pandas.DataFrame
        Two columns, ``ta`` (air temperature) and ``tw_obs`` (observed water
        temperature clipped to the range [0, 999]), on an index named ``date``.

    Raises
    ------
    KeyError
        If the lake has no column in ``df_airtemp`` or ``df_tw``.
    """
    column = str(lake_id)

    # rename_axis returns a new object with its own index. Assigning to
    # ``series.index.name`` instead would rename the Index object that may be
    # shared with the module-level tables, silently changing the globals.
    ta = df_airtemp.loc[:, column].rename_axis("date")
    tw_obs = df_tw.loc[:, column].rename_axis("date")

    # put both series side by side, aligned on date
    df = pd.concat([ta, tw_obs], axis = 1)
    df.columns = ["ta", "tw_obs"]

    # negative water temperatures are set to 0, following the original note
    # that this matches the air2water model; the upper bound of 999 is just a
    # very large cap (missing values are left as they are by clip)
    df["tw_obs"] = df["tw_obs"].clip(0,999)

    return df

In [4]:
def get_params(lake_id):
    """
    Read the calibrated parameter table of one lake.

    The file ``{param_dir}/{lake_id}.csv`` is read keeping only the unnamed
    first column (used as the index) and the six parameter columns a1..a6.

    Returns
    -------
    pandas.DataFrame
        One row per parameter set, columns ``a1`` to ``a6``, with the index
        named ``ensemble_id``.
    """
    csv_path = f"{param_dir}/{lake_id}.csv"
    # "Unnamed: 0" is the label pandas gives to a header cell that is empty
    keep = ["Unnamed: 0", "a1", "a2", "a3", "a4", "a5", "a6"]
    table = pd.read_csv(csv_path, index_col = 0, usecols = keep)
    return table.rename_axis("ensemble_id")
# get_params(1109)

In [5]:
def simulate_single(lake_id,
                    ensemble_id, # indicate which ensemble
                    sim_period = simulation_period, # whole period
                    tw_init = 0, # set Jan-01 temperature as 0
                    tw_ice = 0,
                    th = 4.0
                    ):
    """
    Run the 6-parameter air2water model for one lake and one parameter set.

    Steps: build the model, take the lake's air temperature from
    ``load_data``, pick one row of the calibrated parameters from
    ``get_params`` and simulate water temperature over ``sim_period``.

    Parameters
    ----------
    lake_id : lake identifier forwarded to ``load_data`` and ``get_params``.
    ensemble_id : int, position (not label) of the parameter row to use.
    sim_period : dates to simulate; per the original note, the air
        temperature series has to have the same length as ``sim_period``.
    tw_init, tw_ice, th : forwarded unchanged to ``model.simulate``.

    Returns
    -------
    The object returned by ``model.simulate``, with its columns renamed to
    the single label ``tw_sim_{ensemble_id}``.
    """
    # 6-parameter variant of the model
    a2w_model = air2water(version="6p")

    # forcing: air temperature column of this lake
    air_temp = load_data(lake_id)["ta"]

    # parameter row selected by position, turned into a {name: value} dict
    chosen_params = get_params(lake_id).iloc[ensemble_id].to_dict()
    a2w_model.load_params(chosen_params)

    # run the simulation with the requested settings
    simulated = a2w_model.simulate(air_temp, sim_period, th = th,
                                   tw_init = tw_init, tw_ice = tw_ice)

    # tag the output with the ensemble member it came from
    simulated.columns = [f"tw_sim_{ensemble_id}"]
    return simulated

In [6]:
# simulate_single(1109, ensemble_id = 0)

In [7]:
def simulate_ensemble(lake_id, 
                      ensemble_num = 10,
                      save_dir = sim_dir,
                      sim_period = simulation_period, # whole period
                      tw_init = 0, # set Jan-01 temperature as 0
                      tw_ice = 0,
                      th = 4.0,
                      ):
    """
    Simulate water temperature for the first ``ensemble_num`` parameter sets
    of one lake and collect the results column-wise.

    Parameters
    ----------
    lake_id : lake identifier forwarded to ``simulate_single``.
    ensemble_num : int, number of ensemble members; members
        ``0 .. ensemble_num - 1`` are simulated.
    save_dir : str or None. Directory that receives ``{lake_id}.csv``.
        Pass ``None`` to get the table back instead of writing it.
    sim_period, tw_init, tw_ice, th : forwarded unchanged to
        ``simulate_single``.

    Returns
    -------
    pandas.DataFrame or None
        The combined table (one ``tw_sim_{i}`` column per member) when
        ``save_dir`` is ``None``; otherwise ``None`` after the CSV is written.
    """
    members = [
        simulate_single(lake_id,
                        ensemble_id, # indicate which ensemble
                        sim_period = sim_period,
                        tw_init = tw_init,
                        tw_ice = tw_ice,
                        th = th)
        for ensemble_id in range(ensemble_num)
    ]

    # Concatenate once instead of growing a frame inside the loop, which
    # re-copies all earlier columns on every iteration. pd.concat rejects an
    # empty list, so an empty ensemble falls back to an empty frame.
    tw_sim_df = pd.concat(members, axis = 1) if members else pd.DataFrame([])

    if save_dir is None:
        return tw_sim_df

    # Write into the directory the caller asked for. The path used to be built
    # from the global ``sim_dir``, so a custom ``save_dir`` was ignored.
    tw_sim_df.to_csv(f"{save_dir}/{lake_id}.csv")
    return None

In [8]:
# Run simulate_ensemble for every lake id in cci_lake_list through p_map,
# requesting 4 CPUs. With the default save_dir each call writes
# {sim_dir}/{lake_id}.csv and returns None, so `output` is expected to hold
# only None values -- the useful results are the CSV files on disk.
output = p_map(simulate_ensemble, cci_lake_list, num_cpus = 4)

100%|██████████| 2016/2016 [07:02<00:00,  4.77it/s]
