#### Bias testing ####

Testing the bias of the monthly DePreSys data relative to ERA5 for tas, sfcWind and rsds, over different lead times and different months.

In [None]:
import os
import sys
import glob
import importlib
from tqdm import tqdm

# Import relevant libraries
import numpy as np
import matplotlib.pyplot as plt
import xarray as xr

# Optional dask stuff
import dask_gateway
import pathlib

In [None]:
# Import the functions
# Make the local unseen_functions directory importable. Only extend sys.path
# when the entry is missing, so re-running this cell does not pile up
# duplicate entries.
unseen_functions_dir = "/home/users/benhutch/unseen_functions/"
if unseen_functions_dir not in sys.path:
    sys.path.append(unseen_functions_dir)

# import the bias functions
import bias_functions as bf

In [None]:
# Make sure the local unseen_functions directory is importable. The entry is
# only added when it is not already on sys.path, so running this cell after
# (or several times alongside) other cells that add the same directory does
# not create duplicate entries.
unseen_functions_dir = "/home/users/benhutch/unseen_functions/"
if unseen_functions_dir not in sys.path:
    sys.path.append(unseen_functions_dir)

# import the dictionaries
import unseen_dictionaries as udicts

In [None]:
# # Create a connection to dask-gateway.
# gw = dask_gateway.Gateway("https://dask-gateway.jasmin.ac.uk", auth="jupyterhub")

In [None]:
# # Inspect and change the options if required before creating your cluster.
# options = gw.cluster_options()
# options.worker_cores = 2

In [None]:
# clusters = gw.list_clusters()
# if not clusters:
#     cluster = gw.new_cluster(options, shutdown_on_close=False)
# else:
#     cluster = gw.connect(clusters[0].name)
    
# cluster.adapt(minimum=1, maximum=3)

# client = cluster.get_client()

In [None]:
# client.get_versions(check=True)

In [None]:
# client

In [None]:
# Hard-coded settings used by the cells below.

# -- model / experiment identification --
model = "HadGEM3-GC31-MM"
experiment = "dcppA-hindcast"
frequency = "Amon"

# -- variable names (model name and the matching observation name) --
variable = "tas"
obs_variable = "t2m"

# -- lead time and initialisation years (1960 to 1970 inclusive) --
lead_time = 1
# init_years = [1960]
init_years = np.arange(1960, 1971)

# -- file-reading options --
engine = "netcdf4"
parallel = False

# -- example model file --
test_file = "/gws/nopw/j04/canari/users/benhutch/dcppA-hindcast/data/tas/HadGEM3-GC31-MM/merged_files/tas_Amon_HadGEM3-GC31-MM_dcppA-hindcast_s1960-r1i1p1f2_gn_196011-197103.nc"

# -- observations file --
obs_fpath = "/home/users/benhutch/ERA5/adaptor.mars.internal-1691509121.3261805-29348-4-3a487c76-fc7b-421f-b5be-7436e2eb78d7.nc"

In [None]:
# %%time

# # test the load data function
# ds = bf.load_dcpp_data_lead(
#     model=model,
#     variable=variable,
#     lead_time=lead_time,
#     init_years=init_years,
#     experiment=experiment,
#     frequency=frequency,
#     engine=engine,
#     parallel=False,
# )

In [None]:
# ds

In [None]:
# # reload the functions
# importlib.reload(bf)

In [None]:
# %%time

# # Regrid the data
# ds = bf.regrid_ds(
#     ds=ds,
#     variable=variable,
# )

In [None]:
# %%time

# # select the gridbox
# ds = bf.select_gridbox(
#     ds=ds,
#     grid=udicts.eu_grid_constrained,
#     calc_mean=False,
# )

In [None]:
# %%time

# # Load the test ds
# test_ds = xr.open_dataset(test_file)

# # Test the load and regrid obs function
# obs = bf.load_and_rg_obs(
#     model_ds=test_ds,
#     obs_variable=obs_variable,
#     obs_path=obs_fpath,
#     init_years=init_years,
#     lead_time=lead_time,
#     rg_algo="bilinear",
#     grid_bounds=[-180.0, 180.0, -90.0, 90.0],
#     periodic=True,
#     parallel=False,
# )

In [None]:
# # Select the gridbox for the obs
# obs = bf.select_gridbox(
#     ds=obs,
#     grid=udicts.eu_grid_constrained,
#     calc_mean=False,
# )

In [None]:
# # reload the functions using importlib
# importlib.reload(bf)

In [None]:
# # test the save_data function
# bf.save_data(
#     model_ds=ds,
#     obs_ds=obs,
#     model="HadGEM3-GC31-MM",
#     experiment="dcppA-hindcast",
#     frequency="Amon",
#     variable="tas",
#     init_years=init_years,
#     lead_time=lead_time,
# )

In [None]:
# Scratch directory that is searched for the saved .nc files
path = "/work/scratch-nopw2/benhutch/test_nc/"

# Collect the directory entries and order them alphabetically by name
files = os.listdir(path)
files.sort()

# Show the available files
print(files)

In [None]:
# File names of the saved model and observation data inside `path`
model_fname = "HadGEM3-GC31-MM_dcppA-hindcast_rsds_s1960-2018_lead3_Amon_20240531T095531.nc"
obs_fname = "obs_rsds_s1960-2018_Amon_20240531T095531.nc"

# # # optional check: the trailing timestamp should be the same in both file names
# assert obs_fname.split("_")[-1] == model_fname.split("_")[-1]

# Open both .nc files as xarray datasets (observations first, then the model)
obs_scratch = xr.open_dataset(
    os.path.join(path, obs_fname)
)
model_scratch = xr.open_dataset(
    os.path.join(path, model_fname)
)

In [None]:
# Reload the bias_functions module (imported as `bf`) using importlib, so the
# cells below call the module's current source rather than the version that
# was loaded when it was first imported.
importlib.reload(bf)

In [None]:
# %%time

# # calculate and plot the bias
# bf.calc_and_plot_bias(
#     model_ds=model_scratch["__xarray_dataarray_variable__"],
#     obs_ds=obs_scratch["__xarray_dataarray_variable__"],
#     month_idx=12,
#     lead_time=lead_time,
#     init_years=np.arange(1960, 2018 + 1),
#     variable=variable,
#     month_name="October",
#     figsize=(12, 6),
#     save_dir="/gws/nopw/j04/canari/users/benhutch/plots/",
#     save=False,
# )

In [None]:
# Reload the bias_functions module (imported as `bf`) using importlib before
# the plotting call in the next cell, so that call uses the module's current
# source.
importlib.reload(bf)

In [None]:
%%time

# Run the all-months bias calculation and plotting on the datasets loaded from
# scratch. The figure is not written to disk (save=False).
bf.calc_and_plot_bias_all_months(
    # input data: the single data variable stored in each saved file
    model_ds=model_scratch["__xarray_dataarray_variable__"],
    obs_ds=obs_scratch["__xarray_dataarray_variable__"],
    # what is being compared
    variable="rsds",
    lead_time=3,
    init_years=np.arange(1960, 2018 + 1),
    month_names=udicts.month_names_HadGEM3,
    mean_or_std="std",
    # figure settings
    figsize=(10, 10),
    vmin_set=-28,
    vmax_set=28,
    # output settings
    save=False,
    save_dir="/gws/nopw/j04/canari/users/benhutch/plots/",
)

