# Test the sensitivity of MDM to varying amounts of data

In [1]:
import glob
import random
from datetime import datetime, timedelta

import cartopy as cart
import dask_jobqueue
import matplotlib as mtplt
import matplotlib.cm as cm
import matplotlib.colors as mcolors
import matplotlib.pyplot as plt
import nc_time_axis
import numpy as np
import pandas as pd
import xarray as xr
import zarr
from distributed import Client

# import netCDF4 as nc
#import xskillscore as xs
#np.set_printoptions(threshold=np.inf)

In [4]:
# Point locations as (latitude, longitude); longitudes use the 0-360 degrees-east convention.
# NOTE(review): "chic"/"ben" presumably stand for Chicago and Bengaluru - confirm.
chic_lat, chic_lon = 41.8781, (360-87.6298)%360
ben_lat, ben_lon = 12.9716, 77.5946

# CONUS bounding box; west/east edges are shifted by +360 into 0-360 longitudes.
top = 50.0                 # north lat
bottom = 24.7433195        # south lat
left = -124.7844079+360    # west long
right = -66.9513812+360    # east long

# Data locations on the cluster scratch filesystem.
cesm2_path = '/global/scratch/users/harsha/LENS/cesm2/tasmax/'
cvals = '/global/scratch/users/harsha/LENS/cesm2/cvals/detrended/'
cmip6_cvals = cvals + 'cmip6/'

'July 30'

In [5]:
############
# Reference years for the two periods being compared.
# NOTE(review): "pi"/"eoc" presumably mean pre-industrial / end-of-century - confirm.
# NOTE(review): a later cell re-assigns these names and sets eoc_year = 2071;
# confirm which value is intended.
pi_year, eoc_year = 1865, 2086

# Day of year analysed (1-based).
doy = 211
#############################
def no_leap_date(day_of_year):
    """Convert a 1-based day of year in a 365-day (no-leap) calendar to 'Month DD'.

    Parameters
    ----------
    day_of_year : int
        Day of the year, where 1 is January 1 and 365 is December 31.
        Integer-like values (e.g. numpy integers) are accepted.

    Returns
    -------
    str
        Full month name followed by the zero-padded day of month,
        e.g. ``'July 30'`` for 211 and ``'January 01'`` for 1.

    Raises
    ------
    ValueError
        If ``day_of_year`` is outside 1..365. Without this check such values
        silently wrap into the neighbouring year (366 -> 'January 01').
    """
    day = int(day_of_year)
    if not 1 <= day <= 365:
        raise ValueError(
            f"day_of_year must be between 1 and 365 for a no-leap calendar, got {day_of_year!r}"
        )

    # 2021 is a non-leap year, so offsets from its January 1 follow a 365-day calendar.
    start_date = datetime(2021, 1, 1)

    # Subtract 1 because January 1st is day 1, not day 0.
    actual_date = start_date + timedelta(days=day - 1)

    return actual_date.strftime('%B %d')
###############################
# Human-readable label for the selected day of year; the bare name below
# makes it the cell's displayed output.
date = no_leap_date(day_of_year=doy)
date

'July 30'

In [6]:
def to_daily(ds):
    """Reshape a time series so 'time' is split into 'year' and 'dayofyear' dimensions.

    The calendar year and day of year of every time step are attached as
    coordinates on 'time', 'time' is re-indexed by the (year, dayofyear)
    pair, and that index is unstacked into two separate dimensions.

    Parameters
    ----------
    ds : object with a datetime-like ``time`` coordinate
        Must expose ``ds.time.dt.year`` / ``ds.time.dt.dayofyear`` and the
        ``assign_coords`` / ``set_index`` / ``unstack`` methods
        (presumably an xarray DataArray or Dataset - confirm).

    Returns
    -------
    The unstacked object, with 'year' and 'dayofyear' in place of 'time'.
    """
    time_parts = ds.time.dt

    # Label every time step with its calendar year and day of year.
    labelled = ds.assign_coords(
        year=("time", time_parts.year.data),
        dayofyear=("time", time_parts.dayofyear.data),
    )

    # Index 'time' by the (year, dayofyear) pair, then split it into two dimensions.
    reindexed = labelled.set_index(time=("year", "dayofyear"))
    return reindexed.unstack("time")

In [7]:
def implement_mdm(ds_obs,init_mean,final_mean,init_std,final_std):
    """Apply the MDM adjustment: shift the mean and rescale anomalies of ``ds_obs``.

    The result is
    ``mean(ds_obs) + (final_mean - init_mean) + (final_std / init_std) * (ds_obs - mean(ds_obs))``,
    where the mean is taken over the 'year' dimension.

    Parameters
    ----------
    ds_obs : array-like with a ``mean('year')`` method
        Observations; assumed to carry a 'year' dimension.
    init_mean, final_mean :
        Mean of the initial and final period; their difference is added.
    init_std, final_std :
        Standard deviation of the initial and final period; their ratio
        scales the anomalies about the observed mean.

    Returns
    -------
    The adjusted data, broadcast against ``ds_obs``.
    """
    climatology = ds_obs.mean('year')
    mean_shift = final_mean - init_mean
    spread_ratio = final_std/init_std
    scaled_anomaly = spread_ratio*(ds_obs - climatology)
    return climatology + mean_shift + scaled_anomaly

def implement_qdm(qobs, qinit, qfinal):
    """Quantile delta mapping: add the quantile-wise change ``qfinal - qinit`` to ``qobs``.

    All three arguments are expected to be quantiles that the caller has
    already computed. An earlier draft, left as commented-out code, derived
    ``qinit`` / ``qfinal`` inside this function via
    ``.quantile(quants, dim='mtime')`` on model data with an 'mtime'
    coordinate; that is no longer done here.

    Parameters
    ----------
    qobs :
        Quantiles of the observations.
    qinit, qfinal :
        Quantiles of the initial and final period.

    Returns
    -------
    ``qobs + (qfinal - qinit)``.
    """
    quantile_change = qfinal - qinit
    return qobs + quantile_change

def is_sorted(arr):
    """Return True if ``arr`` is monotonic (non-decreasing or non-increasing).

    Neighbouring elements are compared along the first axis. Equal
    neighbours are allowed, and empty or single-element inputs count as
    sorted.

    Parameters
    ----------
    arr : array-like
        Sequence to test. It is converted with ``np.asarray`` first so the
        comparison is element-wise; on a plain Python list, ``<=`` between
        two slices is a single lexicographic comparison and gives wrong
        answers (e.g. ``[1, 3, 2]`` would be reported as sorted).

    Returns
    -------
    bool
    """
    values = np.asarray(arr)
    head, tail = values[:-1], values[1:]
    return bool(np.all(head <= tail) or np.all(head >= tail))

#
# Default quantile levels: 30 evenly spaced values from 0 to 1 inclusive.
quants = np.linspace(0, 1.0, 30)


def compute_quantiles(ds, quantiles=quants):
    """Compute quantiles of ``ds`` along its 'year' dimension.

    The data are first rechunked so 'year' sits in a single chunk
    (chunk size -1), then ``quantile`` is evaluated over 'year' with
    ``skipna=False``.

    Parameters
    ----------
    ds : object with ``chunk`` and ``quantile`` methods
        Presumably a dask-backed xarray object with a 'year' dimension - confirm.
    quantiles : array-like, optional
        Quantile levels; defaults to the module-level ``quants``.

    Returns
    -------
    Whatever ``ds.quantile`` returns for the requested levels.
    """
    single_year_chunk = ds.chunk({"year": -1})
    return single_year_chunk.quantile(quantiles, dim='year', skipna=False)

In [8]:
# Extra SLURM directives attached to every job the cluster submits.
job_extra = ['--qos=cf_lowprio', '--account=ac_cumulus']
# Alternative QoS/account combinations, kept for reference:
#job_extra =['--qos=lr6_lowprio','--account=ac_cumulus','--constraint=lr6_m192']
#job_extra =['--qos=condo_cumulus_lr6','--account=lr_cumulus','--constraint=lr6_m192']
#job_extra =['--qos=lr_lowprio','--account=ac_cumulus']

# Jobs go to the "cf1" queue with 10 cores, 192GB of memory and a 5-hour
# walltime; worker scratch files and logs share one scratch directory.
cluster = dask_jobqueue.SLURMCluster(
    queue="cf1",
    cores=10,
    memory="192GB",
    walltime='5:00:00',
    interface='eth0',
    local_directory='/global/scratch/users/harsha/dask_space/',
    log_directory='/global/scratch/users/harsha/dask_space/',
    job_extra_directives=job_extra,
)

# Connect this notebook session to the cluster's scheduler.
client = Client(cluster)

# NOTE(review): scale_up looks like a legacy alias; the documented call is
# cluster.scale(...). Confirm whether 2 is meant as workers or as SLURM jobs
# before switching.
cluster.scale_up(2)

Perhaps you already have a cluster running?
Hosting the HTTP server on port 42066 instead


In [9]:
# First and last year of each analysis window, as strings (used as
# time-slice bounds when the data are loaded).
pi_year0, pi_year1 = '1850', '1879'
ic_year0, ic_year1 = '2071', '2100'

# NOTE(review): these three names were already assigned in an earlier cell,
# where eoc_year was 2086; here it becomes 2071. Confirm which is intended.
pi_year = 1865
eoc_year = 2071
doy = 211  # day_of_year

In [10]:
# Show the cluster object as the cell output (dashboard address, worker count, memory).
cluster

0,1
Dashboard: http://10.0.39.4:42066/status,Workers: 0
Total threads: 0,Total memory: 0 B

0,1
Comm: tcp://10.0.39.4:39337,Workers: 0
Dashboard: http://10.0.39.4:42066/status,Total threads: 0
Started: Just now,Total memory: 0 B


### Load CESM2 data

In [None]:
%%time
# Open the two CESM2 zarr stores lazily (file names suggest the SSP3-7.0
# scenario run and the historical run - confirm).
ds_ssp = xr.open_mfdataset(cesm2_path + 'ssp370_cesm2tmax.zarr', engine='zarr')
ds_hist = xr.open_mfdataset(cesm2_path + 'bhist_cesm2tmax.zarr', engine='zarr')

# Naming key: t = temp, nw = no-warming, w = warming, hw = historical warming, g = global.
# tpi / teoc hold TREFHTMX restricted to the pi_year0..pi_year1 and
# ic_year0..ic_year1 windows respectively.
# NOTE(review): .sel(method='nearest') is called with no coordinate indexers,
# so it selects nothing; it may be left over from a lat/lon point selection - confirm.
tpi = (
    ds_hist.TREFHTMX
    .sel(method='nearest')
    .sel(time=slice(pi_year0, pi_year1))
)
teoc = (
    ds_ssp.TREFHTMX
    .sel(method='nearest')
    .sel(time=slice(ic_year0, ic_year1))
)

# Split 'time' into separate 'year' and 'dayofyear' dimensions.
tdpi = to_daily(tpi)
tdeoc = to_daily(teoc)

In [None]:
# Build (model, obs) index pairs: N random pairs plus two fixed ones.
# Requires `random` to be imported in the notebook's import cell.

# Candidate indices for random selection: 1..99 inclusive.
total_range = list(range(1, 100))

# Number of random pairs to draw.
N = 1
random.seed(42)  # fixed seed so the same pairs are drawn on every run

# Draw N distinct indices for the model side.
model_list = random.sample(total_range, N)

# Remove these numbers from the candidates so no random index is used on both sides.
already_drawn = set(model_list)
remaining_range = [num for num in total_range if num not in already_drawn]

# Draw N distinct indices for the obs side from what is left.
obs_list = random.sample(remaining_range, N)

# Append two fixed pairs: (99, 0) and (0, 1).
# NOTE(review): 99 and 1 are also in total_range, so for other seeds or a
# larger N a random draw could repeat one of these fixed indices - confirm
# whether that matters.
model_list = model_list + [99,0]
obs_list   = obs_list + [0,1]

# Pair the numbers together position by position.
paired_numbers = list(zip(model_list, obs_list))

# Print the pairs
print("List of pairs:")
for pair in paired_numbers:
    print(pair)