In [1]:
import xarray as xr
import dask
import glob
import matplotlib.pyplot as plt
import numpy as np
from scipy import integrate
import matplotlib
from cartopy.mpl.gridliner import LONGITUDE_FORMATTER, LATITUDE_FORMATTER
import pandas as pd

In [2]:
cluster_data_path = "/lus/scratch/shao/data/NEP36_extremes/processed/daily/by_year/by_cluster"
n_clusters = range(5,10)

shallows_ds_dict = {}
canyons_ds_dict = {}

for n_cluster in n_clusters:
    print(n_cluster)
    shallows_ds_dict[n_cluster] = xr.open_dataset(
                f"{cluster_data_path}/n_clusters_{n_cluster}/shallows.nc",
                engine="h5netcdf",
                chunks = {"time":730}
    ).persist()
    canyons_ds_dict[n_cluster] = xr.open_dataset(
                f"{cluster_data_path}/n_clusters_{n_cluster}/canyons.nc",
                engine="h5netcdf",
                chunks = {"time":730}
    ).persist()

5
6
7
8
9


In [3]:
downwelling_months = [10, 11, 12, 1, 2, 3]
upwelling_months   = [4, 5, 6, 7, 8, 9]
timeseries_vars = ['O2','OmegaA','T']

base_percentile = 0.1
percentiles = {
    'O2':base_percentile,
    'OmegaA':base_percentile,
    'T':1-base_percentile
}

In [4]:
def filter_by_season_and_values(ds, month_range):       
    ds_out = ds.where(ds['time.month'].isin(month_range),drop=True)
    return ds_out
    
def calculate_threshold(ds, percentile, nbins=1000):
    data = ds.to_numpy().flatten()
    hist, edges = np.histogram(data, bins=nbins, density=True)
    cdf = integrate.cumulative_trapezoid(hist,edges[1:])
    return np.interp(percentile, cdf, edges[2:])    


In [5]:
heading = "-"*5
# threshold_df = pd.DataFrame(columns = ["Number of Clusters", "Variable", "Season", "Threshold"])
thresholds = []

for n_cluster in n_clusters:
    print(n_cluster)
    
    downwelling_ds = filter_by_season_and_values(
        shallows_ds_dict[n_cluster],
        downwelling_months
    )
    upwelling_ds = filter_by_season_and_values(
        shallows_ds_dict[n_cluster],
        upwelling_months
    )

    for var in timeseries_vars:
        upwelling_threshold = calculate_threshold(upwelling_ds[var], percentiles[var])
        downwelling_threshold = calculate_threshold(downwelling_ds[var], percentiles[var])

        thresholds.append(
            {
                "Number of Clusters":n_cluster,
                "Variable": var,
                "Season": "upwelling",
                "Threshold": upwelling_threshold
            }
        )
        thresholds.append(
            {
                "Number of Clusters":n_cluster,
                "Variable": var,
                "Season": "downwelling",
                "Threshold": downwelling_threshold
            }
        )

shallows_threshold_df = pd.DataFrame(thresholds)

5
6
7
8
9


In [6]:
thresholds = []
for n_cluster in n_clusters:
    print(n_cluster)
    
    downwelling_ds = filter_by_season_and_values(
        canyons_ds_dict[n_cluster],
        downwelling_months
    )
    upwelling_ds = filter_by_season_and_values(
        canyons_ds_dict[n_cluster],
        upwelling_months
    )

    for var in timeseries_vars:
        upwelling_threshold = calculate_threshold(upwelling_ds[var], percentiles[var])
        downwelling_threshold = calculate_threshold(downwelling_ds[var], percentiles[var])

        thresholds.append(
            {
                "Number of Clusters":n_cluster,
                "Variable": var,
                "Season": "upwelling",
                "Threshold": upwelling_threshold
            }
        )
        thresholds.append(
            {
                "Number of Clusters":n_cluster,
                "Variable": var,
                "Season": "downwelling",
                "Threshold": downwelling_threshold
            }
        )

canyons_threshold_df = pd.DataFrame(thresholds)

5
6
7
8
9


In [7]:
shallows_threshold_df

Unnamed: 0,Number of Clusters,Variable,Season,Threshold
0,5,O2,upwelling,232.914372
1,5,O2,downwelling,257.166863
2,5,OmegaA,upwelling,1.443585
3,5,OmegaA,downwelling,1.35045
4,5,T,upwelling,13.95953
5,5,T,downwelling,10.637939
6,6,O2,upwelling,240.824222
7,6,O2,downwelling,259.853772
8,6,OmegaA,upwelling,1.487907
9,6,OmegaA,downwelling,1.355312


In [8]:
canyons_threshold_df

Unnamed: 0,Number of Clusters,Variable,Season,Threshold
0,5,O2,upwelling,60.300207
1,5,O2,downwelling,70.546476
2,5,OmegaA,upwelling,0.653069
3,5,OmegaA,downwelling,0.681538
4,5,T,upwelling,6.547437
5,5,T,downwelling,7.276429
6,6,O2,upwelling,58.68975
7,6,O2,downwelling,68.38255
8,6,OmegaA,upwelling,0.651568
9,6,OmegaA,downwelling,0.677259


In [9]:
print("Canyons")
for var in timeseries_vars:
    display(canyons_threshold_df[(canyons_threshold_df["Season"] == "upwelling") & (canyons_threshold_df["Variable"] == var)])
for var in timeseries_vars:
    display(canyons_threshold_df[(canyons_threshold_df["Season"] == "downwelling") & (canyons_threshold_df["Variable"] == var)])

Canyons


Unnamed: 0,Number of Clusters,Variable,Season,Threshold
0,5,O2,upwelling,60.300207
6,6,O2,upwelling,58.68975
12,7,O2,upwelling,54.964282
18,8,O2,upwelling,49.923376
24,9,O2,upwelling,59.696112


Unnamed: 0,Number of Clusters,Variable,Season,Threshold
2,5,OmegaA,upwelling,0.653069
8,6,OmegaA,upwelling,0.651568
14,7,OmegaA,upwelling,0.644866
20,8,OmegaA,upwelling,0.63274
26,9,OmegaA,upwelling,0.65681


Unnamed: 0,Number of Clusters,Variable,Season,Threshold
4,5,T,upwelling,6.547437
10,6,T,upwelling,6.379052
16,7,T,upwelling,6.209664
22,8,T,upwelling,6.155874
28,9,T,upwelling,6.271169


Unnamed: 0,Number of Clusters,Variable,Season,Threshold
1,5,O2,downwelling,70.546476
7,6,O2,downwelling,68.38255
13,7,O2,downwelling,64.289429
19,8,O2,downwelling,59.040501
25,9,O2,downwelling,69.102881


Unnamed: 0,Number of Clusters,Variable,Season,Threshold
3,5,OmegaA,downwelling,0.681538
9,6,OmegaA,downwelling,0.677259
15,7,OmegaA,downwelling,0.667316
21,8,OmegaA,downwelling,0.65391
27,9,OmegaA,downwelling,0.680898


Unnamed: 0,Number of Clusters,Variable,Season,Threshold
5,5,T,downwelling,7.276429
11,6,T,downwelling,7.049255
17,7,T,downwelling,6.828581
23,8,T,downwelling,6.780865
29,9,T,downwelling,6.918613


In [10]:
print("Shallows")
for var in timeseries_vars:
    display(shallows_threshold_df[(shallows_threshold_df["Season"] == "upwelling") & (shallows_threshold_df["Variable"] == var)])
for var in timeseries_vars:
    display(shallows_threshold_df[(shallows_threshold_df["Season"] == "downwelling") & (shallows_threshold_df["Variable"] == var)])

Shallows


Unnamed: 0,Number of Clusters,Variable,Season,Threshold
0,5,O2,upwelling,232.914372
6,6,O2,upwelling,240.824222
12,7,O2,upwelling,241.934641
18,8,O2,upwelling,249.270219
24,9,O2,upwelling,249.123808


Unnamed: 0,Number of Clusters,Variable,Season,Threshold
2,5,OmegaA,upwelling,1.443585
8,6,OmegaA,upwelling,1.487907
14,7,OmegaA,upwelling,1.494154
20,8,OmegaA,upwelling,1.537534
26,9,OmegaA,upwelling,1.536489


Unnamed: 0,Number of Clusters,Variable,Season,Threshold
4,5,T,upwelling,13.95953
10,6,T,upwelling,14.078166
16,7,T,upwelling,14.097893
22,8,T,upwelling,14.26802
28,9,T,upwelling,14.263835


Unnamed: 0,Number of Clusters,Variable,Season,Threshold
1,5,O2,downwelling,257.166863
7,6,O2,downwelling,259.853772
13,7,O2,downwelling,260.267177
19,8,O2,downwelling,262.895241
25,9,O2,downwelling,262.820802


Unnamed: 0,Number of Clusters,Variable,Season,Threshold
3,5,OmegaA,downwelling,1.35045
9,6,OmegaA,downwelling,1.355312
15,7,OmegaA,downwelling,1.355871
21,8,OmegaA,downwelling,1.359866
27,9,OmegaA,downwelling,1.359716


Unnamed: 0,Number of Clusters,Variable,Season,Threshold
5,5,T,downwelling,10.637939
11,6,T,downwelling,10.680207
17,7,T,downwelling,10.687544
23,8,T,downwelling,10.756721
29,9,T,downwelling,10.755384
