In [3]:
import xarray as xr
import dask
import glob
import matplotlib.pyplot as plt
import numpy as np
from scipy import integrate
import matplotlib
from cartopy.mpl.gridliner import LONGITUDE_FORMATTER, LATITUDE_FORMATTER
import pandas as pd

In [4]:
cluster_data_path = "/lus/scratch/shao/data/NEP36_extremes/processed/daily/by_year/by_cluster"
n_clusters = range(5,10)

shallows_ds_dict = {}
canyons_ds_dict = {}

for n_cluster in n_clusters:
    print(n_cluster)
    shallows_ds_dict[n_cluster] = xr.open_dataset(
                f"{cluster_data_path}/n_clusters_{n_cluster}/shallows.nc",
                engine="h5netcdf",
                chunks = {"time":730}
    ).persist()
    canyons_ds_dict[n_cluster] = xr.open_dataset(
                f"{cluster_data_path}/n_clusters_{n_cluster}/canyons.nc",
                engine="h5netcdf",
                chunks = {"time":730}
    ).persist()

5
6
7
8
9


In [5]:
downwelling_months = [10, 11, 12, 1, 2, 3]
upwelling_months   = [4, 5, 6, 7, 8, 9]
timeseries_vars = ['O2','OmegaA','T']

base_percentile = 0.1
percentiles = {
    'O2':base_percentile,
    'OmegaA':base_percentile,
    'T':1-base_percentile
}

In [6]:
def filter_by_season_and_values(ds, month_range):       
    ds_out = ds.where(ds['time.month'].isin(month_range),drop=True)
    return ds_out
    
def calculate_threshold(ds, percentile, nbins=1000):
    data = ds.to_numpy().flatten()
    hist, edges = np.histogram(data, bins=nbins, density=True)
    cdf = integrate.cumtrapz(hist,edges[1:])
    return np.interp(percentile, cdf, edges[2:])    


In [7]:
heading = "-"*5
# threshold_df = pd.DataFrame(columns = ["Number of Clusters", "Variable", "Season", "Threshold"])
thresholds = []

for n_cluster in n_clusters:
    print(n_cluster)
    
    downwelling_ds = filter_by_season_and_values(
        shallows_ds_dict[n_cluster],
        downwelling_months
    )
    upwelling_ds = filter_by_season_and_values(
        shallows_ds_dict[n_cluster],
        upwelling_months
    )

    for var in timeseries_vars:
        upwelling_threshold = calculate_threshold(upwelling_ds[var], percentiles[var])
        downwelling_threshold = calculate_threshold(downwelling_ds[var], percentiles[var])

        thresholds.append(
            {
                "Number of Clusters":n_cluster,
                "Variable": var,
                "Season": "upwelling",
                "Threshold": upwelling_threshold
            }
        )
        thresholds.append(
            {
                "Number of Clusters":n_cluster,
                "Variable": var,
                "Season": "downwelling",
                "Threshold": downwelling_threshold
            }
        )

shallows_threshold_df = pd.DataFrame(thresholds)

5


  cdf = integrate.cumtrapz(hist,edges[1:])


6


  cdf = integrate.cumtrapz(hist,edges[1:])


7


  cdf = integrate.cumtrapz(hist,edges[1:])


8


  cdf = integrate.cumtrapz(hist,edges[1:])


9


  cdf = integrate.cumtrapz(hist,edges[1:])


In [8]:
thresholds = []
for n_cluster in n_clusters:
    print(n_cluster)
    
    downwelling_ds = filter_by_season_and_values(
        canyons_ds_dict[n_cluster],
        downwelling_months
    )
    upwelling_ds = filter_by_season_and_values(
        canyons_ds_dict[n_cluster],
        upwelling_months
    )

    for var in timeseries_vars:
        upwelling_threshold = calculate_threshold(upwelling_ds[var], percentiles[var])
        downwelling_threshold = calculate_threshold(downwelling_ds[var], percentiles[var])

        thresholds.append(
            {
                "Number of Clusters":n_cluster,
                "Variable": var,
                "Season": "upwelling",
                "Threshold": upwelling_threshold
            }
        )
        thresholds.append(
            {
                "Number of Clusters":n_cluster,
                "Variable": var,
                "Season": "downwelling",
                "Threshold": downwelling_threshold
            }
        )

canyons_threshold_df = pd.DataFrame(thresholds)

5


  cdf = integrate.cumtrapz(hist,edges[1:])


6


  cdf = integrate.cumtrapz(hist,edges[1:])


7


  cdf = integrate.cumtrapz(hist,edges[1:])


8


  cdf = integrate.cumtrapz(hist,edges[1:])


9


  cdf = integrate.cumtrapz(hist,edges[1:])


In [9]:
shallows_threshold_df

Unnamed: 0,Number of Clusters,Variable,Season,Threshold
0,5,O2,upwelling,231.537604
1,5,O2,downwelling,257.021111
2,5,OmegaA,upwelling,1.571409
3,5,OmegaA,downwelling,1.491451
4,5,T,upwelling,13.938983
5,5,T,downwelling,10.606867
6,6,O2,upwelling,240.283134
7,6,O2,downwelling,259.964439
8,6,OmegaA,upwelling,1.619398
9,6,OmegaA,downwelling,1.497454


In [10]:
canyons_threshold_df

Unnamed: 0,Number of Clusters,Variable,Season,Threshold
0,5,O2,upwelling,60.335595
1,5,O2,downwelling,70.716595
2,5,OmegaA,upwelling,0.802316
3,5,OmegaA,downwelling,0.831659
4,5,T,upwelling,6.550771
5,5,T,downwelling,7.28737
6,6,O2,upwelling,58.792362
7,6,O2,downwelling,68.608881
8,6,OmegaA,upwelling,0.801375
9,6,OmegaA,downwelling,0.82765


In [11]:
print("Canyons")
for var in timeseries_vars:
    display(canyons_threshold_df[(canyons_threshold_df["Season"] == "upwelling") & (canyons_threshold_df["Variable"] == var)])
for var in timeseries_vars:
    display(canyons_threshold_df[(canyons_threshold_df["Season"] == "downwelling") & (canyons_threshold_df["Variable"] == var)])

Canyons


Unnamed: 0,Number of Clusters,Variable,Season,Threshold
0,5,O2,upwelling,60.335595
6,6,O2,upwelling,58.792362
12,7,O2,upwelling,54.097558
18,8,O2,upwelling,49.267821
24,9,O2,upwelling,59.690382


Unnamed: 0,Number of Clusters,Variable,Season,Threshold
2,5,OmegaA,upwelling,0.802316
8,6,OmegaA,upwelling,0.801375
14,7,OmegaA,upwelling,0.792717
20,8,OmegaA,upwelling,0.7808
26,9,OmegaA,upwelling,0.806843


Unnamed: 0,Number of Clusters,Variable,Season,Threshold
4,5,T,upwelling,6.550771
10,6,T,upwelling,6.386468
16,7,T,upwelling,6.195425
22,8,T,upwelling,6.163624
28,9,T,upwelling,6.273955


Unnamed: 0,Number of Clusters,Variable,Season,Threshold
1,5,O2,downwelling,70.716595
7,6,O2,downwelling,68.608881
13,7,O2,downwelling,63.460316
19,8,O2,downwelling,58.374627
25,9,O2,downwelling,69.206218


Unnamed: 0,Number of Clusters,Variable,Season,Threshold
3,5,OmegaA,downwelling,0.831659
9,6,OmegaA,downwelling,0.82765
15,7,OmegaA,downwelling,0.814997
21,8,OmegaA,downwelling,0.802062
27,9,OmegaA,downwelling,0.831154


Unnamed: 0,Number of Clusters,Variable,Season,Threshold
5,5,T,downwelling,7.28737
11,6,T,downwelling,7.064146
17,7,T,downwelling,6.818974
23,8,T,downwelling,6.800289
29,9,T,downwelling,6.92816


In [12]:
print("Shallows")
for var in timeseries_vars:
    display(shallows_threshold_df[(shallows_threshold_df["Season"] == "upwelling") & (shallows_threshold_df["Variable"] == var)])
for var in timeseries_vars:
    display(shallows_threshold_df[(shallows_threshold_df["Season"] == "downwelling") & (shallows_threshold_df["Variable"] == var)])

Shallows


Unnamed: 0,Number of Clusters,Variable,Season,Threshold
0,5,O2,upwelling,231.537604
6,6,O2,upwelling,240.283134
12,7,O2,upwelling,242.080803
18,8,O2,upwelling,248.320393
24,9,O2,upwelling,248.144207


Unnamed: 0,Number of Clusters,Variable,Season,Threshold
2,5,OmegaA,upwelling,1.571409
8,6,OmegaA,upwelling,1.619398
14,7,OmegaA,upwelling,1.629094
20,8,OmegaA,upwelling,1.664164
26,9,OmegaA,upwelling,1.663213


Unnamed: 0,Number of Clusters,Variable,Season,Threshold
4,5,T,upwelling,13.938983
10,6,T,upwelling,14.065088
16,7,T,upwelling,14.096407
22,8,T,upwelling,14.235772
28,9,T,upwelling,14.231581


Unnamed: 0,Number of Clusters,Variable,Season,Threshold
1,5,O2,downwelling,257.021111
7,6,O2,downwelling,259.964439
13,7,O2,downwelling,260.601726
19,8,O2,downwelling,262.757045
25,9,O2,downwelling,262.706991


Unnamed: 0,Number of Clusters,Variable,Season,Threshold
3,5,OmegaA,downwelling,1.491451
9,6,OmegaA,downwelling,1.497454
15,7,OmegaA,downwelling,1.498529
21,8,OmegaA,downwelling,1.50226
27,9,OmegaA,downwelling,1.502254


Unnamed: 0,Number of Clusters,Variable,Season,Threshold
5,5,T,downwelling,10.606867
11,6,T,downwelling,10.649936
17,7,T,downwelling,10.662735
23,8,T,downwelling,10.720108
29,9,T,downwelling,10.71859
