In [34]:
import xarray as xr
import re
from collections import defaultdict
import sys, os
# Add the parent of the current working directory to the module search path.
# The ESMplot import that follows presumably relies on this (package assumed
# to live one level above the notebook's working directory -- confirm).
sys.path.append(os.path.dirname(os.getcwd()))
from ESMplot.watertagging.combine_tagged_regions import (
    combine_regions_to_new_tag
)

In [3]:
## Load the two input datasets (this cell reads data; it does not import modules)

# `bk`: F_1850_CAM5 water-tag case, "climo" file (original note: "BK 1850 tags").
bk = xr.open_dataset(
    "/net/paleonas.wustl.edu/volume1/blkshare/ajthompson/postproc/"
    "f.e12.F_1850_CAM5.wiso.f19.0ka.002.watertags.2.cam.h0.0006-0025.climo.nc"
)

# `aaf`: BRCP85C5CN water-tag case, monthly climatology file
# (original note: "aaf 2090 tags"; the file name itself carries 2105-2124).
aaf = xr.open_dataset(
    "/RAID/datasets/f.ie12.BRCP85C5CN.f19_g16.LME.004_2100watertags.004/archive/atm/hist/climatology/"
    "f.ie12.BRCP85C5CN.f19_g16.LME.004_2100watertags.004.cam.h0.2105-2124_monthly_climatology_cat.nc"
)

In [34]:
# Combine the EURO, NASA, INDA and SASA tags of `aaf` into one new tag, ERAS.
# Optional per-region weights could also be supplied, e.g.
#   weights={"EURO": 1, "NASA": 1, "INDA": 1, "SASA": 1}
# They are not passed here, so the function's own default applies.
eras_ds = combine_regions_to_new_tag(
    aaf,
    new_region="ERAS",
    regions=("EURO", "NASA", "INDA", "SASA"),
    # Presumably leaves variables that are not tied to a region out of the
    # result -- confirm against combine_regions_to_new_tag.
    keep_nonregion_vars=False,
    # Per the original note: only average where *all* listed regions are present.
    require_all=True,
)

In [35]:
# Display the result of the ERAS combination as the cell output.
eras_ds

In [5]:
# Combine the WNAM and ENAM tags of `aaf` into one new tag, NAMG.
# Optional per-region weights could also be supplied, e.g.
#   weights={"WNAM": 1, "ENAM": 1}
# They are not passed here, so the function's own default applies.
NAMG_ds = combine_regions_to_new_tag(
    aaf,
    new_region="NAMG",
    regions=("WNAM", "ENAM"),
    # Presumably leaves variables that are not tied to a region out of the
    # result -- confirm against combine_regions_to_new_tag.
    keep_nonregion_vars=False,
    # Per the original note: only average where *all* listed regions are present.
    require_all=True,
)

In [6]:
# Display the result of the NAMG combination as the cell output.
NAMG_ds

In [14]:
# Combine the WNAT and ENAT tags of `aaf` into one new tag, NATL.
# Optional per-region weights could also be supplied, e.g.
#   weights={"WNAT": 1, "ENAT": 1}
# They are not passed here, so the function's own default applies.
NATL_ds = combine_regions_to_new_tag(
    aaf,
    new_region="NATL",
    regions=("WNAT", "ENAT"),
    # Presumably leaves variables that are not tied to a region out of the
    # result -- confirm against combine_regions_to_new_tag.
    keep_nonregion_vars=False,
    # Attribute handling, per the original notes: 'units' must agree across
    # the inputs, while long_name / cell_methods come from the preferred source.
    inherit_attrs="consensus",
    consensus_only_keys={"units"},
    prefer_keys_first_present=("long_name", "cell_methods"),
)


In [15]:
# Display the result of the NATL combination as the cell output.
NATL_ds

# Check results of combining

In [None]:
# Open the BRCP85C5CN monthly climatology (the file before regions are combined).
# Note: the next cell rebinds `ds` to the combined-region product.
ds = xr.open_dataset(
    "/RAID/datasets/f.ie12.BRCP85C5CN.f19_g16.LME.004_2100watertags.004/archive/atm/hist/climatology/"
    "f.ie12.BRCP85C5CN.f19_g16.LME.004_2100watertags.004.cam.h0.2105-2124_monthly_climatology_cat.nc"
)

In [48]:
# Open the "combReg" product for the BRCP85C5CN case from the products directory.
ds = xr.open_dataset(
    "/RAID/datasets/f.ie12.BRCP85C5CN.f19_g16.LME.004_2100watertags.004/products/"
    "f.ie12.BRCP85C5CN.f19_g16.LME.004_2100watertags.004.cam.h0.2105-2124_monClim_combReg.nc"
)

In [49]:
# Display the dataset just opened as the cell output.
ds

# Explore blk0ka

In [35]:
# Open the F_1850_CAM5 water-tag "climo" file (same path as the `bk` load near the top).
bk = xr.open_dataset(
    "/net/paleonas.wustl.edu/volume1/blkshare/ajthompson/postproc/"
    "f.e12.F_1850_CAM5.wiso.f19.0ka.002.watertags.2.cam.h0.0006-0025.climo.nc"
)

In [19]:
# Display the F_1850 dataset as the cell output.
bk

In [46]:
# Open the "combReg" version of the F_1850 climo file. It is read from the
# BRCP85C5CN case's products directory, which is where this path points.
bk_comb = xr.open_dataset(
    "/RAID/datasets/f.ie12.BRCP85C5CN.f19_g16.LME.004_2100watertags.004/products/"
    "f.e12.F_1850_CAM5.wiso.f19.0ka.002.watertags.2.cam.h0.0006-0025.climo_combReg.nc"
)

In [47]:
# Display the combined-region F_1850 dataset as the cell output.
bk_comb

In [45]:
# Data-variable names found in `ds` but missing from `bk_comb`.
diff1 = set(ds.data_vars).difference(bk_comb.data_vars)

# Data-variable names found in `bk_comb` but missing from `ds`.
diff2 = set(bk_comb.data_vars).difference(ds.data_vars)

# Report both one-sided differences.
print("In ds only:", diff1)
print("In bk_comb only:", diff2)

In ds only: {'SST'}
In bk_comb only: set()


In [29]:
# Open the F_1850_CAM5 water-tag "climo" file (this rebinds `ds`).
ds = xr.open_dataset(
    "/net/paleonas.wustl.edu/volume1/blkshare/ajthompson/postproc/"
    "f.e12.F_1850_CAM5.wiso.f19.0ka.002.watertags.2.cam.h0.0006-0025.climo.nc"
)

## Combine regions
# Sundaland: merge the four SL** tags (NW, NE, SW, SE) into one new tag, SLCB.
ds_combined = combine_regions_to_new_tag(
    ds,
    new_region="SLCB",
    regions=("SLNW", "SLNE", "SLSW", "SLSE"),
    # Presumably retains variables that are not tied to a region -- confirm
    # against combine_regions_to_new_tag.
    keep_nonregion_vars=True,
    # Attribute handling, per the original notes: 'units' must agree across
    # the inputs, while long_name / cell_methods come from the preferred source.
    inherit_attrs="consensus",
    consensus_only_keys={"units"},
    prefer_keys_first_present=("long_name", "cell_methods"),
)

In [32]:
# Display the dataset returned by the SLCB combination as the cell output.
ds_combined

# Investigate "cases" coordinate issue

In [57]:
import numpy as np, xarray as xr
# Directory that holds the combined-region ("combReg") product files.
# It is deliberately NOT called `dir`: binding that name at notebook scope
# shadows Python's built-in dir() for the rest of the kernel session.
PRODUCTS_DIR = '/RAID/datasets/f.ie12.BRCP85C5CN.f19_g16.LME.004_2100watertags.004/products/'

# File paths and names for each case
# 20yr water tagging experiments (cam only)
# PRODUCTS_DIR already ends in "/", so plain concatenation gives valid paths.
CASES = [
    PRODUCTS_DIR + 'f.e12.F_1850_CAM5.wiso.f19.0ka.002.watertags.2.cam.h0.0006-0025.climo_combReg.nc',
    PRODUCTS_DIR + 'f.ie12.BRCP85C5CN.f19_g16.LME.004_2100watertags.004.cam.h0.2105-2124_monClim_combReg.nc',
]

In [58]:
# Open both cases, then require their lat/lon coordinate values to match
# exactly (np.array_equal is an exact comparison, not a tolerance-based one).
ds0 = xr.open_dataset(CASES[0])
ds1 = xr.open_dataset(CASES[1])  # or the cleaned copy
for coord in ("lat", "lon"):
    assert np.array_equal(ds0[coord].values, ds1[coord].values)

In [56]:
# Element-wise difference of the two latitude arrays; any non-zero entry marks
# a grid point where the two cases' coordinates are not bit-identical.
ds0.lat.values - ds1.lat.values

array([ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  3.55271368e-15,
        0.00000000e+00, -3.55271368e-15,  0.00000000e+00,  0.00000000e+00,
       -3.55271368e-15,  7.10542736e-15,  3.55271368e-15,  0.00000000e+00,
       -1.77635684e-15, -5.32907052e-15,  7.10542736e-15,  3.55271368e-15,
        8.88178420e-16, -1.77635684e-15, -4.88498131e-15,  6.43929354e-15,
        3.55271368e-15,  8.88178420e-16, -1.77635684e-15, -5.32907052e-15,
        7.10542736e-15,  

In [54]:
# Display the latitude coordinate of the first case (CASES[0]).
ds0.lat

In [55]:
# Display the latitude coordinate of the second case (CASES[1]).
ds1.lat