In [1]:
# %load_ext autoreload
# %autoreload 2
%matplotlib inline
import numpy as np
import time
import shutil

import warnings
import intake
import pathlib
import xarray as xr
import pandas as pd
import cf_xarray
import dask
# dask.config.set({"array.slicing.split_large_chunks": True}) # avoid large chunks to be created.

import matplotlib.pyplot as plt
from fastjmd95 import rho

from dask.diagnostics import ProgressBar
import matplotlib.pyplot as plt

from fastprogress.fastprogress import progress_bar

from xarrayutils.file_handling import (
    write,
    maybe_create_folder,
    file_exist_check,
    temp_write_split,
)
from xarrayutils.utils import (
    remove_bottom_values,
    #mask_mixedlayer
)
from cmip6_preprocessing.preprocessing import (
    combined_preprocessing
)
from cmip6_preprocessing.drift_removal import (
    remove_trend,
    match_and_remove_trend
)
from cmip6_preprocessing.utils import (
    cmip6_dataset_id
)

from cmip6_preprocessing.postprocessing import (
    combine_datasets,
    concat_experiments,
    match_metrics,
    merge_variables,
    interpolate_grid_label,
)
from cmip6_preprocessing.drift_removal import match_and_remove_trend

import sys
sys.path.append("../../")
from cmip6_omz.upstream_stash import (
    transform_wrapper,
    pick_first_member,
    construct_static_dz
)
from cmip6_omz.omz_tools import (
    omz_thickness,
    sigma_bins,
    align_missing,
    preprocessing_wrapper,
    vol_consistency_check_wrapper
)

from cmip6_omz.utils import (
    cmip6_collection,
    o2_models,
)

from cmip6_omz.plotting import plot_omz_results

## Notes

At this point I am just trying to get these results for discussion. 

- GFDL ESM4: Seems to work for now with reduced workers and small write chunks
- GFDL CM4: Immediately crashes.
    - [ ] Try to write out the 'raw' dataset in a rechunked form earlier in the pipline
- MPI HR: Lets see how this one fairs...

## What I have done:
- Remove all old refs to the other repos
- Refactoring of the metrics matching
- Using only the regridding to combine variables 
    - Need to patch in Norwegian models
- Single cell for filtering/checking all datasets for required vars/metrics
    - This also logs all the problems in one place


## TODO:
- [ ] Fix the 'gr' only combination of the norwegian models in cmip6_pp
- [ ] Test with netcdf archive (or at least update the zarr? Not actually sure this is worth it anymore...I will migrate to the cloud eventually. But we might need the age for CM4/ESM4?
- [ ] CM4 age is chunked badly...
- [ ] Figure out how to deal with the access data properly (thickness concat fails)...
- [x] Try with new trends
- [x] **The damn norwegian models have no area...**
- [x] Can I check each variable for the giant chunks after concatting?

In [2]:
from distributed import Client, LocalCluster

# # This is optimized so that 
# mem = '32 GiB' #per worker
# # workers = 5
# threads = 4
# cluster = LocalCluster(
#     memory_limit=mem,
#     dashboard_address=9999,
#     threads_per_worker=threads,
#                       )
# cluster.scale(4)
# client = Client(cluster)
# client

# Customized for CM4 (which is chunked super stupidly...)
mem = '96 GiB' #per worker
# workers = 5
threads = 4
cluster = LocalCluster(
    memory_limit=mem,
    dashboard_address=9999,
    threads_per_worker=threads,
                      )
cluster.scale(3)
client = Client(cluster)
client


# import dask
# from multiprocessing.pool import ThreadPool
# dask.config.set(pool=ThreadPool(6))

0,1
Connection method: Cluster object,Cluster type: LocalCluster
Dashboard: http://127.0.0.1:9999/status,

0,1
Status: running,Using processes: True
Dashboard: http://127.0.0.1:9999/status,Workers: 3
Total threads:  12,Total memory:  288.00 GiB

0,1
Comm: tcp://127.0.0.1:42728,Workers: 3
Dashboard: http://127.0.0.1:9999/status,Total threads:  12
Started:  Just now,Total memory:  288.00 GiB

0,1
Comm: tcp://127.0.0.1:40753,Total threads: 4
Dashboard: http://127.0.0.1:35297/status,Memory: 96.00 GiB
Nanny: tcp://127.0.0.1:43719,
Local directory: /projects/GEOCLIM/LRGROUP/jbusecke/projects/cmip6_omz/notebooks/julius/dask-worker-space/worker-hlm1emgf,Local directory: /projects/GEOCLIM/LRGROUP/jbusecke/projects/cmip6_omz/notebooks/julius/dask-worker-space/worker-hlm1emgf

0,1
Comm: tcp://127.0.0.1:46147,Total threads: 4
Dashboard: http://127.0.0.1:37834/status,Memory: 96.00 GiB
Nanny: tcp://127.0.0.1:36923,
Local directory: /projects/GEOCLIM/LRGROUP/jbusecke/projects/cmip6_omz/notebooks/julius/dask-worker-space/worker-od918tgp,Local directory: /projects/GEOCLIM/LRGROUP/jbusecke/projects/cmip6_omz/notebooks/julius/dask-worker-space/worker-od918tgp

0,1
Comm: tcp://127.0.0.1:46672,Total threads: 4
Dashboard: http://127.0.0.1:42380/status,Memory: 96.00 GiB
Nanny: tcp://127.0.0.1:39227,
Local directory: /projects/GEOCLIM/LRGROUP/jbusecke/projects/cmip6_omz/notebooks/julius/dask-worker-space/worker-7c_vpejd,Local directory: /projects/GEOCLIM/LRGROUP/jbusecke/projects/cmip6_omz/notebooks/julius/dask-worker-space/worker-7c_vpejd


# Develop functions here

In [1]:
from cmip6_omz.upstream_stash import append_write_zarr

ImportError: cannot import name 'append_write_zarr' from 'cmip6_omz.upstream_stash' (/projects/GEOCLIM/LRGROUP/jbusecke/projects/cmip6_omz/cmip6_omz/upstream_stash.py)

In [4]:
def resample_yearly(ds_in, freq="1AS"):
    # this drops some coordinates, so i need to convert them to data_vars and then reconvert
    time_coords = [
        co
        for co in list(ds_in.coords)
        if "time" in ds_in[co].dims and co not in ["time", "time_bounds"]
    ]
    ds_out = ds_in.reset_coords(time_coords).coarsen(time=12).mean()
    ds_out = ds_out.assign_coords({co: ds_out[co] for co in time_coords})
    ds_out.attrs.update({k: v for k, v in ds_in.attrs.items() if k not in ["table_id"]})
    return ds_out

In [5]:
def is_zarr(fn):
    extension = fn.split('.')[-1]
    if extension == 'nc':
        is_zarr = False
    elif extension == 'zarr':
        is_zarr = True
    else:
        raise RuntimeError('Unrecognized File Extension')
    return is_zarr

def reload_preexisting(filename, overwrite = True):
    print("Skipping. File exists already.")
    if is_zarr(filename):
        ds_sigma_reloaded = xr.open_zarr(
            filename, use_cftime=True, consolidated=True
        )
    else:
        ds_sigma_reloaded = xr.open_dataset(
            filename, use_cftime = True
        )
        try:
            plot_omz_results(ds_sigma_reloaded)
        except Exception as e:
            print(f"Plotting failed with: {e}")
    return ds_sigma_reloaded
    
def strip_encoding(ds):
    """Strips the encoding from xr.dataset... This seems like a bug to me."""
    for var in ds.variables:
        ds[var].encoding = {}
    ds.encoding = {}
    return ds

### Local convenience functions for final cell

# Start pipeline here

In [6]:
# shutil.rmtree('../../data/temp/scratch_temp/')

In [7]:
foldername = "fine_density_tests_combined_2"
# ofolder = maybe_create_folder(f"../../data/external/{foldername}")
ofolder = maybe_create_folder(f"../../data/processed/{foldername}")

tempfolderpath = f"../../data/temp/scratch_temp/{foldername}"
# try out a different folder for now
# Clean out the tempfolder to avoid bloating that thing
shutil.rmtree(tempfolderpath)
# recreate fresh
tempfolder = maybe_create_folder(tempfolderpath)

# global parameters
o2_bins = np.array([10, 40, 60, 80, 100, 120])
fine_sigma_bins = sigma_bins()



In [8]:
col = intake.open_esm_datastore(cmip6_collection(zarr=True)) #TODO: Check with nc files

In [9]:
o2_models()

['CanESM5-CanOE',
 'CanESM5',
 'CNRM-ESM2-1',
 'ACCESS-ESM1-5',
 'MPI-ESM-1-2-HAM',
 'IPSL-CM6A-LR',
 'MIROC-ES2L',
 'UKESM1-0-LL',
 'MPI-ESM1-2-HR',
 'MPI-ESM1-2-LR',
 'MRI-ESM2-0',
 'NorCPM1',
 'NorESM1-F',
 'NorESM2-LM',
 'NorESM2-MM',
 'GFDL-CM4',
 'GFDL-ESM4']

In [10]:
# if this does not work on jupyter.rc, we can add some logic to 
col = intake.open_esm_datastore(cmip6_collection(zarr=True)) #TODO: Check with nc files

z_kwargs={"decode_times": True, "use_cftime": True, "consolidated": True}
n_kwargs={"decode_times": True, "use_cftime": True, 'chunks':{'time':1}}

variable_ids = ["thetao", "so", "o2", "agessc"] #"mlotst"
metric_variable_ids = ["thkcello", "areacello"] #"mlotst"

# models = o2_models()
# models = ['GFDL-ESM4', 'GFDL-CM4', 'ACCESS-ESM1-5']#`,# # shorter test run....,
# models = [m for m in o2_models() if 'GFDL-ESM4' in m or 'Nor' in m]
# models = [m for m in o2_models() if ('ACCESS' not in m and 'GFDL' not in m and 'HR' not in m)]
models = [
#     'MPI-ESM1-2-HR',
#     'MRI-ESM2-0',
#     'NorESM2-LM',
    'GFDL-CM4',
#     'GFDL-ESM4',
]

cat = col.search(
    source_id = models,
    grid_label=["gr", "gn"],
    experiment_id=["historical", "ssp585"],
    table_id=["Omon"],
    variable_id=variable_ids,
)
ds_dict = cat.to_dataset_dict(
        aggregate=False,
        zarr_kwargs=z_kwargs,
        cdf_kwargs=n_kwargs,
        preprocess=combined_preprocessing,
    )

# make a separate metric dict to catch all possible metrics!
cat_metrics = col.search(source_id=models,variable_id=metric_variable_ids)
ds_metric_dict = cat_metrics.to_dataset_dict(
        aggregate=False,
        zarr_kwargs=z_kwargs,
        cdf_kwargs=n_kwargs,
        preprocess=combined_preprocessing,
    )


--> The keys in the returned dictionary of datasets are constructed as follows:
	'activity_id.institution_id.source_id.experiment_id.member_id.table_id.variable_id.grid_label.version.zstore'



--> The keys in the returned dictionary of datasets are constructed as follows:
	'activity_id.institution_id.source_id.experiment_id.member_id.table_id.variable_id.grid_label.version.zstore'


## Rechunk the data

This might cause a bunch of problems....I should really only do that for some datasets...

In [11]:
# def rechunk(ds):
#     if 'time' in ds.dims:
#         return ds.chunk({'time':1})
#     else:
#         return ds

# ds_dict = {k: rechunk(ds) for k,ds in ds_dict.items()}
# ds_metric_dict = {k: rechunk(ds) for k,ds in ds_metric_dict.items()}

In [12]:
# new files (change in later and get rid of `load_trend_dict` (or refactor?) and `fix_trend_metadata`)
# Load all trend files
flist = list(pathlib.Path('../../data/external/cmip6_control_drifts/').absolute().glob('*.nc'))
flist = [f for f in flist if any([v in str(f) for v in variable_ids])]
trend_dict = {}
for f in progress_bar(flist):
    trend_dict[f.stem] = xr.open_mfdataset([f])
#     trend_dict[f.stem] = xr.open_dataset(f)

In [14]:
ddict_tracers_detrended = match_and_remove_trend(
    ds_dict,
    trend_dict,
)

## Match metrics (there are still quite a few missing).

In [15]:
# this one causes problems because the time is not as long as the full data...
ddict_tracers_detrended_filtered = {k:ds.squeeze() for k, ds in ddict_tracers_detrended.items() if not ("CNRM-ESM2-1" in k and "r6i1p1f2" in k)}

In [16]:
print('matching metrics\n')
ddict_matched = match_metrics(ddict_tracers_detrended_filtered, ds_metric_dict, ['areacello', 'thkcello'], print_statistics=True)

matching metrics

Processed 12 datasets.
Exact matches:{'areacello': 0, 'thkcello': 0}
Other matches:{'areacello': 12, 'thkcello': 0}
No match found:{'areacello': 0, 'thkcello': 12}




Do I need to rechunk here for the high res models? I am currently doing this for CM4 and ESM4, but I might have to adjust the source data...

In [17]:
print('interpolate grids\n')
ddict_matched_regrid = interpolate_grid_label(ddict_matched, merge_kwargs={'compat':'override'}) # This should be a default soon

interpolate grids



  ds_out = xr.apply_ufunc(
  ds_out = xr.apply_ufunc(


In [18]:
#patch the norwegian model in manually
ddict_patch = merge_variables(ddict_matched)
for name, ds in ddict_patch.items():
    if 'Nor' in name and 'gr' in name:
        patch_name = name.replace('.gr','')
        ddict_matched_regrid[patch_name] = ds

In [19]:
np.sort(list(ddict_matched_regrid.keys()))

array(['GFDL-CM4.historical.Omon.r1i1p1f1',
       'GFDL-CM4.ssp585.Omon.r1i1p1f1'], dtype='<U33')

## Concatenate experiments and pick the first full one

In [20]:
# somehow xarray cannot deal with comparing list/int attrs (Occurs in CM4)
# I should raise that, but lets fix it quickly here
def clean_attrs(ds):
    for a, attr in ds.attrs.items():
        if isinstance(attr, int):
            ds.attrs[a] = [attr]
    return ds

ddict_matched_regrid = {k:clean_attrs(ds) for k, ds in ddict_matched_regrid.items()}

ddict_ex_combined = concat_experiments(
    ddict_matched_regrid,
    concat_kwargs={
        'combine_attrs': 'drop_conflicts',
        'compat': 'override',
        'coords': 'minimal'
    }
)



In [21]:
ddict_ex_combined.keys()

dict_keys(['GFDL-CM4.gn.Omon.r1i1p1f1'])

## Quick fix for inhomogenous metrics
I have to think about this more. So basically some of the models (ACCESS) have time variables thickness for ssp585 and static for the historical.
This leads to huge dask chunks. For now I am taking those out, which will lead to a static recompute later...

In [22]:
def check_chunks(ds):
    trigger_vars = []
    for var in ds.variables:
        if isinstance(ds[var].data, dask.array.Array):
            for di, ch in zip(ds[var].dims, ds[var].data.chunks):
                if di == 'time':
                    if any([c>10 for c in list(ch)]):
                        trigger_vars.append(var)
                    
    return trigger_vars

# drop the variables in question
ddict_ex_combined_filtered = {}
for name,ds in ddict_ex_combined.items():
    check = check_chunks(ds)
    if len(check)>0:
        print(name)
        print(check)
    ds = ds.drop(check)
    ddict_ex_combined_filtered[name] = ds

GFDL-CM4.gn.Omon.r1i1p1f1
['agessc', 'time_bounds']


This is where I loose the age for GFDL...

## Outstanding issue ACCESS cant combine with some having no thkness
So basically in this example:
```python
ds1 = ddict_matched_regrid['ACCESS-ESM1-5.historical.Omon.r1i1p1f1']
ds2 = ddict_matched_regrid['ACCESS-ESM1-5.ssp585.Omon.r1i1p1f1']
ds2

ds_combined = xr.concat([ds1.drop('thkcello'), ds2], 'time', **{'combine_attrs': 'drop_conflicts', 'compat': 'override', 'coords': 'minimal'})
ds_combined
```
I figured that the thkcello should be dropped, but xarray fails. Raise an issue about that. Otherwise Ill have to check in the combination function...


In [23]:
# only pick full runs (historical and ssp585)
ddict_ex_combined_full = {k:ds for k,ds in ddict_ex_combined_filtered.items() if len(ds.time)>3000}

In [24]:
ddict_ex_combined_full.keys()

dict_keys(['GFDL-CM4.gn.Omon.r1i1p1f1'])

## Check datasets for completeness and log the ones with problems

In [25]:
from cmip6_preprocessing.grids import combine_staggered_grid
problems = {'missing_variables':[], 'missing_area':[], 'missing_thickness':[], 'reconstructed_area':[], 'reconstructed_thickness':[]}
ddict_filtered = {}
for name, ds in ddict_ex_combined_full.items():
    flag = False
    # Check that all necessary variables are given
    missing_variables = [va for va in ["thetao", "so", "o2"] if va not in ds.variables]
    if len(missing_variables)>0:
        flag = True
        problems['missing_variables'].append((name, missing_variables))
        
    # Check for area
    if not 'areacello' in ds.coords:
        if ds.attrs['grid_label'] == 'gr': # only reconstruct for regular grids
            grid, ds = combine_staggered_grid(ds, recalculate_metrics=True)
            # I am dropping dz_t here so it can be uniformly reconstructed
            ds = ds.drop('dz_t')
            ds = ds.assign_coords(areacello = (ds.dx_t * ds.dy_t).reset_coords(drop=True))
            problems['reconstructed_area'].append(name)
            assert 'areacello' in ds.coords
        else:
            flag = True
            problems['missing_area'].append(name)
    
    # Check for thickness (and rename) TODO: We should probably not rename and just refactor to use `thkcello`
    if "thkcello" in ds.coords:
        ds = ds.rename({'thkcello': 'dz_t'})
    else:
        # try to reconstruct the thickness from static info
        try:
#             lev_vertices = cf_xarray.bounds_to_vertices(ds.lev_bounds, 'bnds').load()
#             dz_t = lev_vertices.diff('lev_vertices')
#             ds = ds.assign_coords(dz_t=('lev', dz_t.data))
            ds = construct_static_dz(ds).rename({'thkcello': 'dz_t'})
            problems['reconstructed_thickness'].append(name)
        except Exception as e:
            print(f'{name} thickness reconstruction failed with {e}')
            print(ds)
            problems['missing_thickness'].append(name)
            flag=True
            
    if not flag:
        ddict_filtered[name] = ds

In [26]:
list(np.sort(list(ddict_filtered.keys())))

['GFDL-CM4.gn.Omon.r1i1p1f1']

In [27]:
problems

{'missing_variables': [],
 'missing_area': [],
 'missing_thickness': [],
 'reconstructed_area': [],
 'reconstructed_thickness': ['GFDL-CM4.gn.Omon.r1i1p1f1']}

In [28]:
ddict_final = pick_first_member(ddict_filtered)#
list(np.sort(list(ddict_final.keys())))

['GFDL-CM4.gn.Omon']

## Hacking time 😎

Not sure if this actually improved things...but it reduces the number of tasks...which is generally good.

Bring this over to xarrayutils (more info/test in `dev_efficient_bottom_removal`)

In [29]:
# just code that shit in numba
from numba import float64, guvectorize
import numpy as np
import xarray as xr

@guvectorize(
    [
        (float64[:], float64[:]),
    ],
    "(n)->(n)",
    nopython=True,
)
def _remove_last_value(data, output):
    # initialize output
    output[:] = data[:]
    for i in range(len(data)-1):
        if np.isnan(output[i+1]):
            output[i] = np.nan
    # take care of boundaries
    if not np.isnan(output[-1]):
        output[-1] = np.nan

def remove_bottom_values_numba(da, dim='lev'):
    
    out = xr.apply_ufunc(
        _remove_last_value,
        da,
        input_core_dims=[[dim]],
        output_core_dims=[[dim]],
        dask="parallelized",
        output_dtypes=[da.dtype],
    )
    return out

def remove_bottom_values_recoded(ds, dim="lev", fill_val=-1e10):
    """Remove the deepest values that are not nan along the dimension `dim`"""
    # for now assume that values of `dim` increase along the dimension
    if ds[dim][0] > ds[dim][-1]:
        raise ValueError(
            f"It seems like `{dim}` has decreasing values. This is not supported yet. Please sort before."
        )
    else:
        ds_masked = xr.Dataset({va:remove_bottom_values_numba(ds[va]) for va in ds.data_vars})
        ds_masked = ds_masked.transpose(*tuple([di for di in ds.dims if di in ds_masked]))
        ds_masked = ds_masked.assign_coords({co:ds[co].transpose(*[di for di in ds.dims if di in ds[co]]) for co in ds.coords})
        ds_masked.attrs = ds.attrs
        return ds_masked

## The final loop to vertiLocalClustery transform to sigma-space and save output

In [30]:
from cmip6_omz.omz_tools import omz_thickness_efficient

In [31]:
from IPython.core.display import display, HTML
def print_html(ds):
    display(HTML(ds._repr_html_()))

In [32]:
# I will have to process the control runs seperately
#         if ds.attrs["experiment_id"] == "piControl":
#             ds = ds.isel(time=slice(-300 * 12, None))



# overwrite = True
overwrite = False
with warnings.catch_warnings():
    warnings.filterwarnings('ignore')# might need to remove later...
    for synthetic in [True, False]:
        for mi, (name, ds) in enumerate(ddict_final.items()):
            print_html(ds)
            
            t0 = time.time()
            synthetic_string = 'synthetic example' if synthetic else ' '
            print(f"######################{name} {synthetic_string} ({mi+1}/{len(ddict_filtered)}) ###############")

            dataset_id = f"{cmip6_dataset_id(ds)}"

            if synthetic:
                filename = ofolder.joinpath(f"{dataset_id}_synthetic.zarr")
            else:
                filename = ofolder.joinpath(f"{dataset_id}.zarr")


            if file_exist_check(filename) and not overwrite:
                ds_sigma_reloaded = reload_preexisting(str(filename))
            else:
                print(f"Calculation started for {name}")
                tempfilelist = []
                
                ds = preprocessing_wrapper(ds)
                # clean up the chunk encoding (can probably be dropped in newer xarray versions but leave for now)
                ds = strip_encoding(ds)
                
                o2_bin_chunks=1
            
                
                print('Preprocessing')
                
                # I need to align.mask the thickness aswell!
                ds = ds.reset_coords(["dz_t"])
                #Experimental: Broadcast dz and keep it as variable
                ds['dz_t'] = xr.ones_like(ds.thetao) * ds.dz_t
                #perform nan-masking functions
                ds = align_missing(ds)
    #             ds = remove_bottom_values(ds)
                ds = remove_bottom_values_recoded(ds)
#                 ds = ds.set_coords("dz_t")

                
                print('Preprocessing DONE')

                # reconstruct the potential density
                ds["sigma_0"] = (rho(ds.so, ds.thetao, 0) - 1000)
                
                # drop some variables to reduce the number of tasks
                # Just a temp fix, ultimately I need to not figure out how to convert to thickness efficiently
                ds = ds.drop_vars(['thetao', 'so'])
                
                # If active create synthetic control dataset with constant historical o2
                
                ##################################################################
                # write out an intermediate version of the high res models here, #
                # they always crash otherwise                                    #
                ##################################################################
                
                if 'GFDL' in name or 'HR' in name:
                    print('Special Treatment for nasty models')

                    # rechunking this mess is necessary....yikes
#                     ds = ds.chunk({'time':1, 'x':720, 'y':576, 'lev':-1})
                    ds = ds.chunk({'time':1, 'x':720, 'y':576, 'lev':-1})
                    
                    print_html(ds)
                    
                    filename_temp_hr = tempfolder.joinpath(f"{name}_temp_high_res.zarr")
                    
                    print(f"Temp saving to {filename_temp_hr}")
                    append_write_zarr(ds, filename_temp_hr, 60)
                    
                    tempfilelist.append(filename_temp_hr)
    
                    ds_reloaded = xr.open_zarr(
                        filename_temp_hr,
                        consolidated=False,
                        use_cftime=True
                    )
                    ds = ds_reloaded
                    print('Check that this is chunked properly!')
                    print_html(ds)
                
#                 # One more temp write (maybe this is enough?)
#                 ###################################################
#                 print_html(ds)

#                 filename_temp_all = tempfolder.joinpath(f"{name}_temp_all.zarr")

#                 print(f"Temp saving AGAIN to {filename_temp_all}")
#                 append_write_zarr(ds, filename_temp_all, 60)

#                 tempfilelist.append(filename_temp_all)

#                 ds_reloaded_again = xr.open_zarr(
#                     filename_temp_all,
#                     consolidated=False,
#                     use_cftime=True
#                 )
#                 ds = ds_reloaded_again
#                 ##########################################################



                if synthetic:
                    print('Calculating fixed oxygen values')
                    with ProgressBar():
                        o2_hist = ds.o2.sel(time=slice('1850', '1900')).mean('time').load()
                    o2_hist_broadcasted = xr.ones_like(ds.sigma_0) * o2_hist
                    ds = ds.assign(o2=o2_hist_broadcasted)

                    assert np.allclose(ds.o2.isel(time=0).load(), ds.o2.isel(time=-100).load(), equal_nan=True)
                    assert not np.allclose(ds.sigma_0.isel(time=0), ds.sigma_0.isel(time=-100), equal_nan=True)
                
                    
                ds["omz_thickness"] = omz_thickness_efficient(
                    ds, o2_bins=o2_bins, bin_chunks=o2_bin_chunks
                )
                
                print('Before transforming')
                print_html(ds)
                
                ds_sigma_monthly = transform_wrapper(ds, sigma_bins=fine_sigma_bins)

                # Check that the total ocean volume has not changed in the transformation
                assert vol_consistency_check_wrapper(ds, ds_sigma_monthly)

                # average yearly (otherwise the outputs become huuuuge)
                ds_sigma_yearly = resample_yearly(ds_sigma_monthly)
                    
#                     ds_sigma_yearly_reloaded, tempfilelist_var = temp_write_split(
#                         ds_sigma_yearly,
#                         tempfolder,
#                         verbose=False,
#                         method='dimension',
#                         split_interval=1 if len(ds.x)>400 else 10,
#                     )
#                 tempfilelist.extend(tempfilelist_var)

                #################### write out results ########################
#                 ds_sigma_reloaded = write(
#                     ds_sigma_yearly_reloaded,
#                     filename,
#                     overwrite=False,
#                     force_load=False,
#                     check_zarr_complete=True,
#                 )

                dim_split = 10
                if len(ds.x)> 400:
                    dim_split = 1
#                 if len(ds.x)>1000:
#                     dim_split = 1
                
                print('Write out final file')
                print_html(ds_sigma_yearly)
                
                
#                 with ProgressBar():
                append_write_zarr(ds_sigma_yearly, filename, dim_split)
                
                ds_sigma_reloaded = xr.open_zarr(
                    filename,
                    consolidated=True,
                    use_cftime=True
                )

                ###### delete temps ######
                print('removing temps')
                for tf in tempfilelist:
                    if tf.exists():
                        shutil.rmtree(tf)

                        
                # Check metadata
                for ma in ['source_id', 'grid_label', 'table_id', 'variant_label']:
                    assert ds.attrs[ma] == ds_sigma_reloaded.attrs[ma]

            ##################### Verification plotting ##########################
            print('plotting results')
            try:
                plot_omz_results(ds_sigma_reloaded)
            except Exception as e:
                print(f"Plotting failed with: {e}")
            plt.show()
            t1 = time.time()
            print(f"Time passed: {(t1-t0)/60 } minutes")

Unnamed: 0,Array,Chunk
Bytes,5.93 MiB,63.28 kiB
Shape,"(1080, 1440)","(90, 180)"
Count,385 Tasks,96 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 5.93 MiB 63.28 kiB Shape (1080, 1440) (90, 180) Count 385 Tasks 96 Chunks Type float32 numpy.ndarray",1440  1080,

Unnamed: 0,Array,Chunk
Bytes,5.93 MiB,63.28 kiB
Shape,"(1080, 1440)","(90, 180)"
Count,385 Tasks,96 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,23.73 MiB,253.12 kiB
Shape,"(1080, 1440, 4)","(90, 180, 4)"
Count,193 Tasks,96 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 23.73 MiB 253.12 kiB Shape (1080, 1440, 4) (90, 180, 4) Count 193 Tasks 96 Chunks Type float32 numpy.ndarray",4  1440  1080,

Unnamed: 0,Array,Chunk
Bytes,23.73 MiB,253.12 kiB
Shape,"(1080, 1440, 4)","(90, 180, 4)"
Count,193 Tasks,96 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,560 B,560 B
Shape,"(35, 2)","(35, 2)"
Count,3 Tasks,1 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 560 B 560 B Shape (35, 2) (35, 2) Count 3 Tasks 1 Chunks Type float64 numpy.ndarray",2  35,

Unnamed: 0,Array,Chunk
Bytes,560 B,560 B
Shape,"(35, 2)","(35, 2)"
Count,3 Tasks,1 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,5.93 MiB,63.28 kiB
Shape,"(1080, 1440)","(90, 180)"
Count,673 Tasks,96 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 5.93 MiB 63.28 kiB Shape (1080, 1440) (90, 180) Count 673 Tasks 96 Chunks Type float32 numpy.ndarray",1440  1080,

Unnamed: 0,Array,Chunk
Bytes,5.93 MiB,63.28 kiB
Shape,"(1080, 1440)","(90, 180)"
Count,673 Tasks,96 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,23.73 MiB,253.12 kiB
Shape,"(1080, 1440, 4)","(90, 180, 4)"
Count,481 Tasks,96 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 23.73 MiB 253.12 kiB Shape (1080, 1440, 4) (90, 180, 4) Count 481 Tasks 96 Chunks Type float32 numpy.ndarray",4  1440  1080,

Unnamed: 0,Array,Chunk
Bytes,23.73 MiB,253.12 kiB
Shape,"(1080, 1440, 4)","(90, 180, 4)"
Count,481 Tasks,96 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,11.87 MiB,63.28 kiB
Shape,"(2, 1080, 1440)","(1, 90, 180)"
Count,1441 Tasks,192 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 11.87 MiB 63.28 kiB Shape (2, 1080, 1440) (1, 90, 180) Count 1441 Tasks 192 Chunks Type float32 numpy.ndarray",1440  1080  2,

Unnamed: 0,Array,Chunk
Bytes,11.87 MiB,63.28 kiB
Shape,"(2, 1080, 1440)","(1, 90, 180)"
Count,1441 Tasks,192 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,11.87 MiB,63.28 kiB
Shape,"(2, 1080, 1440)","(1, 90, 180)"
Count,1153 Tasks,192 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 11.87 MiB 63.28 kiB Shape (2, 1080, 1440) (1, 90, 180) Count 1153 Tasks 192 Chunks Type float32 numpy.ndarray",1440  1080  2,

Unnamed: 0,Array,Chunk
Bytes,11.87 MiB,63.28 kiB
Shape,"(2, 1080, 1440)","(1, 90, 180)"
Count,1153 Tasks,192 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,5.93 MiB,5.93 MiB
Shape,"(1080, 1440)","(1080, 1440)"
Count,3 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 5.93 MiB 5.93 MiB Shape (1080, 1440) (1080, 1440) Count 3 Tasks 1 Chunks Type float32 numpy.ndarray",1440  1080,

Unnamed: 0,Array,Chunk
Bytes,5.93 MiB,5.93 MiB
Shape,"(1080, 1440)","(1080, 1440)"
Count,3 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.19 TiB,1.22 GiB
Shape,"(3012, 35, 1080, 1440)","(3, 35, 1080, 1440)"
Count,7034 Tasks,1004 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 1.19 TiB 1.22 GiB Shape (3012, 35, 1080, 1440) (3, 35, 1080, 1440) Count 7034 Tasks 1004 Chunks Type float64 numpy.ndarray",3012  1  1440  1080  35,

Unnamed: 0,Array,Chunk
Bytes,1.19 TiB,1.22 GiB
Shape,"(3012, 35, 1080, 1440)","(3, 35, 1080, 1440)"
Count,7034 Tasks,1004 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.19 TiB,1.22 GiB
Shape,"(3012, 35, 1080, 1440)","(3, 35, 1080, 1440)"
Count,7034 Tasks,1004 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 1.19 TiB 1.22 GiB Shape (3012, 35, 1080, 1440) (3, 35, 1080, 1440) Count 7034 Tasks 1004 Chunks Type float64 numpy.ndarray",3012  1  1440  1080  35,

Unnamed: 0,Array,Chunk
Bytes,1.19 TiB,1.22 GiB
Shape,"(3012, 35, 1080, 1440)","(3, 35, 1080, 1440)"
Count,7034 Tasks,1004 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.19 TiB,1.22 GiB
Shape,"(3012, 35, 1080, 1440)","(3, 35, 1080, 1440)"
Count,10046 Tasks,1004 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 1.19 TiB 1.22 GiB Shape (3012, 35, 1080, 1440) (3, 35, 1080, 1440) Count 10046 Tasks 1004 Chunks Type float64 numpy.ndarray",3012  1  1440  1080  35,

Unnamed: 0,Array,Chunk
Bytes,1.19 TiB,1.22 GiB
Shape,"(3012, 35, 1080, 1440)","(3, 35, 1080, 1440)"
Count,10046 Tasks,1004 Chunks
Type,float64,numpy.ndarray


######################GFDL-CM4.gn.Omon synthetic example (1/1) ###############
Calculation started for GFDL-CM4.gn.Omon
Replacing intake_esm_varname attrs value with `none`
Preprocessing
Preprocessing DONE
Special Treatment for nasty models


Unnamed: 0,Array,Chunk
Bytes,5.93 MiB,1.58 MiB
Shape,"(1440, 1080)","(720, 576)"
Count,4 Tasks,4 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 5.93 MiB 1.58 MiB Shape (1440, 1080) (720, 576) Count 4 Tasks 4 Chunks Type float32 numpy.ndarray",1080  1440,

Unnamed: 0,Array,Chunk
Bytes,5.93 MiB,1.58 MiB
Shape,"(1440, 1080)","(720, 576)"
Count,4 Tasks,4 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,5.93 MiB,1.58 MiB
Shape,"(1440, 1080)","(720, 576)"
Count,4 Tasks,4 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 5.93 MiB 1.58 MiB Shape (1440, 1080) (720, 576) Count 4 Tasks 4 Chunks Type float32 numpy.ndarray",1080  1440,

Unnamed: 0,Array,Chunk
Bytes,5.93 MiB,1.58 MiB
Shape,"(1440, 1080)","(720, 576)"
Count,4 Tasks,4 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,5.93 MiB,1.58 MiB
Shape,"(1440, 1080)","(720, 576)"
Count,4 Tasks,4 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 5.93 MiB 1.58 MiB Shape (1440, 1080) (720, 576) Count 4 Tasks 4 Chunks Type float32 numpy.ndarray",1080  1440,

Unnamed: 0,Array,Chunk
Bytes,5.93 MiB,1.58 MiB
Shape,"(1440, 1080)","(720, 576)"
Count,4 Tasks,4 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,23.73 MiB,6.33 MiB
Shape,"(4, 1440, 1080)","(4, 720, 576)"
Count,597 Tasks,4 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 23.73 MiB 6.33 MiB Shape (4, 1440, 1080) (4, 720, 576) Count 597 Tasks 4 Chunks Type float32 numpy.ndarray",1080  1440  4,

Unnamed: 0,Array,Chunk
Bytes,23.73 MiB,6.33 MiB
Shape,"(4, 1440, 1080)","(4, 720, 576)"
Count,597 Tasks,4 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,23.73 MiB,6.33 MiB
Shape,"(4, 1440, 1080)","(4, 720, 576)"
Count,309 Tasks,4 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 23.73 MiB 6.33 MiB Shape (4, 1440, 1080) (4, 720, 576) Count 309 Tasks 4 Chunks Type float32 numpy.ndarray",1080  1440  4,

Unnamed: 0,Array,Chunk
Bytes,23.73 MiB,6.33 MiB
Shape,"(4, 1440, 1080)","(4, 720, 576)"
Count,309 Tasks,4 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,560 B,560 B
Shape,"(2, 35)","(2, 35)"
Count,4 Tasks,1 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 560 B 560 B Shape (2, 35) (2, 35) Count 4 Tasks 1 Chunks Type float64 numpy.ndarray",35  2,

Unnamed: 0,Array,Chunk
Bytes,560 B,560 B
Shape,"(2, 35)","(2, 35)"
Count,4 Tasks,1 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,11.87 MiB,1.58 MiB
Shape,"(1440, 1080, 2)","(720, 576, 1)"
Count,1385 Tasks,8 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 11.87 MiB 1.58 MiB Shape (1440, 1080, 2) (720, 576, 1) Count 1385 Tasks 8 Chunks Type float32 numpy.ndarray",2  1080  1440,

Unnamed: 0,Array,Chunk
Bytes,11.87 MiB,1.58 MiB
Shape,"(1440, 1080, 2)","(720, 576, 1)"
Count,1385 Tasks,8 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,11.87 MiB,1.58 MiB
Shape,"(1440, 1080, 2)","(720, 576, 1)"
Count,1673 Tasks,8 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 11.87 MiB 1.58 MiB Shape (1440, 1080, 2) (720, 576, 1) Count 1673 Tasks 8 Chunks Type float32 numpy.ndarray",2  1080  1440,

Unnamed: 0,Array,Chunk
Bytes,11.87 MiB,1.58 MiB
Shape,"(1440, 1080, 2)","(720, 576, 1)"
Count,1673 Tasks,8 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.19 TiB,110.74 MiB
Shape,"(3012, 35, 1080, 1440)","(1, 35, 576, 720)"
Count,40167 Tasks,12048 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 1.19 TiB 110.74 MiB Shape (3012, 35, 1080, 1440) (1, 35, 576, 720) Count 40167 Tasks 12048 Chunks Type float64 numpy.ndarray",3012  1  1440  1080  35,

Unnamed: 0,Array,Chunk
Bytes,1.19 TiB,110.74 MiB
Shape,"(3012, 35, 1080, 1440)","(1, 35, 576, 720)"
Count,40167 Tasks,12048 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.19 TiB,110.74 MiB
Shape,"(3012, 35, 1080, 1440)","(1, 35, 576, 720)"
Count,32130 Tasks,12048 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 1.19 TiB 110.74 MiB Shape (3012, 35, 1080, 1440) (1, 35, 576, 720) Count 32130 Tasks 12048 Chunks Type float64 numpy.ndarray",3012  1  1440  1080  35,

Unnamed: 0,Array,Chunk
Bytes,1.19 TiB,110.74 MiB
Shape,"(3012, 35, 1080, 1440)","(1, 35, 576, 720)"
Count,32130 Tasks,12048 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.19 TiB,110.74 MiB
Shape,"(3012, 35, 1080, 1440)","(1, 35, 576, 720)"
Count,51218 Tasks,12048 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 1.19 TiB 110.74 MiB Shape (3012, 35, 1080, 1440) (1, 35, 576, 720) Count 51218 Tasks 12048 Chunks Type float64 numpy.ndarray",3012  1  1440  1080  35,

Unnamed: 0,Array,Chunk
Bytes,1.19 TiB,110.74 MiB
Shape,"(3012, 35, 1080, 1440)","(1, 35, 576, 720)"
Count,51218 Tasks,12048 Chunks
Type,float64,numpy.ndarray


Temp saving to ../../data/temp/scratch_temp/fine_density_tests_combined_2/GFDL-CM4.gn.Omon_temp_high_res.zarr


  return jmd95numba.rho(s,t,p)
  return jmd95numba.rho(s,t,p)
  return jmd95numba.rho(s,t,p)
  return jmd95numba.rho(s,t,p)
  return jmd95numba.rho(s,t,p)
  return jmd95numba.rho(s,t,p)
  return jmd95numba.rho(s,t,p)
  return jmd95numba.rho(s,t,p)
  return jmd95numba.rho(s,t,p)
  return jmd95numba.rho(s,t,p)
  return jmd95numba.rho(s,t,p)
  return jmd95numba.rho(s,t,p)
  return jmd95numba.rho(s,t,p)
  return jmd95numba.rho(s,t,p)
  return jmd95numba.rho(s,t,p)
  return jmd95numba.rho(s,t,p)
  return jmd95numba.rho(s,t,p)
  return jmd95numba.rho(s,t,p)
  return jmd95numba.rho(s,t,p)
  return jmd95numba.rho(s,t,p)


  return jmd95numba.rho(s,t,p)
  return jmd95numba.rho(s,t,p)
  return jmd95numba.rho(s,t,p)
  return jmd95numba.rho(s,t,p)
  return jmd95numba.rho(s,t,p)
  return jmd95numba.rho(s,t,p)
  return jmd95numba.rho(s,t,p)
  return jmd95numba.rho(s,t,p)
  return jmd95numba.rho(s,t,p)
  return jmd95numba.rho(s,t,p)
  return jmd95numba.rho(s,t,p)
  return jmd95numba.rho(s,t,p)
  return jmd95numba.rho(s,t,p)
  return jmd95numba.rho(s,t,p)
  return jmd95numba.rho(s,t,p)
  return jmd95numba.rho(s,t,p)
  return jmd95numba.rho(s,t,p)
  return jmd95numba.rho(s,t,p)
  return jmd95numba.rho(s,t,p)
  return jmd95numba.rho(s,t,p)
  return jmd95numba.rho(s,t,p)
  return jmd95numba.rho(s,t,p)
  return jmd95numba.rho(s,t,p)
  return jmd95numba.rho(s,t,p)
  return jmd95numba.rho(s,t,p)
  return jmd95numba.rho(s,t,p)
  return jmd95numba.rho(s,t,p)
  return jmd95numba.rho(s,t,p)
  return jmd95numba.rho(s,t,p)
  return jmd95numba.rho(s,t,p)
  return jmd95numba.rho(s,t,p)
  return jmd95numba.rho(s,t,p)
  return

Check that this is chunked properly!


Unnamed: 0,Array,Chunk
Bytes,5.93 MiB,1.58 MiB
Shape,"(1440, 1080)","(720, 576)"
Count,5 Tasks,4 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 5.93 MiB 1.58 MiB Shape (1440, 1080) (720, 576) Count 5 Tasks 4 Chunks Type float32 numpy.ndarray",1080  1440,

Unnamed: 0,Array,Chunk
Bytes,5.93 MiB,1.58 MiB
Shape,"(1440, 1080)","(720, 576)"
Count,5 Tasks,4 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,5.93 MiB,1.58 MiB
Shape,"(1440, 1080)","(720, 576)"
Count,5 Tasks,4 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 5.93 MiB 1.58 MiB Shape (1440, 1080) (720, 576) Count 5 Tasks 4 Chunks Type float32 numpy.ndarray",1080  1440,

Unnamed: 0,Array,Chunk
Bytes,5.93 MiB,1.58 MiB
Shape,"(1440, 1080)","(720, 576)"
Count,5 Tasks,4 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,11.87 MiB,1.58 MiB
Shape,"(1440, 1080, 2)","(720, 576, 1)"
Count,9 Tasks,8 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 11.87 MiB 1.58 MiB Shape (1440, 1080, 2) (720, 576, 1) Count 9 Tasks 8 Chunks Type float32 numpy.ndarray",2  1080  1440,

Unnamed: 0,Array,Chunk
Bytes,11.87 MiB,1.58 MiB
Shape,"(1440, 1080, 2)","(720, 576, 1)"
Count,9 Tasks,8 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,23.73 MiB,6.33 MiB
Shape,"(4, 1440, 1080)","(4, 720, 576)"
Count,5 Tasks,4 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 23.73 MiB 6.33 MiB Shape (4, 1440, 1080) (4, 720, 576) Count 5 Tasks 4 Chunks Type float32 numpy.ndarray",1080  1440  4,

Unnamed: 0,Array,Chunk
Bytes,23.73 MiB,6.33 MiB
Shape,"(4, 1440, 1080)","(4, 720, 576)"
Count,5 Tasks,4 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,560 B,560 B
Shape,"(2, 35)","(2, 35)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 560 B 560 B Shape (2, 35) (2, 35) Count 2 Tasks 1 Chunks Type float64 numpy.ndarray",35  2,

Unnamed: 0,Array,Chunk
Bytes,560 B,560 B
Shape,"(2, 35)","(2, 35)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,5.93 MiB,1.58 MiB
Shape,"(1440, 1080)","(720, 576)"
Count,5 Tasks,4 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 5.93 MiB 1.58 MiB Shape (1440, 1080) (720, 576) Count 5 Tasks 4 Chunks Type float32 numpy.ndarray",1080  1440,

Unnamed: 0,Array,Chunk
Bytes,5.93 MiB,1.58 MiB
Shape,"(1440, 1080)","(720, 576)"
Count,5 Tasks,4 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,11.87 MiB,1.58 MiB
Shape,"(1440, 1080, 2)","(720, 576, 1)"
Count,9 Tasks,8 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 11.87 MiB 1.58 MiB Shape (1440, 1080, 2) (720, 576, 1) Count 9 Tasks 8 Chunks Type float32 numpy.ndarray",2  1080  1440,

Unnamed: 0,Array,Chunk
Bytes,11.87 MiB,1.58 MiB
Shape,"(1440, 1080, 2)","(720, 576, 1)"
Count,9 Tasks,8 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,23.73 MiB,6.33 MiB
Shape,"(4, 1440, 1080)","(4, 720, 576)"
Count,5 Tasks,4 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 23.73 MiB 6.33 MiB Shape (4, 1440, 1080) (4, 720, 576) Count 5 Tasks 4 Chunks Type float32 numpy.ndarray",1080  1440  4,

Unnamed: 0,Array,Chunk
Bytes,23.73 MiB,6.33 MiB
Shape,"(4, 1440, 1080)","(4, 720, 576)"
Count,5 Tasks,4 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.19 TiB,110.74 MiB
Shape,"(3012, 35, 1080, 1440)","(1, 35, 576, 720)"
Count,12049 Tasks,12048 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 1.19 TiB 110.74 MiB Shape (3012, 35, 1080, 1440) (1, 35, 576, 720) Count 12049 Tasks 12048 Chunks Type float64 numpy.ndarray",3012  1  1440  1080  35,

Unnamed: 0,Array,Chunk
Bytes,1.19 TiB,110.74 MiB
Shape,"(3012, 35, 1080, 1440)","(1, 35, 576, 720)"
Count,12049 Tasks,12048 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.19 TiB,110.74 MiB
Shape,"(3012, 35, 1080, 1440)","(1, 35, 576, 720)"
Count,12049 Tasks,12048 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 1.19 TiB 110.74 MiB Shape (3012, 35, 1080, 1440) (1, 35, 576, 720) Count 12049 Tasks 12048 Chunks Type float64 numpy.ndarray",3012  1  1440  1080  35,

Unnamed: 0,Array,Chunk
Bytes,1.19 TiB,110.74 MiB
Shape,"(3012, 35, 1080, 1440)","(1, 35, 576, 720)"
Count,12049 Tasks,12048 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.19 TiB,110.74 MiB
Shape,"(3012, 35, 1080, 1440)","(1, 35, 576, 720)"
Count,12049 Tasks,12048 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 1.19 TiB 110.74 MiB Shape (3012, 35, 1080, 1440) (1, 35, 576, 720) Count 12049 Tasks 12048 Chunks Type float64 numpy.ndarray",3012  1  1440  1080  35,

Unnamed: 0,Array,Chunk
Bytes,1.19 TiB,110.74 MiB
Shape,"(3012, 35, 1080, 1440)","(1, 35, 576, 720)"
Count,12049 Tasks,12048 Chunks
Type,float64,numpy.ndarray


Calculating fixed oxygen values


  x = np.divide(x1, x2, out)
  x = np.divide(x1, x2, out)
  x = np.divide(x1, x2, out)


Before transforming


Unnamed: 0,Array,Chunk
Bytes,11.87 MiB,1.58 MiB
Shape,"(1440, 1080, 2)","(720, 576, 1)"
Count,9 Tasks,8 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 11.87 MiB 1.58 MiB Shape (1440, 1080, 2) (720, 576, 1) Count 9 Tasks 8 Chunks Type float32 numpy.ndarray",2  1080  1440,

Unnamed: 0,Array,Chunk
Bytes,11.87 MiB,1.58 MiB
Shape,"(1440, 1080, 2)","(720, 576, 1)"
Count,9 Tasks,8 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,23.73 MiB,6.33 MiB
Shape,"(4, 1440, 1080)","(4, 720, 576)"
Count,5 Tasks,4 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 23.73 MiB 6.33 MiB Shape (4, 1440, 1080) (4, 720, 576) Count 5 Tasks 4 Chunks Type float32 numpy.ndarray",1080  1440  4,

Unnamed: 0,Array,Chunk
Bytes,23.73 MiB,6.33 MiB
Shape,"(4, 1440, 1080)","(4, 720, 576)"
Count,5 Tasks,4 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,560 B,560 B
Shape,"(2, 35)","(2, 35)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 560 B 560 B Shape (2, 35) (2, 35) Count 2 Tasks 1 Chunks Type float64 numpy.ndarray",35  2,

Unnamed: 0,Array,Chunk
Bytes,560 B,560 B
Shape,"(2, 35)","(2, 35)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,11.87 MiB,1.58 MiB
Shape,"(1440, 1080, 2)","(720, 576, 1)"
Count,9 Tasks,8 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 11.87 MiB 1.58 MiB Shape (1440, 1080, 2) (720, 576, 1) Count 9 Tasks 8 Chunks Type float32 numpy.ndarray",2  1080  1440,

Unnamed: 0,Array,Chunk
Bytes,11.87 MiB,1.58 MiB
Shape,"(1440, 1080, 2)","(720, 576, 1)"
Count,9 Tasks,8 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,23.73 MiB,6.33 MiB
Shape,"(4, 1440, 1080)","(4, 720, 576)"
Count,5 Tasks,4 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 23.73 MiB 6.33 MiB Shape (4, 1440, 1080) (4, 720, 576) Count 5 Tasks 4 Chunks Type float32 numpy.ndarray",1080  1440  4,

Unnamed: 0,Array,Chunk
Bytes,23.73 MiB,6.33 MiB
Shape,"(4, 1440, 1080)","(4, 720, 576)"
Count,5 Tasks,4 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.19 TiB,110.74 MiB
Shape,"(3012, 35, 1080, 1440)","(1, 35, 576, 720)"
Count,12049 Tasks,12048 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 1.19 TiB 110.74 MiB Shape (3012, 35, 1080, 1440) (1, 35, 576, 720) Count 12049 Tasks 12048 Chunks Type float64 numpy.ndarray",3012  1  1440  1080  35,

Unnamed: 0,Array,Chunk
Bytes,1.19 TiB,110.74 MiB
Shape,"(3012, 35, 1080, 1440)","(1, 35, 576, 720)"
Count,12049 Tasks,12048 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.19 TiB,110.74 MiB
Shape,"(3012, 35, 1080, 1440)","(1, 35, 576, 720)"
Count,24105 Tasks,12048 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 1.19 TiB 110.74 MiB Shape (3012, 35, 1080, 1440) (1, 35, 576, 720) Count 24105 Tasks 12048 Chunks Type float64 numpy.ndarray",3012  1  1440  1080  35,

Unnamed: 0,Array,Chunk
Bytes,1.19 TiB,110.74 MiB
Shape,"(3012, 35, 1080, 1440)","(1, 35, 576, 720)"
Count,24105 Tasks,12048 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.19 TiB,110.74 MiB
Shape,"(3012, 35, 1080, 1440)","(1, 35, 576, 720)"
Count,12049 Tasks,12048 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 1.19 TiB 110.74 MiB Shape (3012, 35, 1080, 1440) (1, 35, 576, 720) Count 12049 Tasks 12048 Chunks Type float64 numpy.ndarray",3012  1  1440  1080  35,

Unnamed: 0,Array,Chunk
Bytes,1.19 TiB,110.74 MiB
Shape,"(3012, 35, 1080, 1440)","(1, 35, 576, 720)"
Count,12049 Tasks,12048 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,7.16 TiB,110.74 MiB
Shape,"(3012, 35, 1080, 1440, 6)","(1, 35, 576, 720, 1)"
Count,216886 Tasks,72288 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 7.16 TiB 110.74 MiB Shape (3012, 35, 1080, 1440, 6) (1, 35, 576, 720, 1) Count 216886 Tasks 72288 Chunks Type float64 numpy.ndarray",35  3012  6  1440  1080,

Unnamed: 0,Array,Chunk
Bytes,7.16 TiB,110.74 MiB
Shape,"(3012, 35, 1080, 1440, 6)","(1, 35, 576, 720, 1)"
Count,216886 Tasks,72288 Chunks
Type,float64,numpy.ndarray


Check if ocean volume is conserved...
Relative difference ocean vol: 0.000636806484266166% | OMZ vol [7.19978435e-07 5.67405855e-07 5.82037381e-07 5.39676722e-07
 3.98273931e-07 3.73529078e-07]%
Write out final file


Unnamed: 0,Array,Chunk
Bytes,130.88 GiB,142.38 MiB
Shape,"(251, 1080, 1440, 45)","(1, 576, 720, 45)"
Count,389555 Tasks,1004 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 130.88 GiB 142.38 MiB Shape (251, 1080, 1440, 45) (1, 576, 720, 45) Count 389555 Tasks 1004 Chunks Type float64 numpy.ndarray",251  1  45  1440  1080,

Unnamed: 0,Array,Chunk
Bytes,130.88 GiB,142.38 MiB
Shape,"(251, 1080, 1440, 45)","(1, 576, 720, 45)"
Count,389555 Tasks,1004 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,130.88 GiB,142.38 MiB
Shape,"(251, 1080, 1440, 45)","(1, 576, 720, 45)"
Count,100403 Tasks,1004 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 130.88 GiB 142.38 MiB Shape (251, 1080, 1440, 45) (1, 576, 720, 45) Count 100403 Tasks 1004 Chunks Type float64 numpy.ndarray",251  1  45  1440  1080,

Unnamed: 0,Array,Chunk
Bytes,130.88 GiB,142.38 MiB
Shape,"(251, 1080, 1440, 45)","(1, 576, 720, 45)"
Count,100403 Tasks,1004 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,261.75 GiB,142.38 MiB
Shape,"(2, 251, 1080, 1440, 45)","(1, 1, 576, 720, 45)"
Count,176707 Tasks,2008 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 261.75 GiB 142.38 MiB Shape (2, 251, 1080, 1440, 45) (1, 1, 576, 720, 45) Count 176707 Tasks 2008 Chunks Type float64 numpy.ndarray",251  2  45  1440  1080,

Unnamed: 0,Array,Chunk
Bytes,261.75 GiB,142.38 MiB
Shape,"(2, 251, 1080, 1440, 45)","(1, 1, 576, 720, 45)"
Count,176707 Tasks,2008 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,11.87 MiB,1.58 MiB
Shape,"(1440, 1080, 2)","(720, 576, 1)"
Count,9 Tasks,8 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 11.87 MiB 1.58 MiB Shape (1440, 1080, 2) (720, 576, 1) Count 9 Tasks 8 Chunks Type float32 numpy.ndarray",2  1080  1440,

Unnamed: 0,Array,Chunk
Bytes,11.87 MiB,1.58 MiB
Shape,"(1440, 1080, 2)","(720, 576, 1)"
Count,9 Tasks,8 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,11.87 MiB,1.58 MiB
Shape,"(1440, 1080, 2)","(720, 576, 1)"
Count,9 Tasks,8 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 11.87 MiB 1.58 MiB Shape (1440, 1080, 2) (720, 576, 1) Count 9 Tasks 8 Chunks Type float32 numpy.ndarray",2  1080  1440,

Unnamed: 0,Array,Chunk
Bytes,11.87 MiB,1.58 MiB
Shape,"(1440, 1080, 2)","(720, 576, 1)"
Count,9 Tasks,8 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,23.73 MiB,6.33 MiB
Shape,"(4, 1440, 1080)","(4, 720, 576)"
Count,5 Tasks,4 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 23.73 MiB 6.33 MiB Shape (4, 1440, 1080) (4, 720, 576) Count 5 Tasks 4 Chunks Type float32 numpy.ndarray",1080  1440  4,

Unnamed: 0,Array,Chunk
Bytes,23.73 MiB,6.33 MiB
Shape,"(4, 1440, 1080)","(4, 720, 576)"
Count,5 Tasks,4 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,23.73 MiB,6.33 MiB
Shape,"(4, 1440, 1080)","(4, 720, 576)"
Count,5 Tasks,4 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 23.73 MiB 6.33 MiB Shape (4, 1440, 1080) (4, 720, 576) Count 5 Tasks 4 Chunks Type float32 numpy.ndarray",1080  1440  4,

Unnamed: 0,Array,Chunk
Bytes,23.73 MiB,6.33 MiB
Shape,"(4, 1440, 1080)","(4, 720, 576)"
Count,5 Tasks,4 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,130.88 GiB,142.38 MiB
Shape,"(251, 1080, 1440, 45)","(1, 576, 720, 45)"
Count,510044 Tasks,1004 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 130.88 GiB 142.38 MiB Shape (251, 1080, 1440, 45) (1, 576, 720, 45) Count 510044 Tasks 1004 Chunks Type float64 numpy.ndarray",251  1  45  1440  1080,

Unnamed: 0,Array,Chunk
Bytes,130.88 GiB,142.38 MiB
Shape,"(251, 1080, 1440, 45)","(1, 576, 720, 45)"
Count,510044 Tasks,1004 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,785.26 GiB,142.38 MiB
Shape,"(251, 1080, 1440, 6, 45)","(1, 576, 720, 1, 45)"
Count,843384 Tasks,6024 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 785.26 GiB 142.38 MiB Shape (251, 1080, 1440, 6, 45) (1, 576, 720, 1, 45) Count 843384 Tasks 6024 Chunks Type float64 numpy.ndarray",1080  251  45  6  1440,

Unnamed: 0,Array,Chunk
Bytes,785.26 GiB,142.38 MiB
Shape,"(251, 1080, 1440, 6, 45)","(1, 576, 720, 1, 45)"
Count,843384 Tasks,6024 Chunks
Type,float64,numpy.ndarray


  return self.ufunc(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))
  return self.ufunc(*args, **kwargs)
  return self.ufunc(*args, **kwargs)
  return func(*(_execute_task(a, cache) for a in args))
  return func(*(_execute_task(a, cache) for a in args))
  return self.ufunc(*args, **kwargs)
  return self.ufunc(*args, **kwargs)
  x = np.divide(x1, x2, out)
  x = np.divide(x1, x2, out)
  x = np.divide(x1, x2, out)


KeyboardInterrupt: 

In [None]:
ds

In [None]:
client.restart()

In [None]:
ds.isel(time=0).load()

### THoughts

Should I convert dz_t to a dask array in any case?

- 

In [None]:
ds.o2.isel(time=-2, lev=20).plot()

In [None]:
# shutil.rmtree('/home/jbusecke/projects/cmip6_omz/data/processed/fine_density_tests_combined_2/none.none.MPI-ESM1-2-HR.none.r1i1p1f1.Omon.gn.none.none.zarr')

- ~~CanESM5 crapped out (only for the variable o2 case)~~

## Can I save the output?

import pathlib

In [None]:
dont execute this....

In [None]:
# import dask
# from multiprocessing.pool import ThreadPool
# dask.config.set(pool=ThreadPool(6))

In [None]:
import pathlib
import xarray as xr
import numpy as np
foldername = "fine_density_tests_combined_2"
tempfolder = pathlib.Path(f"../../data/temp/scratch_temp/{foldername}")

In [None]:
datasets = [
    xr.open_zarr(
        tempfolder.joinpath(f"temp_write_split_{str(i)}.zarr"),
        consolidated=False,
        use_cftime=True
    ) for i in range(251)
]

In [None]:
ds = xr.concat(datasets, 'time', compat='override', coords='minimal')

In [None]:
foldername = "fine_density_tests_combined_2"
# ofolder = maybe_create_folder(f"../../data/external/{foldername}")
ofolder = pathlib.Path(f"../../data/processed/{foldername}")

In [None]:
from cmip6_preprocessing.utils import cmip6_dataset_id

In [None]:
manual_store = ofolder.joinpath(f"{cmip6_dataset_id(ds)}_synthetic.zarr")

In [None]:
import shutil
shutil.rmtree(manual_store)

In [None]:
append_write_zarr(ds, manual_store, 20)

In [None]:
xr.open_zarr(manual_store).thetao.isel(sigma_0=20).mean(['x', 'y']).plot()

Process Dask Worker process (from Nanny):
Process Dask Worker process (from Nanny):
Traceback (most recent call last):
Traceback (most recent call last):
  File "/scratch/gpfs2/jbusecke/conda_tigressdata/envs/cmip6_omz/lib/python3.9/site-packages/distributed/nanny.py", line 830, in _run
  File "/scratch/gpfs2/jbusecke/conda_tigressdata/envs/cmip6_omz/lib/python3.9/site-packages/distributed/nanny.py", line 830, in _run
    loop.run_sync(run)
  File "/scratch/gpfs2/jbusecke/conda_tigressdata/envs/cmip6_omz/lib/python3.9/site-packages/tornado/ioloop.py", line 524, in run_sync
    loop.run_sync(run)
  File "/scratch/gpfs2/jbusecke/conda_tigressdata/envs/cmip6_omz/lib/python3.9/site-packages/tornado/ioloop.py", line 524, in run_sync
    self.start()
  File "/scratch/gpfs2/jbusecke/conda_tigressdata/envs/cmip6_omz/lib/python3.9/site-packages/tornado/platform/asyncio.py", line 199, in start
    self.start()
  File "/scratch/gpfs2/jbusecke/conda_tigressdata/envs/cmip6_omz/lib/python3.9/site-pa

I have to loop to write from one zarr to another? WTF is wrong with this machine?