In [None]:
# default_exp read_data

# read_data

> read and preprocess netcdf data

In [None]:
from nbdev import *

In [None]:
#export
import xarray as xr
from glob import glob

In [None]:
#export
def transform_calendar(ds,
                       timedim="time",
                       calendarname="proleptic_gregorian"):
    """Set the ``calendar`` attribute of the time index in an xarray dataset.

    Only the attribute is written; the time values themselves are not
    decoded here.

    Args:
        ds: dataset-like object; ``ds[timedim]`` must expose an ``attrs``
            mapping.
        timedim (str): name of the time dimension/coordinate.
        calendarname (str): calendar name to store in the attribute.

    Returns:
        The same ``ds`` object, modified in place.
    """
    ds[timedim].attrs['calendar'] = calendarname
    return ds

In [None]:
# Render the documentation entry for transform_calendar (output shown below).
show_doc(transform_calendar)

<h4 id="transform_calendar" class="doc_header"><code>transform_calendar</code><a href="__main__.py#L2" class="source_link" style="float:right">[source]</a></h4>

> <code>transform_calendar</code>(**`ds`**, **`timedim`**=*`'time'`*, **`calendarname`**=*`'proleptic_gregorin'`*)

Transforms calendar of time index in xarray dataset

Use in combination with `xr.open_dataset([...], decode_times=False)`

Args:
- ds (object): xarray dataset

Kwargs:

- timedim (string): name of time dim
- calendarname (string): calendar name

Returns: 
- ds (xarray dataset)

In [None]:
#export
def read_netcdfs(files, dim, transform_func=None, transform_calendar=None, cftime=True):
    """Reads multiple netCDF files. Should be used when open_mfdataset is too slow.

    Each file matching ``files`` is opened, optionally preprocessed, loaded
    into memory, and the results are concatenated along ``dim``.

    Args:
        files (str): glob pattern selecting the files to read; matches are
            processed in sorted order.
        dim (str): dimension to concatenate along. When
            ``transform_calendar`` is given, the calendar attribute is
            written to ``ds[dim]``, so ``dim`` must then be the time
            dimension.
        transform_func (callable, optional): called with each opened dataset;
            its return value replaces the dataset. ``None`` skips this step.
        transform_calendar (str, optional): calendar name to set on
            ``ds[dim]`` before the times are decoded. ``None`` lets xarray
            decode times directly when opening the file.
        cftime (bool): forwarded to xarray as ``use_cftime``.

    Returns:
        The concatenated xarray object.

    Raises:
        ValueError: if no file matches ``files``.
    """
    paths = sorted(glob(files))
    if not paths:
        # Fail early with the offending pattern instead of letting the
        # concatenation of an empty list fail with an unrelated message.
        raise ValueError("no files match pattern {!r}".format(files))

    # With a calendar override the times must stay undecoded on open, so the
    # attribute can be replaced before decoding.
    decode_on_open = transform_calendar is None

    def process_one_path(path):
        with xr.open_dataset(path, decode_times=decode_on_open, use_cftime=cftime) as ds:
            if transform_calendar is not None:
                ds[dim].attrs['calendar'] = transform_calendar
                ds = xr.decode_cf(ds, use_cftime=cftime)
            if transform_func is not None:
                ds = transform_func(ds)
            # Load the data while the file is still open; the `with` block
            # closes it on exit.
            ds.load()
            return ds

    datasets = [process_one_path(p) for p in paths]
    return xr.concat(datasets, dim)

In [None]:
# Render the documentation entry for read_netcdfs (output shown below).
show_doc(read_netcdfs)

<h4 id="read_netcdfs" class="doc_header"><code>read_netcdfs</code><a href="__main__.py#L2" class="source_link" style="float:right">[source]</a></h4>

> <code>read_netcdfs</code>(**`files`**, **`dim`**, **`transform_func`**, **`transform_calendar`**=*`None`*, **`cftime`**=*`True`*)

Reads multiple netCDF files. Should be used when open_mfdataset is too slow.

Args:
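- files : glob pattern selecting the files to read (str); matching files are read in sorted order
- dim : dimension along which the files are concatenated (if transform_calendar is set, this must be the time dimension, because the calendar attribute is written to `ds[dim]`)
- transform_func : function applied to each opened dataset for additional preprocessing, or None to skip
- transform_calendar : name of calendar (see function transform_calendar())
- cftime : passed to xarray as `use_cftime` (default True)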

Returns:
- combined netcdf files as xarray object

Example:


combined =  read_netcdfs("*.nc", dim = "TIME",
                transform_func=lambda ds:ds.salt,
                transform_calendar="proleptic_gregorian")