In [56]:
import xarray as xr
import matplotlib.pyplot as plt
from glob import glob
import datetime
import numpy as np
from dateutil.relativedelta import * 


In [48]:
# OAFlux variable identifiers, one per product, with the quantity and unit of each.
var_name = (
    'evapr_oaflux',  # ocean evaporation, monthly mean [cm/year]
    'lh_oaflux',     # latent heat flux, positive upward [W/m2]
    'lw_isccp',      # net longwave radiation, positive upward [W/m2]
    'qa_oaflux',     # air humidity at 2 m [g/kg]
    'qnet',          # net heat flux, positive downward [W/m2]
    'sh_oaflux',     # sensible heat flux, positive upward [W/m2]
    'sw_isccp',      # net shortwave radiation, positive downward [W/m2]
    'ta_oaflux',     # air temperature at 2 m [degree-C]
    'ts_oaflux',     # sea surface temperature [degree-C]
    'ws_oaflux',     # neutral wind speed at 10 m [m/s]
)

# Every file for a given variable also includes the monthly-mean error estimates
# for that variable.

# The flux and basic meteorological variables were saved in the "ncshort" format to
# minimize file size. Land and missing values are set to 32766. All flux variables
# are multiplied by a factor of 10, surface meteorological variables by 100, and
# evaporation by 10.

In [None]:
def read_netcdfs(files, dim, transform_func=None):
    """Load every netCDF file matching ``files`` and concatenate along ``dim``.

    Each file name must end in ``YYYY.nc``. The file's ``time`` values are
    interpreted as 1-based month numbers and replaced by the first day of the
    corresponding month of year ``YYYY``. Only the first variable of each
    dataset is kept.

    Parameters
    ----------
    files : str
        Glob pattern selecting the files to read; matches are processed in
        sorted order.
    dim : str
        Name of the dimension to concatenate along.
    transform_func : callable, optional
        If given, it is called on each per-file array (after the time
        coordinate has been rebuilt and the variable selected) and its return
        value is used in place of that array.

    Returns
    -------
    The result of ``xr.concat`` over the per-file arrays.

    Raises
    ------
    ValueError
        If no file matches ``files``, or if the four characters before the
        ``.nc`` suffix of a file name are not an integer year.
    """
    def process_one_path(path):
        # The four characters preceding the ".nc" suffix hold the year.
        # Parse it first so a badly named file fails before it is opened.
        year = path[-7:-3]
        base = datetime.datetime(int(year), 1, 1)
        # The context manager guarantees the file is closed after use.
        with xr.open_dataset(path) as ds:
            # time value v  ->  January of `year` plus (v - 1) months
            dates = np.array([base+relativedelta(months=+i) for i in ds.time.values-1])
            ds['time'] = dates
            data = ds[list(ds.keys())[0]]
            if transform_func is not None:
                data = transform_func(data)
            # Pull the values into memory so they stay usable once the
            # file has been closed.
            data.load()
            return data

    paths = sorted(glob(files))
    if not paths:
        # Fail with an explicit message instead of the less obvious error
        # xr.concat gives for an empty list.
        raise ValueError("no files match pattern {!r}".format(files))
    datasets = [process_one_path(p) for p in paths]
    combined = xr.concat(datasets, dim)
    return combined

# Read every evaporation file and join them along the time dimension.
da = read_netcdfs(
    '/home/jovyan/marineHeatwave-image-classification/data/OAFlux/evapr_oaflux*.nc',
    dim='time',
)

In [None]:
# Display the combined array returned by read_netcdfs.
da

In [32]:
def process_one_path(path):
    """Open one netCDF file, load it fully into memory and return the dataset."""
    # use a context manager, to ensure the file gets closed after use
    with xr.open_dataset(path) as ds:
        # load all data, to ensure we can use it after the file is closed
        ds.load()
        return ds


# Glob pattern for the evaporation files. This assignment must live at module
# level: placed inside the function after `return` it never runs, leaving
# `files` undefined for the glob call below.
files = '/home/jovyan/marineHeatwave-image-classification/data/OAFlux/evapr_oaflux*.nc'
paths = sorted(glob(files))
datasets = [process_one_path(p) for p in paths]

In [61]:
# Rebuild the time coordinate of the first file as calendar dates.
# Take the dataset that belongs to paths[0] explicitly, so the year parsed
# from the file name and the data always refer to the same file.
ds = datasets[0]
year = paths[0][-7:-3]  # the four characters before ".nc"
base = datetime.datetime(int(year), 1, 1)
# time value v  ->  January of `year` plus (v - 1) months
dates = np.array([base+relativedelta(months=+i) for i in ds.time.values-1])
# assign_coords returns a new object, leaving datasets[0] untouched, so this
# cell can be re-run safely.
ds = ds.assign_coords(time=dates)

array([datetime.datetime(1958, 1, 1, 0, 0),
       datetime.datetime(1958, 2, 1, 0, 0),
       datetime.datetime(1958, 3, 1, 0, 0),
       datetime.datetime(1958, 4, 1, 0, 0),
       datetime.datetime(1958, 5, 1, 0, 0),
       datetime.datetime(1958, 6, 1, 0, 0),
       datetime.datetime(1958, 7, 1, 0, 0),
       datetime.datetime(1958, 8, 1, 0, 0),
       datetime.datetime(1958, 9, 1, 0, 0),
       datetime.datetime(1958, 10, 1, 0, 0),
       datetime.datetime(1958, 11, 1, 0, 0),
       datetime.datetime(1958, 12, 1, 0, 0)], dtype=object)

In [69]:
# Display the current dataset.
ds

Unnamed: 0,Array,Chunk
Bytes,2.97 MiB,2.97 MiB
Shape,"(12, 180, 360)","(12, 180, 360)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 2.97 MiB 2.97 MiB Shape (12, 180, 360) (12, 180, 360) Count 2 Tasks 1 Chunks Type float32 numpy.ndarray",360  180  12,

Unnamed: 0,Array,Chunk
Bytes,2.97 MiB,2.97 MiB
Shape,"(12, 180, 360)","(12, 180, 360)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.97 MiB,2.97 MiB
Shape,"(12, 180, 360)","(12, 180, 360)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 2.97 MiB 2.97 MiB Shape (12, 180, 360) (12, 180, 360) Count 2 Tasks 1 Chunks Type float32 numpy.ndarray",360  180  12,

Unnamed: 0,Array,Chunk
Bytes,2.97 MiB,2.97 MiB
Shape,"(12, 180, 360)","(12, 180, 360)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray


In [66]:
# Inspect the time coordinate of the current dataset.
ds.time

In [57]:
# Sanity check: adding a one-month relativedelta to `base` should give the
# first day of the following month.
base+relativedelta(months=+1)

datetime.datetime(1958, 2, 1, 0, 0)

In [80]:

# Open the 1980 evaporation file through the multi-file API and display it.
ds = xr.open_mfdataset(
    '/home/jovyan/marineHeatwave-image-classification/data/OAFlux/evapr_oaflux_1980.nc',
    engine='netcdf4',
    combine='nested',
    concat_dim='time',
)
ds

Unnamed: 0,Array,Chunk
Bytes,2.97 MiB,2.97 MiB
Shape,"(12, 180, 360)","(12, 180, 360)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 2.97 MiB 2.97 MiB Shape (12, 180, 360) (12, 180, 360) Count 2 Tasks 1 Chunks Type float32 numpy.ndarray",360  180  12,

Unnamed: 0,Array,Chunk
Bytes,2.97 MiB,2.97 MiB
Shape,"(12, 180, 360)","(12, 180, 360)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.97 MiB,2.97 MiB
Shape,"(12, 180, 360)","(12, 180, 360)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 2.97 MiB 2.97 MiB Shape (12, 180, 360) (12, 180, 360) Count 2 Tasks 1 Chunks Type float32 numpy.ndarray",360  180  12,

Unnamed: 0,Array,Chunk
Bytes,2.97 MiB,2.97 MiB
Shape,"(12, 180, 360)","(12, 180, 360)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray


In [91]:
# Name of the first key of the dataset; read_netcdfs uses this same expression
# to choose which variable to keep.
list(ds.keys())[0]

'evapr'

In [26]:
# Inspect the time coordinate of the dataset opened above.
ds.time

In [16]:
# Open the 1977 evaporation file with time decoding switched off, so the
# time values stay exactly as stored in the file.
# `concat_dim` is only valid together with combine='nested'; without it,
# current xarray versions raise a ValueError.
ds = xr.open_mfdataset(
    '/home/jovyan/marineHeatwave-image-classification/data/OAFlux/evapr_oaflux_1977.nc',
    concat_dim='time',
    combine='nested',
    decode_times=False,
    engine='netcdf4',
)
ds

Unnamed: 0,Array,Chunk
Bytes,2.97 MiB,2.97 MiB
Shape,"(12, 180, 360)","(12, 180, 360)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 2.97 MiB 2.97 MiB Shape (12, 180, 360) (12, 180, 360) Count 2 Tasks 1 Chunks Type float32 numpy.ndarray",360  180  12,

Unnamed: 0,Array,Chunk
Bytes,2.97 MiB,2.97 MiB
Shape,"(12, 180, 360)","(12, 180, 360)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.97 MiB,2.97 MiB
Shape,"(12, 180, 360)","(12, 180, 360)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 2.97 MiB 2.97 MiB Shape (12, 180, 360) (12, 180, 360) Count 2 Tasks 1 Chunks Type float32 numpy.ndarray",360  180  12,

Unnamed: 0,Array,Chunk
Bytes,2.97 MiB,2.97 MiB
Shape,"(12, 180, 360)","(12, 180, 360)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray


In [9]:
# Inspect the undecoded time coordinate (the file was opened with decode_times=False).
ds.time