In [2]:
import os
from pathlib import Path

import xarray as xr

In [3]:
# Directory holding the INSPIRE model output.  Defaults to the location
# that was originally hard-coded here; set the BALTIC_DATA_DIR environment
# variable to run the notebook against a copy stored somewhere else.
data_path = Path(
    os.environ.get("BALTIC_DATA_DIR", "/sfs/fs1/work-geomar3/smomw091/BalticSea/")
)

In [4]:
ls -lah {str(data_path)}

total 15G
drwxr-xr-x  7 smomw091 smomw  84K Feb 16 13:24 [0m[01;34m.[0m/
drwxr-xr-x 30 smomw091 smomw 168K Feb  2 10:36 [01;34m..[0m/
drwxr-xr-x  3 smomw091 smomw 8.0K May 22  2017 [01;34masc[0m/
-rw-r--r--  1 smomw091 smomw 3.5G Feb 16 10:58 INSPIRE_1979_2016_daily_oce_BSALT.nc
-rw-r--r--  1 smomw091 smomw 3.8G Feb 16 10:58 INSPIRE_1979_2016_daily_oce_BTEMP.nc
-rw-r--r--  1 smomw091 smomw 3.4G Feb 16 13:24 INSPIRE_1979_2016_daily_oce_SSS.nc
-rw-r--r--  1 smomw091 smomw 3.6G Feb 15 18:32 INSPIRE_1979_2016_daily_oce_SST.nc
-rw-r--r--  1 smomw091 smomw 120M Feb 16 10:54 INSPIRE_1979_2016_monthly_oce_BSALT.nc
-rw-r--r--  1 smomw091 smomw 132M Feb 16 08:30 INSPIRE_1979_2016_monthly_oce_BTEMP.nc
-rw-r--r--  1 smomw091 smomw 115M Feb 16 13:20 INSPIRE_1979_2016_monthly_oce_SSS.nc
-rw-r--r--  1 smomw091 smomw 122M Feb 15 18:27 INSPIRE_1979_2016_monthly_oce_SST.nc
drwxr-xr-x  2 smomw091 smomw 260K Feb  9  2017 [01;34mlogs[0m/
drwxr-xr-x 11 smomw091 smomw  60K May 12  2017 

In [5]:
# collect the monthly INSPIRE files in a reproducible (sorted) order
data_files = sorted(data_path.glob("INSPIRE_*_monthly_*.nc"))
print(data_files)

[PosixPath('/sfs/fs1/work-geomar3/smomw091/BalticSea/INSPIRE_1979_2016_monthly_oce_BSALT.nc'), PosixPath('/sfs/fs1/work-geomar3/smomw091/BalticSea/INSPIRE_1979_2016_monthly_oce_BTEMP.nc'), PosixPath('/sfs/fs1/work-geomar3/smomw091/BalticSea/INSPIRE_1979_2016_monthly_oce_SSS.nc'), PosixPath('/sfs/fs1/work-geomar3/smomw091/BalticSea/INSPIRE_1979_2016_monthly_oce_SST.nc')]


In [6]:
# open all files as one combined data set, squeeze out length-one
# dimensions (the depth dim), and give the horizontal dimensions
# shorter standard names
ds = (
    xr.open_mfdataset(data_files)
    .squeeze()
    .rename({"latitude_ts": "lat", "longitude_ts": "lon"})
)

# the depth coordinate is not needed any more
del ds.coords["depth"]

# strip all global attributes
ds.attrs.clear()

ds

<xarray.Dataset>
Dimensions:  (lat: 538, lon: 586, time: 456)
Coordinates:
  * lon      (lon) float32 4.0275 4.0725 4.1175 4.1625 4.2075 4.2525 4.2975 ...
  * lat      (lat) float32 53.8225 53.845 53.8675 53.89 53.9125 53.935 ...
  * time     (time) datetime64[ns] 1979-01-31T12:00:00 1979-02-28T12:00:00 ...
Data variables:
    BTEMP    (time, lat, lon) float64 dask.array<shape=(456, 538, 586), chunksize=(456, 538, 586)>
    SSS      (time, lat, lon) float64 dask.array<shape=(456, 538, 586), chunksize=(456, 538, 586)>
    BSALT    (time, lat, lon) float64 dask.array<shape=(456, 538, 586), chunksize=(456, 538, 586)>
    SST      (time, lat, lon) float64 dask.array<shape=(456, 538, 586), chunksize=(456, 538, 586)>

In [7]:
# calculate annual means as the plain average of the monthly fields in
# each year; every month gets the same weight, i.e. the differing
# lengths of the months are not taken into account
ds_am = ds.resample(time="1Y").mean()

  label=label, base=base)


In [8]:
# Down-cast every data variable (coordinates are left alone) from 64-bit
# to 32-bit floats and carry over only the attributes we want to keep
# from the monthly data set.  The names are snapshotted with list()
# because the loop re-assigns the entries of `ds_am` while running.
for name in list(ds_am.data_vars):
    ds_am[name] = ds_am[name].astype("float32")
    target_attrs = ds_am[name].attrs
    source_attrs = ds[name].attrs
    for att in ("long_name", "units"):
        target_attrs[att] = source_attrs[att]

In [9]:
# print every variable (coordinates included), each followed by a separator
for name in ds_am.variables:
    print(ds_am[name], "\n=======\n")

<xarray.DataArray 'time' (time: 38)>
array(['1979-12-31T00:00:00.000000000', '1980-12-31T00:00:00.000000000',
       '1981-12-31T00:00:00.000000000', '1982-12-31T00:00:00.000000000',
       '1983-12-31T00:00:00.000000000', '1984-12-31T00:00:00.000000000',
       '1985-12-31T00:00:00.000000000', '1986-12-31T00:00:00.000000000',
       '1987-12-31T00:00:00.000000000', '1988-12-31T00:00:00.000000000',
       '1989-12-31T00:00:00.000000000', '1990-12-31T00:00:00.000000000',
       '1991-12-31T00:00:00.000000000', '1992-12-31T00:00:00.000000000',
       '1993-12-31T00:00:00.000000000', '1994-12-31T00:00:00.000000000',
       '1995-12-31T00:00:00.000000000', '1996-12-31T00:00:00.000000000',
       '1997-12-31T00:00:00.000000000', '1998-12-31T00:00:00.000000000',
       '1999-12-31T00:00:00.000000000', '2000-12-31T00:00:00.000000000',
       '2001-12-31T00:00:00.000000000', '2002-12-31T00:00:00.000000000',
       '2003-12-31T00:00:00.000000000', '2004-12-31T00:00:00.000000000',
       '2005-1

In [10]:
# We're still in lazy-evaluation mode.  Trigger the actual computation
# of the resampled data here, so that `ds_am` holds the computed values
# before it is dropped to disk.
ds_am = ds_am.compute()

  x = np.divide(x1, x2, out)


In [11]:
# document in the data set itself how the annual means were derived
ds_am.attrs["history"] = "Created from monthly INSPIRE fields weighting all months equally."

In [12]:
# write the annual means to an intermediate file using the scipy backend
ds_am.to_netcdf("tmp_baltic.nc", engine="scipy")

In [13]:
# show the header of the file on disk (-h: header only, -s: also list the
# special virtual attributes that describe the storage layout)
!ncdump -hs tmp_baltic.nc

netcdf tmp_baltic {
dimensions:
	time = 38 ;
	lon = 586 ;
	lat = 538 ;
variables:
	float lon(lon) ;
		lon:standard_name = "longitude" ;
		lon:long_name = "longitude [Tracer grid]" ;
		lon:units = "degrees_east" ;
		lon:axis = "X" ;
		lon:_FillValue = NaNf ;
	float lat(lat) ;
		lat:standard_name = "latitude" ;
		lat:long_name = "Latitude  [Tracer grid]" ;
		lat:units = "degrees_north" ;
		lat:axis = "Y" ;
		lat:_FillValue = NaNf ;
	float BTEMP(time, lat, lon) ;
		BTEMP:long_name = "Bottom temperature" ;
		BTEMP:units = "degree Celsius" ;
		BTEMP:_FillValue = NaNf ;
	float SSS(time, lat, lon) ;
		SSS:long_name = "Salinity" ;
		SSS:units = "psu" ;
		SSS:_FillValue = NaNf ;
	float BSALT(time, lat, lon) ;
		BSALT:long_name = "Bottom salinity" ;
		BSALT:units = "psu" ;
		BSALT:_FillValue = NaNf ;
	float SST(time, lat, lon) ;
		SST:long_name = "Temperature" ;
		SST:units = "degree Celsius" ;
		SST:_FillValue = NaNf ;
	int time(time) ;
		time:units = "days si

In [17]:
# and copy to a deflated nc file:
#   -7   write the netCDF-4 classic model format
#   -s   apply the shuffle filter
#   -d1  deflate (compress) with level 1
#   -c   chunk the data as one time step times a quarter of the
#        horizontal grid (lat: 538 / 2 = 269, lon: 586 / 2 = 293)
# NOTE(review): "hydrograhy" in the output file name looks like a typo for
# "hydrography"; fixing it means renaming in every cell that uses the file.
!nccopy -7 -s -d1 \
    -c "time/1,lat/269,lon/293" \
    tmp_baltic.nc \
    INSPIRE_1979_2016_annual_oce_surf_and_bottom_hydrograhy.nc

In [18]:
# inspect the header and the storage settings of the compressed file
!ncdump -hs INSPIRE_1979_2016_annual_oce_surf_and_bottom_hydrograhy.nc

netcdf INSPIRE_1979_2016_annual_oce_surf_and_bottom_hydrograhy {
dimensions:
	time = 38 ;
	lon = 586 ;
	lat = 538 ;
variables:
	float lon(lon) ;
		lon:standard_name = "longitude" ;
		lon:long_name = "longitude [Tracer grid]" ;
		lon:units = "degrees_east" ;
		lon:axis = "X" ;
		lon:_FillValue = NaNf ;
		lon:_Storage = "chunked" ;
		lon:_ChunkSizes = 586 ;
		lon:_DeflateLevel = 1 ;
		lon:_Shuffle = "true" ;
		lon:_Endianness = "little" ;
	float lat(lat) ;
		lat:standard_name = "latitude" ;
		lat:long_name = "Latitude  [Tracer grid]" ;
		lat:units = "degrees_north" ;
		lat:axis = "Y" ;
		lat:_FillValue = NaNf ;
		lat:_Storage = "chunked" ;
		lat:_ChunkSizes = 538 ;
		lat:_DeflateLevel = 1 ;
		lat:_Shuffle = "true" ;
		lat:_Endianness = "little" ;
	float BTEMP(time, lat, lon) ;
		BTEMP:long_name = "Bottom temperature" ;
		BTEMP:units = "degree Celsius" ;
		BTEMP:_FillValue = NaNf ;
		BTEMP:_Storage = "chunked" ;
		BTEMP:_ChunkSizes = 1, 269, 293 ;
		BTEMP

In [19]:
# check the size of the final file on disk
!ls -lah INSPIRE_1979_2016_annual_oce_surf_and_bottom_hydrograhy.nc

-rw-r--r-- 1 smomw122 smomw 39M Feb 22 11:44 INSPIRE_1979_2016_annual_oce_surf_and_bottom_hydrograhy.nc


---

*As this notebook was prepared via nb.geomar.de, we'll have to manually get the data file from the Nesh frontend.*