In [1]:
import os, sys
import numpy as np

import xarray as xr
import pandas as pd
from matplotlib import pyplot as plt
from matplotlib import colors

from scipy.fft import fft, ifft, fftfreq

plt.rc("figure", figsize=(16,8))
plt.rc("font", size=14)

from dask.diagnostics import ProgressBar
from dask.distributed import Client, LocalCluster
#
# Start a local Dask cluster: one worker running 32 threads, using threads
# rather than separate worker processes (processes=False).
cluster = LocalCluster(
    n_workers=1,
    threads_per_worker=32,
    processes=False,
)
# Attach a client to that cluster; leaving `client` as the last expression
# makes the notebook display its summary (dashboard link, workers, memory).
client = Client(cluster)
client

0,1
Connection method: Cluster object,Cluster type: distributed.LocalCluster
Dashboard: http://137.129.156.19:8787/status,

0,1
Dashboard: http://137.129.156.19:8787/status,Workers: 1
Total threads: 10,Total memory: 94.31 GiB
Status: running,Using processes: False

0,1
Comm: inproc://137.129.156.19/47087/1,Workers: 1
Dashboard: http://137.129.156.19:8787/status,Total threads: 10
Started: Just now,Total memory: 94.31 GiB

0,1
Comm: inproc://137.129.156.19/47087/3,Total threads: 10
Dashboard: http://137.129.156.19:46813/status,Memory: 94.31 GiB
Nanny: None,
Local directory: /home/durandy/script/dask-worker-space/worker-oia3mw7v,Local directory: /home/durandy/script/dask-worker-space/worker-oia3mw7v


In [2]:
def on1Dtime(ds, v):
    """Flatten a (dayofyear, hour) climatology of `v` onto a single time axis.

    Parameters
    ----------
    ds : xarray.Dataset
        Dataset holding variable `v` with the dimensions ``dayofyear``,
        ``hour``, ``level``, ``latitude`` and ``longitude`` (in any order).
    v : str
        Name of the variable to rebuild.

    Returns
    -------
    xarray.Dataset
        In-memory dataset with the single variable `v` on the dimensions
        ``(time, level, latitude, longitude)``. ``time`` holds integer
        "hours since 1900-01-01" values; day-of-year 1 at hour 0 maps to
        4*365*24, i.e. 1904-01-01 (a leap year, so 366 days-of-year fit).
    """
    # Fix the axis order explicitly: the reshape below merges the first two
    # axes into "time", which is only correct if they are (dayofyear, hour)
    # in that order. Relying on the order produced upstream is fragile.
    field = ds[v].transpose("dayofyear", "hour", "level", "latitude", "longitude")
    n_doy, n_hour, n_level, n_lat, n_lon = field.shape

    # Build the time axis from the actual coordinates instead of a fixed
    # 366-day / 3-hourly range. For dayofyear 1..366 and hours 0, 3, ..., 21
    # this gives exactly np.arange(4*365*24, 4*365*24 + 366*24, 3).
    base_hours = 4 * 365 * 24
    doy = np.asarray(field["dayofyear"].values)
    hour = np.asarray(field["hour"].values)
    time_hours = (base_hours + (doy[:, None] - 1) * 24 + hour[None, :]).ravel()

    time_attrs = {"units": "hours since 1900-01-01", "calendar": "gregorian"}
    ds_T = xr.Dataset({"time": ("time", time_hours, time_attrs)})

    #### Reconstruction of the data
    da = xr.DataArray(
        data=np.reshape(field.values, (n_doy * n_hour, n_level, n_lat, n_lon)),
        dims=["time", "level", "latitude", "longitude"],
        coords=dict(
            time=ds_T.time,
            level=ds.level,
            latitude=ds.latitude,
            longitude=ds.longitude,
        ),
        attrs=dict(
            # Describe the variable actually processed (was always "tcwv_clim").
            description=f"{v}_clim",
            # Prefer the units carried by the input; "kg/m2" is the legacy
            # fallback and is only right for total column water vapour.
            units=field.attrs.get("units", "kg/m2"),
        ),
    )
    return da.to_dataset(name=v).compute()

def isLeapYear(yearN):
    """Tell whether `yearN` is a leap year under the Gregorian rule.

    A year is leap when it is divisible by 400, or divisible by 4 but not
    by 100. The result is also printed (followed by a newline string)
    before being returned.
    """
    if yearN % 400 == 0:
        # Century years divisible by 400 are leap.
        reponse = True
    elif yearN % 100 == 0:
        # Remaining century years are not.
        reponse = False
    else:
        reponse = bool(yearN % 4 == 0)
    print(reponse, '\n')
    return reponse

def hour_mean(x):
    """Return the mean of `x` along ``time`` for each group of ``time.hour``."""
    by_hour = x.groupby('time.hour')
    return by_hour.mean('time')
    
def hour_sum(x):
    """Return the sum of `x` along ``time`` for each group of ``time.hour``."""
    by_hour = x.groupby('time.hour')
    return by_hour.sum('time')
    
def hour_std(x):
    """Return the standard deviation of `x` along ``time`` for each group of ``time.hour``."""
    by_hour = x.groupby('time.hour')
    return by_hour.std('time')

In [3]:
print("Start of the script")

# Directory scanned for the input NetCDF files, and directory receiving the
# climatology files that are written out.
indir = "/cnrm/tropics/commun/DATACOMMUN/ERA5/0.25/netcdf/pl_3h/"
outdir = "/cnrm/tropics/commun/DATACOMMUN/WAVE/NO_SAVE/DATA/RAW_CLIM/"

# Names of the variables to process; each one is handled in turn.
var = ["v"]



In [6]:
# Build, for every variable, a raw (unsmoothed) day-of-year x hour-of-day
# climatology, one pressure level at a time, then merge the per-level files
# into a single NetCDF file.

# Scratch directory for the per-level files; created up front so the first
# to_netcdf() call cannot fail on a missing folder.
tmpdir = outdir + 'temp/'
os.makedirs(tmpdir, exist_ok=True)

for v in var:
    print('start for the variable ',v)
    # Context manager: the input files are closed once every level is done.
    with xr.open_mfdataset(indir+'*_'+v+'_*.nc', parallel=True, chunks='auto') as ds:
        for z in ds.level.values:
            print('start ',z)
            # Select one level but keep it as a length-1 dimension, which is
            # the layout on1Dtime() expects.
            _ds = ds.sel(level=z).expand_dims(level=1)
            print(np.shape(_ds[v]))
            # For each day of year, average over time separately for each
            # hour of the day. `.map` replaces the legacy `.apply` alias.
            ds_clim = _ds.groupby('time.dayofyear').map(hour_mean)
            print(np.shape(ds_clim[v]))
            ds_clim = on1Dtime(ds_clim, v)
            ds_clim.to_netcdf(tmpdir + 'clim_'+v+str(z)+'z_brut_ERA5_3H_1990_2020_T3.nc')
            print('end ',z,'\n')

    # Re-open all per-level files lazily and write them out as one file.
    # The result of to_netcdf() is no longer assigned back (it was None).
    with xr.open_mfdataset(tmpdir + 'clim_'+v+'*z*T3.nc', parallel=True, chunks={'time': 1, 'level': 1}) as ds_merged:
        ds_merged.to_netcdf(outdir + 'clim_'+v+'_brut_ERA5_3H_1990_2020_T3.nc')
    print('end for the variable ',v,'\n')
print('end')

[1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003
 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017
 2018 2019]
1990
1991
1992
1993
1994
1995
1996
1997
1998
1999
2000
2001
2002
2003
2004
2005
2006
2007
2008
2009
2010
2011
2012
2013
2014
2015
2016
2017
2018
2019
