In [2]:
import os, sys
import numpy as np

import xarray as xr
import pandas as pd
from matplotlib import pyplot as plt
from matplotlib import colors

# Notebook-wide matplotlib defaults: 16x8 figures and a base font size of 14.
plt.rcParams["figure.figsize"] = (16, 8)
plt.rcParams["font.size"] = 14

from dask.diagnostics import ProgressBar
from dask.distributed import Client, LocalCluster
#
# Start a local Dask cluster with a single worker running 10 threads
# (processes=False: threads are used instead of separate worker processes),
# then attach a client to it. The trailing expression displays the client.
cluster = LocalCluster(
    n_workers=1,
    threads_per_worker=10,
    processes=False,
)
client = Client(cluster)
client

0,1
Connection method: Cluster object,Cluster type: distributed.LocalCluster
Dashboard: http://137.129.156.19:8787/status,

0,1
Dashboard: http://137.129.156.19:8787/status,Workers: 1
Total threads: 10,Total memory: 94.31 GiB
Status: running,Using processes: False

0,1
Comm: inproc://137.129.156.19/57806/1,Workers: 1
Dashboard: http://137.129.156.19:8787/status,Total threads: 10
Started: Just now,Total memory: 94.31 GiB

0,1
Comm: inproc://137.129.156.19/57806/3,Total threads: 10
Dashboard: http://137.129.156.19:43725/status,Memory: 94.31 GiB
Nanny: None,
Local directory: /home/durandy/script/dask-worker-space/worker-whg32vku,Local directory: /home/durandy/script/dask-worker-space/worker-whg32vku


In [9]:
# Directory and base name (without extension) of the raw climatology file.
indir = '/cnrm/tropics/commun/DATACOMMUN/WAVE/NO_SAVE/DATA/RAW_CLIM/'
exp = 'clim_tcwv_brut_ERA5_1979_2020'

# Open the file lazily with one chunk per (dayofyear, hour) pair and keep
# only the latitude labels selected by slice(1, -1).
ds = xr.open_dataset(
    f"{indir}{exp}.nc",
    chunks={'dayofyear': 1, 'hour': 1},
).sel(latitude=slice(1, -1))

# Time axis used for the flattened climatology: 366 * 4 steps spaced
# 6 hours apart, starting on 2000-01-01.
time_R = pd.date_range(start="2000-01-01", periods=366 * 4, freq="6H")

# Number of harmonics to keep.
nbSampl = 4
nbHarm = 3  # 0 for the mean, 1 for the annual cycle, etc...
nbHarmKeep = nbSampl * nbHarm

In [10]:
#### Reconstruction of the data
# Flatten tcwv into a single time axis of 366 * 4 steps.
# NOTE(review): assumes ds.tcwv is ordered (dayofyear, hour, latitude,
# longitude) so that the reshape interleaves hours within days -- confirm.
n_lat = ds.latitude.shape[0]
n_lon = ds.longitude.shape[0]
tcwv_flat = ds.tcwv.values.reshape((366 * 4, n_lat, n_lon))

da = xr.DataArray(
    tcwv_flat,
    dims=("time", "latitude", "longitude"),
    coords={
        "time": time_R,
        "latitude": ds.latitude,
        "longitude": ds.longitude,
    },
    attrs={
        "description": "tcwv_clim smoothed",
        "units": "$kg.m^{-2}$",
    },
)

# One chunk per longitude index, so every chunk spans the whole time axis.
da = da.chunk({'longitude': 1})
da

Unnamed: 0,Array,Chunk
Bytes,72.38 MiB,51.47 kiB
Shape,"(1464, 9, 1440)","(1464, 9, 1)"
Count,1440 Tasks,1440 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 72.38 MiB 51.47 kiB Shape (1464, 9, 1440) (1464, 9, 1) Count 1440 Tasks 1440 Chunks Type float32 numpy.ndarray",1440  9  1464,

Unnamed: 0,Array,Chunk
Bytes,72.38 MiB,51.47 kiB
Shape,"(1464, 9, 1440)","(1464, 9, 1)"
Count,1440 Tasks,1440 Chunks
Type,float32,numpy.ndarray


## Selection
Dans la cellule ci-dessus, on a chargé la climatologie brute d'ERA5 dans un DataArray, et on souhaite lisser (« smooth ») le signal. On ne garde que les premières harmoniques du signal. On utilise le package xrft pour calculer la transformée de Fourier de tableaux xarray : https://xrft.readthedocs.io/en/latest/index.html

In [11]:
import xrft

In [12]:
# Forward Fourier transform of the climatology along the time axis.
# NOTE(review): the following cell recomputes this same transform and
# overwrites tcwvhat, so this cell appears to be redundant.
tcwvhat = xrft.fft(
    da,
    dim=['time'],
    true_phase=False,
    true_amplitude=True,
)


In [13]:
# Low-pass filter in frequency space: transform along time, then zero every
# Fourier coefficient whose |frequency| lies above the cutoff harmonic.
tcwvhat = xrft.fft(da, dim="time", true_phase=False, true_amplitude=True)

# One cycle over the 366-day axis, expressed per second.
# NOTE(review): presumably xrft reports freq_time in cycles per second for a
# datetime axis, which is what the 86400 * 366 scaling implies -- confirm.
fundamental = 1.0 / (86400 * 366)

# Place the cutoff half a harmonic above the last harmonic to keep. Comparing
# against exactly nbHarmKeep * fundamental would make the fate of that last
# harmonic depend on floating-point rounding of the frequency coordinate.
# NOTE(review): nbHarmKeep = nbSampl * nbHarm, so more harmonics than nbHarm
# are retained -- confirm this is the intended cutoff.
cutoff = (nbHarmKeep + 0.5) * fundamental

# Single symmetric mask covering both the negative and positive frequencies.
tcwvhat = xr.where(abs(tcwvhat.freq_time) > cutoff, 0., tcwvhat)

In [14]:
# Inverse transform of the filtered spectrum: signal back in direct space.
tcwv_Sm = xrft.ifft(
    tcwvhat,
    dim='freq_time',
    true_phase=False,
    true_amplitude=True,
)




In [16]:
# Write the smoothed climatology to a NetCDF file.
outdir = '/cnrm/tropics/commun/DATACOMMUN/WAVE/NO_SAVE/DATA/SMOTHED_CLIM/'
minYear = 1979
maxYear = 2020

# Keep the real part of the inverse transform and give the variable an
# explicit name. The array is never named in this notebook, so without
# name=... the conversion either fails (unnamed DataArray) or, presumably,
# stores the variable under an auto-generated name -- neither is usable
# downstream.
# NOTE(review): this rebinds ds, which the earlier cells use for the raw
# climatology; re-run the loading cell before re-running those cells.
ds = tcwv_Sm.real.to_dataset(name='tcwv').compute()

ds.to_netcdf(f'{outdir}clim_tcwv_smooth_ERA5_{minYear}_{maxYear}.nc')