In [1]:
import xarray as xr
import psutil
import os
from dask.distributed import Client
from utils import climatology, config

## Checking total memory with psutil

In [2]:
# Report the machine's total memory so worker limits can be sized against it.
# psutil reports the value in bytes; divide by 1024**3 to express it in GB.
bytes_per_gb = 1024**3
memory_info = psutil.virtual_memory()
total_memory = memory_info.total / bytes_per_gb
print(f"Total memory: {total_memory:.2f} GB")

Total memory: 63.25 GB


## Chunk Calculator

In [3]:
# Estimate the in-memory footprint of one dask chunk from its per-axis
# lengths and the size of a single stored value.
# NOTE(review): these lengths are example values; compare them with the chunk
# shape actually reported when the dataset is opened.
time_chunk, latitude_chunk, longitude_chunk = 365, 281, 441
data_size = 4  # bytes per value (float32)

# Bytes per chunk = bytes per value multiplied by the length of every axis.
chunk_size = data_size
for axis_length in (time_chunk, latitude_chunk, longitude_chunk):
    chunk_size *= axis_length

chunk_size_GB = chunk_size / 1024**3  # bytes -> GB (1024**3 bytes each)
print(f"Each chunk is approximately {chunk_size_GB:.6f} GB")

Each chunk is approximately 0.168499 GB


In [4]:
# Start a local Dask cluster with a per-worker memory cap.
# With 4 workers at 8GB each the cluster may use up to ~32GB in total
# (reported by dask as 4 x 7.45 GiB = 29.80 GiB), so keep this below the
# machine's total memory measured earlier in the notebook.
n_workers = 4
memory_limit = '8GB'  # cap applied to each worker, not to the whole cluster
client = Client(
    n_workers=n_workers,
    memory_limit=memory_limit,
)
print(client)

<Client: 'tcp://127.0.0.1:54267' processes=4 threads=24, memory=29.80 GiB>


In [5]:
# Display the client's rich summary: cluster type, dashboard link, and the
# thread count and memory limit of each worker.
client

0,1
Connection method: Cluster object,Cluster type: distributed.LocalCluster
Dashboard: http://127.0.0.1:8787/status,

0,1
Dashboard: http://127.0.0.1:8787/status,Workers: 4
Total threads: 24,Total memory: 29.80 GiB
Status: running,Using processes: True

0,1
Comm: tcp://127.0.0.1:54267,Workers: 4
Dashboard: http://127.0.0.1:8787/status,Total threads: 24
Started: Just now,Total memory: 29.80 GiB

0,1
Comm: tcp://127.0.0.1:54296,Total threads: 6
Dashboard: http://127.0.0.1:54297/status,Memory: 7.45 GiB
Nanny: tcp://127.0.0.1:54270,
Local directory: C:\Users\ls2236\AppData\Local\Temp\dask-scratch-space\worker-m6nd6ga4,Local directory: C:\Users\ls2236\AppData\Local\Temp\dask-scratch-space\worker-m6nd6ga4

0,1
Comm: tcp://127.0.0.1:54290,Total threads: 6
Dashboard: http://127.0.0.1:54292/status,Memory: 7.45 GiB
Nanny: tcp://127.0.0.1:54272,
Local directory: C:\Users\ls2236\AppData\Local\Temp\dask-scratch-space\worker-fjmlwva2,Local directory: C:\Users\ls2236\AppData\Local\Temp\dask-scratch-space\worker-fjmlwva2

0,1
Comm: tcp://127.0.0.1:54291,Total threads: 6
Dashboard: http://127.0.0.1:54294/status,Memory: 7.45 GiB
Nanny: tcp://127.0.0.1:54274,
Local directory: C:\Users\ls2236\AppData\Local\Temp\dask-scratch-space\worker-2u2qx01r,Local directory: C:\Users\ls2236\AppData\Local\Temp\dask-scratch-space\worker-2u2qx01r

0,1
Comm: tcp://127.0.0.1:54287,Total threads: 6
Dashboard: http://127.0.0.1:54288/status,Memory: 7.45 GiB
Nanny: tcp://127.0.0.1:54276,
Local directory: C:\Users\ls2236\AppData\Local\Temp\dask-scratch-space\worker-fu5vv8z0,Local directory: C:\Users\ls2236\AppData\Local\Temp\dask-scratch-space\worker-fu5vv8z0


In [15]:
# Open the NetCDF file lazily as dask-backed arrays.
# The path is built with os.path.join so it stays valid whether or not
# config.data_path ends with a path separator.
fname = "era5_t2min_1970_2000.nc"
ds = xr.open_dataset(
    os.path.join(config.data_path, fname),
    # time=-1 keeps the whole time axis in a single chunk, longitude is split
    # into blocks of 25, and dask picks the latitude chunk size ("auto").
    chunks={"latitude": "auto", "longitude": 25, "time": -1},
)
# Other chunk layouts are possible, for example chunks={'time': 365}.
# If the data is spread over more than one file, use xr.open_mfdataset instead.

In [16]:
# Display the dataset summary to check the array shape and the dask chunk layout.
ds

Unnamed: 0,Array,Chunk
Bytes,58.62 MiB,3.32 MiB
Shape,"(124, 281, 441)","(124, 281, 25)"
Dask graph,18 chunks in 2 graph layers,18 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 58.62 MiB 3.32 MiB Shape (124, 281, 441) (124, 281, 25) Dask graph 18 chunks in 2 graph layers Data type float32 numpy.ndarray",441  281  124,

Unnamed: 0,Array,Chunk
Bytes,58.62 MiB,3.32 MiB
Shape,"(124, 281, 441)","(124, 281, 25)"
Dask graph,18 chunks in 2 graph layers,18 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray


## Calculate seasonal anomalies

In [9]:
# Select the variable named in the project config and pass it to the
# project's scaling helper. The result is later indexed by 'anomaly_scaled'.
# NOTE(review): the exact scaling is defined in utils.climatology.scale_da —
# confirm there which climatology and scaling are applied.
anomaly_var = config.to_anomaly_var
da = ds[anomaly_var]
ds_anomalies = climatology.scale_da(da)

In [17]:
# Preview the path the anomalies will be written to.
# Pass the directory and file name as separate arguments: os.path.join with a
# single pre-concatenated string does nothing, so a data_path without a
# trailing separator would produce a wrong path.
os.path.join(config.data_path, config.output_from_anomalies)

'data/era5_t2min_scaled_anomalies.nc'

In [13]:
# Write the scaled anomalies to NetCDF.
# The directory and file name are joined as separate arguments so the path is
# valid whether or not config.data_path ends with a separator.
output_path = os.path.join(config.data_path, config.output_from_anomalies)
# mode="w" overwrites any existing file; compute=True runs the dask graph now
# rather than returning a delayed write.
ds_anomalies['anomaly_scaled'].to_netcdf(output_path, mode="w", compute=True)

In [14]:
# Close the Dask client once the computation and the file write have finished.
client.close()