In [1]:
import xarray as xr
import psutil
import os
from dask.distributed import Client

## Checking total memory with psutil

In [2]:
# psutil reports installed RAM in bytes; dividing by 2**30 (= 1024**3) expresses it in GiB
total_memory = psutil.virtual_memory().total / 2**30
print(f"Total memory: {total_memory:.2f} GB")

Total memory: 63.25 GB


## Chunk Calculator

In [3]:
# Chunk shape to evaluate: number of elements along each dimension
time_chunk, latitude_chunk, longitude_chunk = 365, 281, 441
data_size = 4  # bytes per element (float32)

# Footprint of a single chunk = element count x bytes per element
chunk_size = data_size * (time_chunk * latitude_chunk * longitude_chunk)
chunk_size_GB = chunk_size / 2**30  # bytes -> GiB (2**30 == 1024**3)
print(f"Each chunk is approximately {chunk_size_GB:.6f} GB")

Each chunk is approximately 0.168499 GB


In [4]:
# Start a local Dask cluster with 4 workers, each capped at 8GB of memory
# (4 x 8GB = 32GB in total, so the machine should have more RAM than that).
memory_limit = '8GB'  # per-worker limit
client = Client(memory_limit=memory_limit, n_workers=4)
print(client)

<Client: 'tcp://127.0.0.1:58561' processes=4 threads=24, memory=29.80 GiB>


In [5]:
# Show the client's rich summary (dashboard link, workers, threads and memory)
client

0,1
Connection method: Cluster object,Cluster type: distributed.LocalCluster
Dashboard: http://127.0.0.1:8787/status,

0,1
Dashboard: http://127.0.0.1:8787/status,Workers: 4
Total threads: 24,Total memory: 29.80 GiB
Status: running,Using processes: True

0,1
Comm: tcp://127.0.0.1:64325,Workers: 4
Dashboard: http://127.0.0.1:8787/status,Total threads: 24
Started: Just now,Total memory: 29.80 GiB

0,1
Comm: tcp://127.0.0.1:64345,Total threads: 6
Dashboard: http://127.0.0.1:64348/status,Memory: 7.45 GiB
Nanny: tcp://127.0.0.1:64328,
Local directory: C:\Users\ls2236\AppData\Local\Temp\dask-scratch-space\worker-hu63s1ze,Local directory: C:\Users\ls2236\AppData\Local\Temp\dask-scratch-space\worker-hu63s1ze

0,1
Comm: tcp://127.0.0.1:64346,Total threads: 6
Dashboard: http://127.0.0.1:64350/status,Memory: 7.45 GiB
Nanny: tcp://127.0.0.1:64330,
Local directory: C:\Users\ls2236\AppData\Local\Temp\dask-scratch-space\worker-hg85bz3b,Local directory: C:\Users\ls2236\AppData\Local\Temp\dask-scratch-space\worker-hg85bz3b

0,1
Comm: tcp://127.0.0.1:64353,Total threads: 6
Dashboard: http://127.0.0.1:64354/status,Memory: 7.45 GiB
Nanny: tcp://127.0.0.1:64332,
Local directory: C:\Users\ls2236\AppData\Local\Temp\dask-scratch-space\worker-n0y27g0x,Local directory: C:\Users\ls2236\AppData\Local\Temp\dask-scratch-space\worker-n0y27g0x

0,1
Comm: tcp://127.0.0.1:64344,Total threads: 6
Dashboard: http://127.0.0.1:64347/status,Memory: 7.45 GiB
Nanny: tcp://127.0.0.1:64334,
Local directory: C:\Users\ls2236\AppData\Local\Temp\dask-scratch-space\worker-m4f4fqlr,Local directory: C:\Users\ls2236\AppData\Local\Temp\dask-scratch-space\worker-m4f4fqlr


In [None]:
# Open the NetCDF file with dask-backed (chunked) arrays.
# Chunk layout: the whole time axis in one chunk (-1), 25 longitude points per
# chunk, and an automatically chosen latitude chunk size.
# Chunks can be set differently, e.g. chunks={'time': 365}.
# If there is more than one file, use xr.open_mfdataset instead.
fname = "era5_t2min_1970_2000.nc"
ds = xr.open_dataset(
    f"data/{fname}",
    chunks={"latitude": "auto", "longitude": 25, "time": -1},
)

In [8]:
# Inspect the opened dataset: variables, dimensions and dask chunk layout
ds

Unnamed: 0,Array,Chunk
Bytes,690.17 MiB,172.54 MiB
Shape,"(1460, 281, 441)","(365, 281, 441)"
Dask graph,4 chunks in 9 graph layers,4 chunks in 9 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 690.17 MiB 172.54 MiB Shape (1460, 281, 441) (365, 281, 441) Dask graph 4 chunks in 9 graph layers Data type float32 numpy.ndarray",441  281  1460,

Unnamed: 0,Array,Chunk
Bytes,690.17 MiB,172.54 MiB
Shape,"(1460, 281, 441)","(365, 281, 441)"
Dask graph,4 chunks in 9 graph layers,4 chunks in 9 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,690.17 MiB,172.54 MiB
Shape,"(1460, 281, 441)","(365, 281, 441)"
Dask graph,4 chunks in 9 graph layers,4 chunks in 9 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 690.17 MiB 172.54 MiB Shape (1460, 281, 441) (365, 281, 441) Dask graph 4 chunks in 9 graph layers Data type float32 numpy.ndarray",441  281  1460,

Unnamed: 0,Array,Chunk
Bytes,690.17 MiB,172.54 MiB
Shape,"(1460, 281, 441)","(365, 281, 441)"
Dask graph,4 chunks in 9 graph layers,4 chunks in 9 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray


## Calculate seasonal anomalies

In [None]:
# Variable to analyse
da = ds["daily_t2_min"]

# Group by meteorological season, reduce over time, then keep only the DJF
# (Dec-Jan-Feb) field and drop the leftover scalar 'season' coordinate.
# NOTE(review): only DJF statistics are kept, so anything computed from them
# is relative to DJF for every time step - confirm the input holds DJF data only.
season_mean = (
    da.groupby("time.season")
    .mean(dim='time')
    .sel(season='DJF')
    .drop_vars('season')
)
season_stdev = (
    da.groupby("time.season")
    .std(dim='time')
    .sel(season='DJF')
    .drop_vars('season')
)

In [9]:
# Anomaly: departure of each time step from the DJF mean field
ds['anomaly'] = da - season_mean
# Scaled anomaly: the anomaly divided by the DJF standard deviation
ds['anomaly_scaled'] = ds['anomaly'] / season_stdev

In [10]:
# Inspect the dataset again, now including 'anomaly' and 'anomaly_scaled'
ds

Unnamed: 0,Array,Chunk
Bytes,14.65 MiB,850.68 kiB
Shape,"(31, 281, 441)","(31, 281, 25)"
Dask graph,18 chunks in 2 graph layers,18 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 14.65 MiB 850.68 kiB Shape (31, 281, 441) (31, 281, 25) Dask graph 18 chunks in 2 graph layers Data type float32 numpy.ndarray",441  281  31,

Unnamed: 0,Array,Chunk
Bytes,14.65 MiB,850.68 kiB
Shape,"(31, 281, 441)","(31, 281, 25)"
Dask graph,18 chunks in 2 graph layers,18 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,14.65 MiB,850.68 kiB
Shape,"(31, 281, 441)","(31, 281, 25)"
Dask graph,18 chunks in 9 graph layers,18 chunks in 9 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 14.65 MiB 850.68 kiB Shape (31, 281, 441) (31, 281, 25) Dask graph 18 chunks in 9 graph layers Data type float32 numpy.ndarray",441  281  31,

Unnamed: 0,Array,Chunk
Bytes,14.65 MiB,850.68 kiB
Shape,"(31, 281, 441)","(31, 281, 25)"
Dask graph,18 chunks in 9 graph layers,18 chunks in 9 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,14.65 MiB,850.68 kiB
Shape,"(31, 281, 441)","(31, 281, 25)"
Dask graph,18 chunks in 16 graph layers,18 chunks in 16 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 14.65 MiB 850.68 kiB Shape (31, 281, 441) (31, 281, 25) Dask graph 18 chunks in 16 graph layers Data type float32 numpy.ndarray",441  281  31,

Unnamed: 0,Array,Chunk
Bytes,14.65 MiB,850.68 kiB
Shape,"(31, 281, 441)","(31, 281, 25)"
Dask graph,18 chunks in 16 graph layers,18 chunks in 16 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray


In [11]:
# Write the dataset, including the 'anomaly' and 'anomaly_scaled' variables, to NetCDF.
# Create the output folder first so the write does not fail on a fresh checkout
# where "temp/" is missing; exist_ok=True makes re-running the cell harmless.
out_dir = "temp/"
os.makedirs(out_dir, exist_ok=True)
# compute=True triggers the dask computation now; mode="w" overwrites an existing file.
ds.to_netcdf(os.path.join(out_dir, 'era5_t2min_scaled_anomalies.nc'), compute=True, mode="w")

In [9]:
# Remember to close the Dask client once all computations have finished
client.close()