In [1]:
import xarray as xr
import dask
import zarr
import timeit
import time
import os

In [2]:
# Path of the source NetCDF file, opened as an xarray Dataset
file_path = "2m_temperature-day-mean.nc"
ds = xr.open_dataset(filename_or_obj=file_path)
# Last expression: show the dataset summary
ds

In [3]:
# On-disk footprint of the NetCDF file (bytes -> units of 1024**3 bytes)
file_size_bytes = os.stat(file_path).st_size
file_size_gb = file_size_bytes / 2**30
print(f"The size of the file is {file_size_gb:.2f} GB")
# In-memory footprint as reported by ds.nbytes, in the same unit
memory_size_bytes = ds.nbytes
memory_size_gb = memory_size_bytes / 2**30
print(f"The dataset size in memory is approximately {memory_size_gb:.2f} GB")

The size of the file is 28.25 GB
The dataset size in memory is approximately 113.02 GB


In [4]:
# Pull the 't2m' data variable out of the dataset and display it
t2m = ds.t2m
t2m

**Slicing Using Zarr (1/5/2025)**

Cube Chunk (200x200x200)

In [5]:
# Re-chunk the dataset into 200x200x200 cubes (time, latitude, longitude)
# and keep only the t2m variable
cube_chunks = ds.chunk(dict(time=200, latitude=200, longitude=200))["t2m"]
print(cube_chunks.chunks)

((200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 10), (200, 200, 200, 121), (200, 200, 200, 200, 200, 200, 200, 40))


In [6]:
# Materialise the first 200x200x200 block of the chunked array in memory
first_chunk = cube_chunks.isel(
    {"time": slice(0, 200), "latitude": slice(0, 200), "longitude": slice(0, 200)}
).compute()
print(first_chunk)

<xarray.DataArray 't2m' (time: 200, latitude: 200, longitude: 200)> Size: 64MB
array([[[248.72569322, 248.72569322, 248.72569322, ..., 248.72569322,
         248.72569322, 248.72569322],
        [248.65922374, 248.65922374, 248.65922374, ..., 248.74719805,
         248.74719805, 248.74719805],
        [248.63771891, 248.63771891, 248.63576393, ..., 248.69636845,
         248.69832343, 248.70027842],
        ...,
        [280.52742761, 280.54893244, 280.58216717, ..., 287.25843956,
         287.26234953, 287.24866464],
        [280.78548557, 280.79721548, 280.83045022, ..., 287.30926916,
         287.31317913, 287.31513412],
        [281.04940849, 281.06504837, 281.12369791, ..., 287.35423381,
         287.35618879, 287.35814378]],

       [[246.48528087, 246.48528087, 246.48528087, ..., 246.48528087,
         246.48528087, 246.48528087],
        [246.24481777, 246.24481777, 246.24481777, ..., 246.45400112,
         246.4559561 , 246.4559561 ],
        [246.27609752, 246.27414254, 246.2

In [7]:
# Re-chunk the dataset into 500x500x500 cubes (time, latitude, longitude)
cube_chunks = ds.chunk(dict(time=500, latitude=500, longitude=500))["t2m"]
print(cube_chunks.chunks)
# Write the chunked variable to a Zarr store, replacing any existing one
cube_chunks.to_zarr(store='equal_2.zarr', mode='w')

((500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 110), (500, 221), (500, 500, 440))


<xarray.backends.zarr.ZarrStore at 0x1203dad0b40>

Chunking at 700x700x700 required a single allocation of about 2.5 GB, which caused an error (will try executing it again).

In [10]:
# Re-chunk the dataset into 50x50x50 cubes (time, latitude, longitude)
cube_chunks = ds.chunk(dict(time=50, latitude=50, longitude=50))["t2m"]
print(cube_chunks.chunks)
# Write the chunked variable to a Zarr store, replacing any existing one
cube_chunks.to_zarr(store='equal_3.zarr', mode='w')

((50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50

<xarray.backends.zarr.ZarrStore at 0x1203db7e340>

Vertical Cuboid Chunks

In [None]:
# Slicing the data into vertical cuboids: 800 time steps x 100 latitude x 100 longitude
v_cuboids_chunks = ds.chunk({'time': 800, 'latitude': 100, 'longitude': 100}).t2m
print(v_cuboids_chunks.chunks)
# Saving the slices into a Zarr file.
# The store is named after the layout (like vertical_2.zarr / vertical_3.zarr) so that a
# mode='w' write from a different chunk layout cannot silently overwrite it.
v_cuboids_chunks.to_zarr('vertical.zarr', mode='w')

In [8]:
# Vertical cuboids: 1000 time steps x 300 latitude x 300 longitude per chunk
v_cuboids_chunks = ds.chunk(dict(time=1000, latitude=300, longitude=300))["t2m"]
print(v_cuboids_chunks.chunks)
# Write the chunked variable to a Zarr store, replacing any existing one
v_cuboids_chunks.to_zarr(store='vertical_2.zarr', mode='w')

((1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 610), (300, 300, 121), (300, 300, 300, 300, 240))


In [11]:
# Vertical cuboids: 300 time steps x 30 latitude x 30 longitude per chunk
v_cuboids_chunks = ds.chunk(dict(time=300, latitude=30, longitude=30))["t2m"]
print(v_cuboids_chunks.chunks)
# Write the chunked variable to a Zarr store, replacing any existing one
v_cuboids_chunks.to_zarr(store='vertical_3.zarr', mode='w')

((300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 210), (30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 1), (30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30))


<xarray.backends.zarr.ZarrStore at 0x12041aa13c0>

Horizontal Cuboid Chunks

In [None]:
# Slicing the data into horizontal cuboids: 50 time steps x 400 latitude x 400 longitude
h_cuboids_chunks = ds.chunk({'time': 50, 'latitude': 400, 'longitude': 400}).t2m
print(h_cuboids_chunks.chunks)
# Saving the slices into a Zarr file.
# The store is named after the layout (like horizontal_2.zarr / horizontal_3.zarr) so that a
# mode='w' write from a different chunk layout cannot silently overwrite it.
h_cuboids_chunks.to_zarr('horizontal.zarr', mode='w')

In [9]:
# Horizontal cuboids: 100 time steps x 700 latitude x 700 longitude per chunk
h_cuboids_chunks = ds.chunk(dict(time=100, latitude=700, longitude=700))["t2m"]
print(h_cuboids_chunks.chunks)
# Write the chunked variable to a Zarr store, replacing any existing one
h_cuboids_chunks.to_zarr(store='horizontal_2.zarr', mode='w')

((100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 10), (700, 21), (700, 700, 40))


<xarray.backends.zarr.ZarrStore at 0x1203dad23c0>

In [12]:
# Horizontal cuboids: 10 time steps x 200 latitude x 200 longitude per chunk
h_cuboids_chunks = ds.chunk(dict(time=10, latitude=200, longitude=200))["t2m"]
print(h_cuboids_chunks.chunks)
# Write the chunked variable to a Zarr store, replacing any existing one
h_cuboids_chunks.to_zarr(store='horizontal_3.zarr', mode='w')

((10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10

<xarray.backends.zarr.ZarrStore at 0x120671c4040>

**Slicing Using NetCDF**

Cube Chunk (200x200x200)

In [4]:
# First cube chunk: indices [0, 200) along time, latitude and longitude
cube_chunk1 = t2m.isel(
    {"time": slice(0, 200), "latitude": slice(0, 200), "longitude": slice(0, 200)}
).compute()
# Persist the in-memory chunk as its own NetCDF file
cube_chunk1.to_netcdf(path='cube1.nc')

In [5]:
# Second cube chunk: indices [200, 400) along time, latitude and longitude
cube_chunk2 = t2m.isel(
    {"time": slice(200, 400), "latitude": slice(200, 400), "longitude": slice(200, 400)}
).compute()
# Persist the in-memory chunk as its own NetCDF file
cube_chunk2.to_netcdf(path='cube2.nc')

In [6]:
# Third cube chunk: indices [400, 600) along time, latitude and longitude
cube_chunk3 = t2m.isel(
    {"time": slice(400, 600), "latitude": slice(400, 600), "longitude": slice(400, 600)}
).compute()
# Persist the in-memory chunk as its own NetCDF file
cube_chunk3.to_netcdf(path='cube3.nc')

In [7]:
# Size of all cube chunks on disk.
# Dedicated names are used inside the loop so that the notebook-level `file_path`
# (the source NetCDF file) and `file_size_bytes` are not overwritten by this cell.
for i in range(3):
    chunk_path = f"cube{i+1}.nc"
    chunk_size_bytes = os.path.getsize(chunk_path)
    print(f"Cube {i+1} size on disk: {chunk_size_bytes / (1024**2):.2f} MB")
    
    


Cube 1 size on disk: 15.27 MB
Cube 2 size on disk: 15.27 MB
Cube 3 size on disk: 15.27 MB


In [8]:
# Range query using the cube access pattern (50x50x50) on the first cube chunk.
# perf_counter() is the clock intended for measuring short intervals.
start = time.perf_counter()
# The context manager closes the file handle once the subset has been loaded,
# so the NetCDF file is not left open after the cell finishes.
with xr.open_dataset('cube1.nc') as q1_ds:
    q1 = q1_ds.isel(time=slice(0, 50), latitude=slice(0, 50), longitude=slice(0, 50)).compute()
    end = time.perf_counter()
execution_time = end - start
print(q1)
print(f"Execution time: {execution_time} s ")
# Calculating the size of the accessed data
accessed_size_bytes = q1.nbytes
accessed_size_mb = accessed_size_bytes / (1024**2)
print(f"Accessed data size: {accessed_size_mb:.2f} MB")

<xarray.Dataset> Size: 1MB
Dimensions:    (longitude: 50, latitude: 50, time: 50)
Coordinates:
  * longitude  (longitude) float32 200B -180.0 -179.8 -179.5 ... -168.0 -167.8
  * latitude   (latitude) float32 200B 90.0 89.75 89.5 ... 78.25 78.0 77.75
  * time       (time) datetime64[ns] 400B 1984-01-01 1984-01-02 ... 1984-02-19
Data variables:
    t2m        (time, latitude, longitude) float64 1MB 248.7 248.7 ... 245.6
Execution time: 0.025914907455444336 s 
Accessed data size: 0.95 MB


In [None]:
# Display the current value of q1 (set by whichever cube-pattern query cell ran last)
q1

In [9]:
# Range query using the cube access pattern (50x50x50) on the second cube chunk.
# perf_counter() is the clock intended for measuring short intervals.
start = time.perf_counter()
# The context manager closes the file handle once the subset has been loaded,
# so the NetCDF file is not left open after the cell finishes.
with xr.open_dataset('cube2.nc') as q1_ds:
    q1 = q1_ds.isel(time=slice(0, 50), latitude=slice(0, 50), longitude=slice(0, 50)).compute()
    end = time.perf_counter()
execution_time = end - start
print(q1)
print(f"Execution time: {execution_time} s ")
# Calculating the size of the accessed data
accessed_size_bytes = q1.nbytes
accessed_size_mb = accessed_size_bytes / (1024**2)
print(f"Accessed data size: {accessed_size_mb:.2f} MB")

<xarray.Dataset> Size: 1MB
Dimensions:    (longitude: 50, latitude: 50, time: 50)
Coordinates:
  * longitude  (longitude) float32 200B -130.0 -129.8 -129.5 ... -118.0 -117.8
  * latitude   (latitude) float32 200B 40.0 39.75 39.5 ... 28.25 28.0 27.75
  * time       (time) datetime64[ns] 400B 1984-07-19 1984-07-20 ... 1984-09-06
Data variables:
    t2m        (time, latitude, longitude) float64 1MB 287.7 287.7 ... 295.7
Execution time: 0.024009227752685547 s 
Accessed data size: 0.95 MB


In [10]:
# Range query using the cube access pattern (50x50x50) on the third cube chunk.
# perf_counter() is the clock intended for measuring short intervals.
start = time.perf_counter()
# The context manager closes the file handle once the subset has been loaded,
# so the NetCDF file is not left open after the cell finishes.
with xr.open_dataset('cube3.nc') as q1_ds:
    q1 = q1_ds.isel(time=slice(0, 50), latitude=slice(0, 50), longitude=slice(0, 50)).compute()
    end = time.perf_counter()
execution_time = end - start
print(q1)
print(f"Execution time: {execution_time} s ")
# Calculating the size of the accessed data
accessed_size_bytes = q1.nbytes
accessed_size_mb = accessed_size_bytes / (1024**2)
print(f"Accessed data size: {accessed_size_mb:.2f} MB")

<xarray.Dataset> Size: 1MB
Dimensions:    (longitude: 50, latitude: 50, time: 50)
Coordinates:
  * longitude  (longitude) float32 200B -80.0 -79.75 -79.5 ... -68.0 -67.75
  * latitude   (latitude) float32 200B -10.0 -10.25 -10.5 ... -22.0 -22.25
  * time       (time) datetime64[ns] 400B 1985-02-04 1985-02-05 ... 1985-03-25
Data variables:
    t2m        (time, latitude, longitude) float64 1MB 296.4 296.1 ... 276.1
Execution time: 0.03533434867858887 s 
Accessed data size: 0.95 MB


In [None]:
# Merge queries from the same file: 49 nested sub-cubes of growing edge length
# (1..49), all anchored at index 0 of 'cube1.nc', are loaded and merged.
# perf_counter() is the clock intended for measuring short intervals.
start = time.perf_counter()
cube_ds_list = []
# The context manager closes the file handle when the block ends; every query
# result is already in memory by then because of .compute().
with xr.open_dataset('cube1.nc') as cube_ds:
    for i in range(1, 50):
        cube_query = cube_ds.isel(time=slice(0, i), latitude=slice(0, i), longitude=slice(0, i)).compute()
        cube_ds_list.append(cube_query)
    cube_result = xr.merge(cube_ds_list).compute()
    end = time.perf_counter()
execution_time = end - start
print(cube_result)
execution_time

In [None]:
# Merge query results from different files: the first 50x50x50 block of each
# cube file is loaded, then the three results are merged into one dataset.
# perf_counter() is the clock intended for measuring short intervals.
start = time.perf_counter()
cube_ds_list = []
cube_files = ['cube1.nc', 'cube2.nc', 'cube3.nc']
for file in cube_files:
    # Close each file as soon as its subset is in memory instead of leaving
    # one open handle per file behind.
    with xr.open_dataset(file) as cube_ds:
        cube_query = cube_ds.isel(time=slice(0, 50), latitude=slice(0, 50), longitude=slice(0, 50)).compute()
    cube_ds_list.append(cube_query)
merge_cube_result = xr.merge(cube_ds_list).compute()
end = time.perf_counter()
execution_time = end - start
print(merge_cube_result)
execution_time

In [None]:
# Display the merged result built from the three cube files
merge_cube_result

NaN values appear because the merged chunks have different coordinate values.
 
For example, if one chunk has latitude indices 0 to 199 and another has 200 to 399, their coordinates do not overlap. `xr.merge` combines the chunks on the union of all coordinate values (an outer join), so every position that is covered by only one chunk, or by none, is filled with NaN.

In [23]:
# Range query using the vertical cuboid access pattern on the first cube chunk
# (10x10x40: 10 latitude x 10 longitude x 40 time steps).
# perf_counter() is the clock intended for measuring short intervals.
start = time.perf_counter()
# The context manager closes the file handle once the subset has been loaded,
# so the NetCDF file is not left open after the cell finishes.
with xr.open_dataset('cube1.nc') as q2_ds:
    q2 = q2_ds.isel(time=slice(0, 40), latitude=slice(0, 10), longitude=slice(0, 10)).compute()
    end = time.perf_counter()
execution_time = end - start
print(q2)
print(f"Execution time: {execution_time} s ")
# Calculating the size of the accessed data
accessed_size_bytes = q2.nbytes
accessed_size_mb = accessed_size_bytes / (1024**2)
print(f"Accessed data size: {accessed_size_mb:.2f} MB")

<xarray.Dataset> Size: 32kB
Dimensions:    (longitude: 10, latitude: 10, time: 40)
Coordinates:
  * longitude  (longitude) float32 40B -180.0 -179.8 -179.5 ... -178.0 -177.8
  * latitude   (latitude) float32 40B 90.0 89.75 89.5 89.25 ... 88.25 88.0 87.75
  * time       (time) datetime64[ns] 320B 1984-01-01 1984-01-02 ... 1984-02-09
Data variables:
    t2m        (time, latitude, longitude) float64 32kB 248.7 248.7 ... 243.9
Execution time: 0.016068220138549805 s 
Accessed data size: 0.03 MB


In [29]:
# Range query using the vertical cuboid access pattern on the first cube chunk
# (40x40x100: 40 latitude x 40 longitude x 100 time steps).
# perf_counter() is the clock intended for measuring short intervals.
start = time.perf_counter()
# The context manager closes the file handle once the subset has been loaded,
# so the NetCDF file is not left open after the cell finishes.
with xr.open_dataset('cube1.nc') as q2_ds:
    q2 = q2_ds.isel(time=slice(0, 100), latitude=slice(0, 40), longitude=slice(0, 40)).compute()
    end = time.perf_counter()
execution_time = end - start
print(q2)
print(f"Execution time: {execution_time} s ")
# Calculating the size of the accessed data
accessed_size_bytes = q2.nbytes
accessed_size_mb = accessed_size_bytes / (1024**2)
print(f"Accessed data size: {accessed_size_mb:.2f} MB")

<xarray.Dataset> Size: 1MB
Dimensions:    (longitude: 40, latitude: 40, time: 100)
Coordinates:
  * longitude  (longitude) float32 160B -180.0 -179.8 -179.5 ... -170.5 -170.2
  * latitude   (latitude) float32 160B 90.0 89.75 89.5 ... 80.75 80.5 80.25
  * time       (time) datetime64[ns] 800B 1984-01-01 1984-01-02 ... 1984-04-09
Data variables:
    t2m        (time, latitude, longitude) float64 1MB 248.7 248.7 ... 247.3
Execution time: 0.028816938400268555 s 
Accessed data size: 1.22 MB


In [24]:
# Range query using the vertical cuboid access pattern on the second cube chunk
# (10x10x40: 10 latitude x 10 longitude x 40 time steps).
# perf_counter() is the clock intended for measuring short intervals.
start = time.perf_counter()
# The context manager closes the file handle once the subset has been loaded,
# so the NetCDF file is not left open after the cell finishes.
with xr.open_dataset('cube2.nc') as q2_ds:
    q2 = q2_ds.isel(time=slice(0, 40), latitude=slice(0, 10), longitude=slice(0, 10)).compute()
    end = time.perf_counter()
execution_time = end - start
print(q2)
print(f"Execution time: {execution_time} s ")
# Calculating the size of the accessed data
accessed_size_bytes = q2.nbytes
accessed_size_mb = accessed_size_bytes / (1024**2)
print(f"Accessed data size: {accessed_size_mb:.2f} MB")

<xarray.Dataset> Size: 32kB
Dimensions:    (longitude: 10, latitude: 10, time: 40)
Coordinates:
  * longitude  (longitude) float32 40B -130.0 -129.8 -129.5 ... -128.0 -127.8
  * latitude   (latitude) float32 40B 40.0 39.75 39.5 39.25 ... 38.25 38.0 37.75
  * time       (time) datetime64[ns] 320B 1984-07-19 1984-07-20 ... 1984-08-27
Data variables:
    t2m        (time, latitude, longitude) float64 32kB 287.7 287.7 ... 290.6
Execution time: 0.04691886901855469 s 
Accessed data size: 0.03 MB


In [30]:
# Range query using the vertical cuboid access pattern on the second cube chunk
# (40x40x100: 40 latitude x 40 longitude x 100 time steps).
# perf_counter() is the clock intended for measuring short intervals.
start = time.perf_counter()
# The context manager closes the file handle once the subset has been loaded,
# so the NetCDF file is not left open after the cell finishes.
with xr.open_dataset('cube2.nc') as q2_ds:
    q2 = q2_ds.isel(time=slice(0, 100), latitude=slice(0, 40), longitude=slice(0, 40)).compute()
    end = time.perf_counter()
execution_time = end - start
print(q2)
print(f"Execution time: {execution_time} s ")
# Calculating the size of the accessed data
accessed_size_bytes = q2.nbytes
accessed_size_mb = accessed_size_bytes / (1024**2)
print(f"Accessed data size: {accessed_size_mb:.2f} MB")

<xarray.Dataset> Size: 1MB
Dimensions:    (longitude: 40, latitude: 40, time: 100)
Coordinates:
  * longitude  (longitude) float32 160B -130.0 -129.8 -129.5 ... -120.5 -120.2
  * latitude   (latitude) float32 160B 40.0 39.75 39.5 ... 30.75 30.5 30.25
  * time       (time) datetime64[ns] 800B 1984-07-19 1984-07-20 ... 1984-10-26
Data variables:
    t2m        (time, latitude, longitude) float64 1MB 287.7 287.7 ... 290.8
Execution time: 0.028264760971069336 s 
Accessed data size: 1.22 MB


In [35]:
# Range query using the horizontal cuboid access pattern on the first cube chunk
# (100x100x25: 100 latitude x 100 longitude x 25 time steps).
# perf_counter() is the clock intended for measuring short intervals.
start = time.perf_counter()
# The context manager closes the file handle once the subset has been loaded,
# so the NetCDF file is not left open after the cell finishes.
with xr.open_dataset('cube1.nc') as q3_ds:
    q3 = q3_ds.isel(time=slice(0, 25), latitude=slice(0, 100), longitude=slice(0, 100)).compute()
    end = time.perf_counter()
execution_time = end - start
print(q3)
print(f"Execution time: {execution_time} s ")
# Calculating the size of the accessed data
accessed_size_bytes = q3.nbytes
accessed_size_mb = accessed_size_bytes / (1024**2)
print(f"Accessed data size: {accessed_size_mb:.2f} MB")

<xarray.Dataset> Size: 2MB
Dimensions:    (longitude: 100, latitude: 100, time: 25)
Coordinates:
  * longitude  (longitude) float32 400B -180.0 -179.8 -179.5 ... -155.5 -155.2
  * latitude   (latitude) float32 400B 90.0 89.75 89.5 ... 65.75 65.5 65.25
  * time       (time) datetime64[ns] 200B 1984-01-01 1984-01-02 ... 1984-01-25
Data variables:
    t2m        (time, latitude, longitude) float64 2MB 248.7 248.7 ... 235.8
Execution time: 0.029001951217651367 s 
Accessed data size: 1.91 MB


In [36]:
# Range query using the horizontal cuboid access pattern on the second cube chunk
# (100x100x25: 100 latitude x 100 longitude x 25 time steps).
# perf_counter() is the clock intended for measuring short intervals.
start = time.perf_counter()
# The context manager closes the file handle once the subset has been loaded,
# so the NetCDF file is not left open after the cell finishes.
with xr.open_dataset('cube2.nc') as q3_ds:
    q3 = q3_ds.isel(time=slice(0, 25), latitude=slice(0, 100), longitude=slice(0, 100)).compute()
    end = time.perf_counter()
execution_time = end - start
print(q3)
print(f"Execution time: {execution_time} s ")
# Calculating the size of the accessed data
accessed_size_bytes = q3.nbytes
accessed_size_mb = accessed_size_bytes / (1024**2)
print(f"Accessed data size: {accessed_size_mb:.2f} MB")

<xarray.Dataset> Size: 2MB
Dimensions:    (longitude: 100, latitude: 100, time: 25)
Coordinates:
  * longitude  (longitude) float32 400B -130.0 -129.8 -129.5 ... -105.5 -105.2
  * latitude   (latitude) float32 400B 40.0 39.75 39.5 ... 15.75 15.5 15.25
  * time       (time) datetime64[ns] 200B 1984-07-19 1984-07-20 ... 1984-08-12
Data variables:
    t2m        (time, latitude, longitude) float64 2MB 287.7 287.7 ... 300.2
Execution time: 0.023001670837402344 s 
Accessed data size: 1.91 MB


Vertical Cuboid (100x100x800)

In [13]:
# First vertical cuboid chunk: 800 time steps over the first 100x100 latitude/longitude cells
cuboidv_chunk1 = t2m.isel(
    {"time": slice(0, 800), "latitude": slice(0, 100), "longitude": slice(0, 100)}
).compute()
# Persist the in-memory chunk as its own NetCDF file
cuboidv_chunk1.to_netcdf(path='cuboidv1.nc')

In [14]:
# Second vertical cuboid chunk: time steps [800, 1600) over latitude/longitude indices [100, 200).
# NOTE: the result is bound to the name `cuboidv_chunk1` again, replacing the first chunk.
cuboidv_chunk1 = t2m.isel(
    {"time": slice(800, 1600), "latitude": slice(100, 200), "longitude": slice(100, 200)}
).compute()
# Persist the in-memory chunk as its own NetCDF file
cuboidv_chunk1.to_netcdf(path='cuboidv2.nc')

In [15]:
# Size of all vertical cuboid chunks on disk.
# Dedicated names are used inside the loop so that the notebook-level `file_path`
# (the source NetCDF file) and `file_size_bytes` are not overwritten by this cell.
for i in range(2):
    chunk_path = f"cuboidv{i+1}.nc"
    chunk_size_bytes = os.path.getsize(chunk_path)
    print(f"Cuboid {i+1} size on disk: {chunk_size_bytes / (1024**2):.2f} MB")

Cuboid 1 size on disk: 15.27 MB
Cuboid 2 size on disk: 15.27 MB


In [16]:
# Range query using the cube access pattern (50x50x50) on the first vertical cuboid chunk.
# perf_counter() is the clock intended for measuring short intervals.
start = time.perf_counter()
# The context manager closes the file handle once the subset has been loaded,
# so the NetCDF file is not left open after the cell finishes.
with xr.open_dataset('cuboidv1.nc') as q1_ds:
    q1 = q1_ds.isel(time=slice(0, 50), latitude=slice(0, 50), longitude=slice(0, 50)).compute()
    end = time.perf_counter()
execution_time = end - start
print(q1)
print(f"Execution time: {execution_time} s ")
# Calculating the size of the accessed data
accessed_size_bytes = q1.nbytes
accessed_size_mb = accessed_size_bytes / (1024**2)
print(f"Accessed data size: {accessed_size_mb:.2f} MB")

<xarray.Dataset> Size: 1MB
Dimensions:    (longitude: 50, latitude: 50, time: 50)
Coordinates:
  * longitude  (longitude) float32 200B -180.0 -179.8 -179.5 ... -168.0 -167.8
  * latitude   (latitude) float32 200B 90.0 89.75 89.5 ... 78.25 78.0 77.75
  * time       (time) datetime64[ns] 400B 1984-01-01 1984-01-02 ... 1984-02-19
Data variables:
    t2m        (time, latitude, longitude) float64 1MB 248.7 248.7 ... 245.6
Execution time: 0.03468918800354004 s 
Accessed data size: 0.95 MB


In [17]:
# Range query using the cube access pattern (50x50x50) on the second vertical cuboid chunk.
# perf_counter() is the clock intended for measuring short intervals.
start = time.perf_counter()
# The context manager closes the file handle once the subset has been loaded,
# so the NetCDF file is not left open after the cell finishes.
with xr.open_dataset('cuboidv2.nc') as q1_ds:
    q1 = q1_ds.isel(time=slice(0, 50), latitude=slice(0, 50), longitude=slice(0, 50)).compute()
    end = time.perf_counter()
execution_time = end - start
print(q1)
print(f"Execution time: {execution_time} s ")
# Calculating the size of the accessed data
accessed_size_bytes = q1.nbytes
accessed_size_mb = accessed_size_bytes / (1024**2)
print(f"Accessed data size: {accessed_size_mb:.2f} MB")

<xarray.Dataset> Size: 1MB
Dimensions:    (longitude: 50, latitude: 50, time: 50)
Coordinates:
  * longitude  (longitude) float32 200B -155.0 -154.8 -154.5 ... -143.0 -142.8
  * latitude   (latitude) float32 200B 65.0 64.75 64.5 ... 53.25 53.0 52.75
  * time       (time) datetime64[ns] 400B 1986-03-11 1986-03-12 ... 1986-04-29
Data variables:
    t2m        (time, latitude, longitude) float64 1MB 253.9 254.0 ... 279.0
Execution time: 0.014892578125 s 
Accessed data size: 0.95 MB


In [25]:
# Range query using the vertical cuboid access pattern on the first vertical cuboid chunk
# (10x10x40: 10 latitude x 10 longitude x 40 time steps).
# perf_counter() is the clock intended for measuring short intervals.
start = time.perf_counter()
# The context manager closes the file handle once the subset has been loaded,
# so the NetCDF file is not left open after the cell finishes.
with xr.open_dataset('cuboidv1.nc') as q2_ds:
    q2 = q2_ds.isel(time=slice(0, 40), latitude=slice(0, 10), longitude=slice(0, 10)).compute()
    end = time.perf_counter()
execution_time = end - start
print(q2)
print(f"Execution time: {execution_time} s ")
# Calculating the size of the accessed data
accessed_size_bytes = q2.nbytes
accessed_size_mb = accessed_size_bytes / (1024**2)
print(f"Accessed data size: {accessed_size_mb:.2f} MB")

<xarray.Dataset> Size: 32kB
Dimensions:    (longitude: 10, latitude: 10, time: 40)
Coordinates:
  * longitude  (longitude) float32 40B -180.0 -179.8 -179.5 ... -178.0 -177.8
  * latitude   (latitude) float32 40B 90.0 89.75 89.5 89.25 ... 88.25 88.0 87.75
  * time       (time) datetime64[ns] 320B 1984-01-01 1984-01-02 ... 1984-02-09
Data variables:
    t2m        (time, latitude, longitude) float64 32kB 248.7 248.7 ... 243.9
Execution time: 0.019104719161987305 s 
Accessed data size: 0.03 MB


In [31]:
# Range query using the vertical cuboid access pattern on the first vertical cuboid chunk
# (40x40x100: 40 latitude x 40 longitude x 100 time steps).
# perf_counter() is the clock intended for measuring short intervals.
start = time.perf_counter()
# The context manager closes the file handle once the subset has been loaded,
# so the NetCDF file is not left open after the cell finishes.
with xr.open_dataset('cuboidv1.nc') as q2_ds:
    q2 = q2_ds.isel(time=slice(0, 100), latitude=slice(0, 40), longitude=slice(0, 40)).compute()
    end = time.perf_counter()
execution_time = end - start
print(q2)
print(f"Execution time: {execution_time} s ")
# Calculating the size of the accessed data
accessed_size_bytes = q2.nbytes
accessed_size_mb = accessed_size_bytes / (1024**2)
print(f"Accessed data size: {accessed_size_mb:.2f} MB")

<xarray.Dataset> Size: 1MB
Dimensions:    (longitude: 40, latitude: 40, time: 100)
Coordinates:
  * longitude  (longitude) float32 160B -180.0 -179.8 -179.5 ... -170.5 -170.2
  * latitude   (latitude) float32 160B 90.0 89.75 89.5 ... 80.75 80.5 80.25
  * time       (time) datetime64[ns] 800B 1984-01-01 1984-01-02 ... 1984-04-09
Data variables:
    t2m        (time, latitude, longitude) float64 1MB 248.7 248.7 ... 247.3
Execution time: 0.022670745849609375 s 
Accessed data size: 1.22 MB


In [26]:
# Range query using the vertical cuboid access pattern on the second vertical cuboid chunk
# (10x10x40: 10 latitude x 10 longitude x 40 time steps).
# perf_counter() is the clock intended for measuring short intervals.
start = time.perf_counter()
# The context manager closes the file handle once the subset has been loaded,
# so the NetCDF file is not left open after the cell finishes.
with xr.open_dataset('cuboidv2.nc') as q2_ds:
    q2 = q2_ds.isel(time=slice(0, 40), latitude=slice(0, 10), longitude=slice(0, 10)).compute()
    end = time.perf_counter()
execution_time = end - start
print(q2)
print(f"Execution time: {execution_time} s ")
# Calculating the size of the accessed data
accessed_size_bytes = q2.nbytes
accessed_size_mb = accessed_size_bytes / (1024**2)
print(f"Accessed data size: {accessed_size_mb:.2f} MB")

<xarray.Dataset> Size: 32kB
Dimensions:    (longitude: 10, latitude: 10, time: 40)
Coordinates:
  * longitude  (longitude) float32 40B -155.0 -154.8 -154.5 ... -153.0 -152.8
  * latitude   (latitude) float32 40B 65.0 64.75 64.5 64.25 ... 63.25 63.0 62.75
  * time       (time) datetime64[ns] 320B 1986-03-11 1986-03-12 ... 1986-04-19
Data variables:
    t2m        (time, latitude, longitude) float64 32kB 253.9 254.0 ... 268.0
Execution time: 0.014009237289428711 s 
Accessed data size: 0.03 MB


In [32]:
# Range query using the vertical cuboid access pattern on the second vertical cuboid chunk
# (40x40x100: 40 latitude x 40 longitude x 100 time steps).
# perf_counter() is the clock intended for measuring short intervals.
start = time.perf_counter()
# The context manager closes the file handle once the subset has been loaded,
# so the NetCDF file is not left open after the cell finishes.
with xr.open_dataset('cuboidv2.nc') as q2_ds:
    q2 = q2_ds.isel(time=slice(0, 100), latitude=slice(0, 40), longitude=slice(0, 40)).compute()
    end = time.perf_counter()
execution_time = end - start
print(q2)
print(f"Execution time: {execution_time} s ")
# Calculating the size of the accessed data
accessed_size_bytes = q2.nbytes
accessed_size_mb = accessed_size_bytes / (1024**2)
print(f"Accessed data size: {accessed_size_mb:.2f} MB")

<xarray.Dataset> Size: 1MB
Dimensions:    (longitude: 40, latitude: 40, time: 100)
Coordinates:
  * longitude  (longitude) float32 160B -155.0 -154.8 -154.5 ... -145.5 -145.2
  * latitude   (latitude) float32 160B 65.0 64.75 64.5 ... 55.75 55.5 55.25
  * time       (time) datetime64[ns] 800B 1986-03-11 1986-03-12 ... 1986-06-18
Data variables:
    t2m        (time, latitude, longitude) float64 1MB 253.9 254.0 ... 281.0
Execution time: 0.021991729736328125 s 
Accessed data size: 1.22 MB


In [37]:
# Range query using the horizontal cuboid access pattern on the first vertical cuboid chunk
# (100x100x25: 100 latitude x 100 longitude x 25 time steps).
# perf_counter() is the clock intended for measuring short intervals.
start = time.perf_counter()
# The context manager closes the file handle once the subset has been loaded,
# so the NetCDF file is not left open after the cell finishes.
with xr.open_dataset('cuboidv1.nc') as q3_ds:
    q3 = q3_ds.isel(time=slice(0, 25), latitude=slice(0, 100), longitude=slice(0, 100)).compute()
    end = time.perf_counter()
execution_time = end - start
print(q3)
print(f"Execution time: {execution_time} s ")
# Calculating the size of the accessed data
accessed_size_bytes = q3.nbytes
accessed_size_mb = accessed_size_bytes / (1024**2)
print(f"Accessed data size: {accessed_size_mb:.2f} MB")

<xarray.Dataset> Size: 2MB
Dimensions:    (longitude: 100, latitude: 100, time: 25)
Coordinates:
  * longitude  (longitude) float32 400B -180.0 -179.8 -179.5 ... -155.5 -155.2
  * latitude   (latitude) float32 400B 90.0 89.75 89.5 ... 65.75 65.5 65.25
  * time       (time) datetime64[ns] 200B 1984-01-01 1984-01-02 ... 1984-01-25
Data variables:
    t2m        (time, latitude, longitude) float64 2MB 248.7 248.7 ... 235.8
Execution time: 0.02421116828918457 s 
Accessed data size: 1.91 MB


In [38]:
# Range query using the horizontal cuboid access pattern on the second vertical cuboid chunk
# (100x100x25: 100 latitude x 100 longitude x 25 time steps).
# perf_counter() is the clock intended for measuring short intervals.
start = time.perf_counter()
# The context manager closes the file handle once the subset has been loaded,
# so the NetCDF file is not left open after the cell finishes.
with xr.open_dataset('cuboidv2.nc') as q3_ds:
    q3 = q3_ds.isel(time=slice(0, 25), latitude=slice(0, 100), longitude=slice(0, 100)).compute()
    end = time.perf_counter()
execution_time = end - start
print(q3)
print(f"Execution time: {execution_time} s ")
# Calculating the size of the accessed data
accessed_size_bytes = q3.nbytes
accessed_size_mb = accessed_size_bytes / (1024**2)
print(f"Accessed data size: {accessed_size_mb:.2f} MB")

<xarray.Dataset> Size: 2MB
Dimensions:    (longitude: 100, latitude: 100, time: 25)
Coordinates:
  * longitude  (longitude) float32 400B -155.0 -154.8 -154.5 ... -130.5 -130.2
  * latitude   (latitude) float32 400B 65.0 64.75 64.5 ... 40.75 40.5 40.25
  * time       (time) datetime64[ns] 200B 1986-03-11 1986-03-12 ... 1986-04-04
Data variables:
    t2m        (time, latitude, longitude) float64 2MB 253.9 254.0 ... 283.7
Execution time: 0.025206565856933594 s 
Accessed data size: 1.91 MB


Horizontal Cuboid (400x400x50)

In [18]:
# First horizontal cuboid chunk: 50 time steps over the first 400x400 latitude/longitude cells.
# NOTE: the result is bound to the name `cuboidv_chunk1`, the same name the vertical cuboid cells use.
cuboidv_chunk1 = t2m.isel(
    {"time": slice(0, 50), "latitude": slice(0, 400), "longitude": slice(0, 400)}
).compute()
# Persist the in-memory chunk as its own NetCDF file
cuboidv_chunk1.to_netcdf(path='cuboidh1.nc')

In [19]:
# Second horizontal cuboid chunk: time steps [50, 100) over latitude/longitude indices [400, 800).
# The latitude axis has only 721 values, so slice(400, 800) is clamped and this
# chunk ends up with fewer than 400 latitude rows.
# NOTE: the result is bound to the name `cuboidv_chunk1`, the same name the vertical cuboid cells use.
cuboidv_chunk1 = t2m.isel(
    {"time": slice(50, 100), "latitude": slice(400, 800), "longitude": slice(400, 800)}
).compute()
# Persist the in-memory chunk as its own NetCDF file
cuboidv_chunk1.to_netcdf(path='cuboidh2.nc')

In [20]:
# Size of all horizontal cuboid chunks on disk.
# Dedicated names are used inside the loop so that the notebook-level `file_path`
# (the source NetCDF file) and `file_size_bytes` are not overwritten by this cell.
for i in range(2):
    chunk_path = f"cuboidh{i+1}.nc"
    chunk_size_bytes = os.path.getsize(chunk_path)
    print(f"Cuboid {i+1} size on disk: {chunk_size_bytes / (1024**2):.2f} MB")

Cuboid 1 size on disk: 15.27 MB
Cuboid 2 size on disk: 12.26 MB


In [21]:
# Range query using the cube access pattern (50x50x50) on the first horizontal cuboid chunk.
# perf_counter() is the clock intended for measuring short intervals.
start = time.perf_counter()
# The context manager closes the file handle once the subset has been loaded,
# so the NetCDF file is not left open after the cell finishes.
with xr.open_dataset('cuboidh1.nc') as q1_ds:
    q1 = q1_ds.isel(time=slice(0, 50), latitude=slice(0, 50), longitude=slice(0, 50)).compute()
    end = time.perf_counter()
execution_time = end - start
print(q1)
print(f"Execution time: {execution_time} s ")
# Calculating the size of the accessed data
accessed_size_bytes = q1.nbytes
accessed_size_mb = accessed_size_bytes / (1024**2)
print(f"Accessed data size: {accessed_size_mb:.2f} MB")

<xarray.Dataset> Size: 1MB
Dimensions:    (longitude: 50, latitude: 50, time: 50)
Coordinates:
  * longitude  (longitude) float32 200B -180.0 -179.8 -179.5 ... -168.0 -167.8
  * latitude   (latitude) float32 200B 90.0 89.75 89.5 ... 78.25 78.0 77.75
  * time       (time) datetime64[ns] 400B 1984-01-01 1984-01-02 ... 1984-02-19
Data variables:
    t2m        (time, latitude, longitude) float64 1MB 248.7 248.7 ... 245.6
Execution time: 0.028993844985961914 s 
Accessed data size: 0.95 MB


In [22]:
# Range query using the cube access pattern (50x50x50) on the second horizontal cuboid chunk.
# perf_counter() is the clock intended for measuring short intervals.
start = time.perf_counter()
# The context manager closes the file handle once the subset has been loaded,
# so the NetCDF file is not left open after the cell finishes.
with xr.open_dataset('cuboidh2.nc') as q1_ds:
    q1 = q1_ds.isel(time=slice(0, 50), latitude=slice(0, 50), longitude=slice(0, 50)).compute()
    end = time.perf_counter()
execution_time = end - start
print(q1)
print(f"Execution time: {execution_time} s ")
# Calculating the size of the accessed data
accessed_size_bytes = q1.nbytes
accessed_size_mb = accessed_size_bytes / (1024**2)
print(f"Accessed data size: {accessed_size_mb:.2f} MB")

<xarray.Dataset> Size: 1MB
Dimensions:    (longitude: 50, latitude: 50, time: 50)
Coordinates:
  * longitude  (longitude) float32 200B -80.0 -79.75 -79.5 ... -68.0 -67.75
  * latitude   (latitude) float32 200B -10.0 -10.25 -10.5 ... -22.0 -22.25
  * time       (time) datetime64[ns] 400B 1984-02-20 1984-02-21 ... 1984-04-09
Data variables:
    t2m        (time, latitude, longitude) float64 1MB 296.6 296.4 ... 274.5
Execution time: 0.026038408279418945 s 
Accessed data size: 0.95 MB


In [27]:
# Range query using the vertical cuboid access pattern on the first horizontal cuboid chunk
# (10x10x40: 10 latitude x 10 longitude x 40 time steps).
# perf_counter() is the clock intended for measuring short intervals.
start = time.perf_counter()
# The context manager closes the file handle once the subset has been loaded,
# so the NetCDF file is not left open after the cell finishes.
with xr.open_dataset('cuboidh1.nc') as q2_ds:
    q2 = q2_ds.isel(time=slice(0, 40), latitude=slice(0, 10), longitude=slice(0, 10)).compute()
    end = time.perf_counter()
execution_time = end - start
print(q2)
print(f"Execution time: {execution_time} s ")
# Calculating the size of the accessed data
accessed_size_bytes = q2.nbytes
accessed_size_mb = accessed_size_bytes / (1024**2)
print(f"Accessed data size: {accessed_size_mb:.2f} MB")

<xarray.Dataset> Size: 32kB
Dimensions:    (longitude: 10, latitude: 10, time: 40)
Coordinates:
  * longitude  (longitude) float32 40B -180.0 -179.8 -179.5 ... -178.0 -177.8
  * latitude   (latitude) float32 40B 90.0 89.75 89.5 89.25 ... 88.25 88.0 87.75
  * time       (time) datetime64[ns] 320B 1984-01-01 1984-01-02 ... 1984-02-09
Data variables:
    t2m        (time, latitude, longitude) float64 32kB 248.7 248.7 ... 243.9
Execution time: 0.029966354370117188 s 
Accessed data size: 0.03 MB


In [34]:
# Range query using the vertical cuboid access pattern on the first horizontal cuboid chunk
# (40x40x100): 40 latitudes, 40 longitudes, up to 100 time steps.
# NOTE: the recorded output of this cell reports `time: 50`, i.e. slice(0, 100) is clipped to
# the time steps actually present in the file, so the data read is 40x40x50, not 40x40x100.
# perf_counter() is the high-resolution clock intended for timing short durations;
# time.time() is a wall clock whose resolution may be coarser than these millisecond-scale reads.
start = time.perf_counter()
# The context manager closes the NetCDF file handle once the slice is in memory,
# so repeated runs of this cell do not accumulate open files.
with xr.open_dataset('cuboidh1.nc') as q2_ds:
    q2 = q2_ds.isel(time=slice(0, 100), latitude=slice(0, 40), longitude=slice(0, 40)).compute()
    # Stop the clock before the file is closed: the measurement covers open + slice + load only.
    end = time.perf_counter()
execution_time = end - start
print(q2)
print(f"Execution time: {execution_time} s ")
# Calculating the size of the accessed data
accessed_size_bytes = q2.nbytes
accessed_size_mb = accessed_size_bytes / (1024**2)
print(f"Accessed data size: {accessed_size_mb:.2f} MB")

<xarray.Dataset> Size: 641kB
Dimensions:    (longitude: 40, latitude: 40, time: 50)
Coordinates:
  * longitude  (longitude) float32 160B -180.0 -179.8 -179.5 ... -170.5 -170.2
  * latitude   (latitude) float32 160B 90.0 89.75 89.5 ... 80.75 80.5 80.25
  * time       (time) datetime64[ns] 400B 1984-01-01 1984-01-02 ... 1984-02-19
Data variables:
    t2m        (time, latitude, longitude) float64 640kB 248.7 248.7 ... 242.0
Execution time: 0.02598714828491211 s 
Accessed data size: 0.61 MB


In [28]:
# Range query using the vertical cuboid access pattern on the second horizontal cuboid chunk
# (10x10x40): 10 latitudes, 10 longitudes, 40 time steps.
# perf_counter() is the high-resolution clock intended for timing short durations;
# time.time() is a wall clock whose resolution may be coarser than these millisecond-scale reads.
start = time.perf_counter()
# The context manager closes the NetCDF file handle once the slice is in memory,
# so repeated runs of this cell do not accumulate open files.
with xr.open_dataset('cuboidh2.nc') as q2_ds:
    q2 = q2_ds.isel(time=slice(0, 40), latitude=slice(0, 10), longitude=slice(0, 10)).compute()
    # Stop the clock before the file is closed: the measurement covers open + slice + load only.
    end = time.perf_counter()
execution_time = end - start
print(q2)
print(f"Execution time: {execution_time} s ")
# Calculating the size of the accessed data
accessed_size_bytes = q2.nbytes
accessed_size_mb = accessed_size_bytes / (1024**2)
print(f"Accessed data size: {accessed_size_mb:.2f} MB")

<xarray.Dataset> Size: 32kB
Dimensions:    (longitude: 10, latitude: 10, time: 40)
Coordinates:
  * longitude  (longitude) float32 40B -80.0 -79.75 -79.5 ... -78.0 -77.75
  * latitude   (latitude) float32 40B -10.0 -10.25 -10.5 ... -11.75 -12.0 -12.25
  * time       (time) datetime64[ns] 320B 1984-02-20 1984-02-21 ... 1984-03-30
Data variables:
    t2m        (time, latitude, longitude) float64 32kB 296.6 296.4 ... 294.6
Execution time: 0.012999773025512695 s 
Accessed data size: 0.03 MB


In [33]:
# Range query using the vertical cuboid access pattern on the second horizontal cuboid chunk
# (40x40x100): 40 latitudes, 40 longitudes, up to 100 time steps.
# NOTE: the recorded output of this cell reports `time: 50`, i.e. slice(0, 100) is clipped to
# the time steps actually present in the file, so the data read is 40x40x50, not 40x40x100.
# perf_counter() is the high-resolution clock intended for timing short durations;
# time.time() is a wall clock whose resolution may be coarser than these millisecond-scale reads.
start = time.perf_counter()
# The context manager closes the NetCDF file handle once the slice is in memory,
# so repeated runs of this cell do not accumulate open files.
with xr.open_dataset('cuboidh2.nc') as q2_ds:
    q2 = q2_ds.isel(time=slice(0, 100), latitude=slice(0, 40), longitude=slice(0, 40)).compute()
    # Stop the clock before the file is closed: the measurement covers open + slice + load only.
    end = time.perf_counter()
execution_time = end - start
print(q2)
print(f"Execution time: {execution_time} s ")
# Calculating the size of the accessed data
accessed_size_bytes = q2.nbytes
accessed_size_mb = accessed_size_bytes / (1024**2)
print(f"Accessed data size: {accessed_size_mb:.2f} MB")

<xarray.Dataset> Size: 641kB
Dimensions:    (longitude: 40, latitude: 40, time: 50)
Coordinates:
  * longitude  (longitude) float32 160B -80.0 -79.75 -79.5 ... -70.5 -70.25
  * latitude   (latitude) float32 160B -10.0 -10.25 -10.5 ... -19.5 -19.75
  * time       (time) datetime64[ns] 400B 1984-02-20 1984-02-21 ... 1984-04-09
Data variables:
    t2m        (time, latitude, longitude) float64 640kB 296.6 296.4 ... 292.5
Execution time: 0.018498897552490234 s 
Accessed data size: 0.61 MB


In [39]:
# Range query using the horizontal cuboid access pattern on the first horizontal cuboid chunk
# (100x100x25): 100 latitudes, 100 longitudes, 25 time steps.
# perf_counter() is the high-resolution clock intended for timing short durations;
# time.time() is a wall clock whose resolution may be coarser than these millisecond-scale reads.
start = time.perf_counter()
# The context manager closes the NetCDF file handle once the slice is in memory,
# so repeated runs of this cell do not accumulate open files.
with xr.open_dataset('cuboidh1.nc') as q3_ds:
    q3 = q3_ds.isel(time=slice(0, 25), latitude=slice(0, 100), longitude=slice(0, 100)).compute()
    # Stop the clock before the file is closed: the measurement covers open + slice + load only.
    end = time.perf_counter()
execution_time = end - start
print(q3)
print(f"Execution time: {execution_time} s ")
# Calculating the size of the accessed data
accessed_size_bytes = q3.nbytes
accessed_size_mb = accessed_size_bytes / (1024**2)
print(f"Accessed data size: {accessed_size_mb:.2f} MB")

<xarray.Dataset> Size: 2MB
Dimensions:    (longitude: 100, latitude: 100, time: 25)
Coordinates:
  * longitude  (longitude) float32 400B -180.0 -179.8 -179.5 ... -155.5 -155.2
  * latitude   (latitude) float32 400B 90.0 89.75 89.5 ... 65.75 65.5 65.25
  * time       (time) datetime64[ns] 200B 1984-01-01 1984-01-02 ... 1984-01-25
Data variables:
    t2m        (time, latitude, longitude) float64 2MB 248.7 248.7 ... 235.8
Execution time: 0.018004417419433594 s 
Accessed data size: 1.91 MB


In [40]:
# Range query using the horizontal cuboid access pattern on the second horizontal cuboid chunk
# (100x100x25): 100 latitudes, 100 longitudes, 25 time steps.
# perf_counter() is the high-resolution clock intended for timing short durations;
# time.time() is a wall clock whose resolution may be coarser than these millisecond-scale reads.
start = time.perf_counter()
# The context manager closes the NetCDF file handle once the slice is in memory,
# so repeated runs of this cell do not accumulate open files.
with xr.open_dataset('cuboidh2.nc') as q3_ds:
    q3 = q3_ds.isel(time=slice(0, 25), latitude=slice(0, 100), longitude=slice(0, 100)).compute()
    # Stop the clock before the file is closed: the measurement covers open + slice + load only.
    end = time.perf_counter()
execution_time = end - start
print(q3)
print(f"Execution time: {execution_time} s ")
# Calculating the size of the accessed data
accessed_size_bytes = q3.nbytes
accessed_size_mb = accessed_size_bytes / (1024**2)
print(f"Accessed data size: {accessed_size_mb:.2f} MB")

<xarray.Dataset> Size: 2MB
Dimensions:    (longitude: 100, latitude: 100, time: 25)
Coordinates:
  * longitude  (longitude) float32 400B -80.0 -79.75 -79.5 ... -55.5 -55.25
  * latitude   (latitude) float32 400B -10.0 -10.25 -10.5 ... -34.5 -34.75
  * time       (time) datetime64[ns] 200B 1984-02-20 1984-02-21 ... 1984-03-15
Data variables:
    t2m        (time, latitude, longitude) float64 2MB 296.6 296.4 ... 294.8
Execution time: 0.010125160217285156 s 
Accessed data size: 1.91 MB


Old code for reference

In [None]:
# 1. Time slice: every latitude and longitude at the first time index (time=0)
time_slice = t2m.isel({'time': 0})
print(f"\nTime Slice (t=0):\n{time_slice}")
# Keep this bare expression last so the notebook displays the value at the
# first index of both remaining dimensions.
time_slice[0, 0].values

In [None]:
# Cube chunk (200x200x200): the first 200 indices along each of time, latitude and longitude
cube_chunk = t2m.isel({dim: slice(0, 200) for dim in ('time', 'latitude', 'longitude')})
print(cube_chunk)

In [None]:
# Range query for cube chunk (index) -> cube access pattern (50x50x50)
def query_cube_chunk_cube_50():
    """Index-based range query on `cube_chunk`: first 50 steps of time, latitude and longitude.

    Named descriptively instead of `time` so it does not rebind the `time`
    module imported at the top of the notebook (which would break every
    later call into that module).
    """
    query = cube_chunk.isel(time=slice(0, 50), latitude=slice(0, 50), longitude=slice(0, 50))
    # .values converts the selection to a NumPy array before it is printed
    print("here",query.values)
execution_time = timeit.timeit(query_cube_chunk_cube_50, number = 1)
print(f"Execution Time: {execution_time}")

In [None]:
# Range query for cube chunk (index) -> cube access pattern (40x40x40)
def query_cube_chunk_cube_40():
    """Index-based range query on `cube_chunk`: first 40 steps of time, latitude and longitude.

    Named descriptively instead of `time` so it does not rebind the `time`
    module imported at the top of the notebook (which would break every
    later call into that module).
    """
    query = cube_chunk.isel(time=slice(0, 40), latitude=slice(0, 40), longitude=slice(0, 40))
    # .values converts the selection to a NumPy array before it is printed
    print("here",query.values)
execution_time = timeit.timeit(query_cube_chunk_cube_40, number = 1)
print(f"Execution Time: {execution_time}")

In [None]:
# Range query for cube chunk (index) -> vertical cuboid access pattern
# (80 time steps x 10 latitudes x 10 longitudes)
def query_cube_chunk_vertical_80x10x10():
    """Index-based range query on `cube_chunk`: time 0-80, latitude 0-10, longitude 0-10.

    Named descriptively instead of `time` so it does not rebind the `time`
    module imported at the top of the notebook (which would break every
    later call into that module).
    """
    query = cube_chunk.isel(time=slice(0, 80), latitude=slice(0, 10), longitude=slice(0, 10))
    # .values converts the selection to a NumPy array before it is printed
    print(query.values)
execution_time = timeit.timeit(query_cube_chunk_vertical_80x10x10, number = 1)
print(f"Execution Time: {execution_time}")

In [None]:
# Range query for cube chunk (index) -> vertical cuboid access pattern
# (100 time steps x 40 latitudes x 40 longitudes)
def query_cube_chunk_vertical_100x40x40():
    """Index-based range query on `cube_chunk`: time 0-100, latitude 0-40, longitude 0-40.

    Named descriptively instead of `time` so it does not rebind the `time`
    module imported at the top of the notebook (which would break every
    later call into that module).
    """
    query = cube_chunk.isel(time=slice(0, 100), latitude=slice(0, 40), longitude=slice(0, 40))
    # .values converts the selection to a NumPy array before it is printed
    print(query.values)
execution_time = timeit.timeit(query_cube_chunk_vertical_100x40x40, number = 1)
print(f"Execution Time: {execution_time}")

In [None]:
# Range query for cube chunk (index) -> horizontal cuboid access pattern
# (25 time steps x 100 latitudes x 100 longitudes)
def query_cube_chunk_horizontal_25x100x100():
    """Index-based range query on `cube_chunk`: time 0-25, latitude 0-100, longitude 0-100.

    Named descriptively instead of `time` so it does not rebind the `time`
    module imported at the top of the notebook (which would break every
    later call into that module).
    """
    query = cube_chunk.isel(time=slice(0, 25), latitude=slice(0, 100), longitude=slice(0, 100))
    # .values converts the selection to a NumPy array before it is printed
    print(query.values)
execution_time = timeit.timeit(query_cube_chunk_horizontal_25x100x100, number = 1)
print(f"Execution Time: {execution_time}")

Vertical cuboid chunk (800x100x100)

In [None]:
# Vertical cuboid chunk (800x100x100): 800 time indices, 100 latitude indices, 100 longitude indices
vertical_cuboid_chunk = t2m.isel({
    'time': slice(0, 800),
    'latitude': slice(0, 100),
    'longitude': slice(0, 100),
})
print(vertical_cuboid_chunk)

In [None]:
# Range query for vertical cuboid chunk (index) -> cube access pattern (50x50x50)
def query_vertical_chunk_cube_50():
    """Index-based range query on `vertical_cuboid_chunk`: first 50 steps of time, latitude and longitude.

    Named descriptively instead of `time` so it does not rebind the `time`
    module imported at the top of the notebook (which would break every
    later call into that module).
    """
    query = vertical_cuboid_chunk.isel(time=slice(0, 50), latitude=slice(0, 50), longitude=slice(0, 50))
    # .values converts the selection to a NumPy array before it is printed
    print(query.values)
execution_time = timeit.timeit(query_vertical_chunk_cube_50, number = 1)
print(f"Execution Time: {execution_time}")

In [None]:
# Range query for vertical cuboid chunk (index) -> cube access pattern (40x40x40)
def query_vertical_chunk_cube_40():
    """Index-based range query on `vertical_cuboid_chunk`: first 40 steps of time, latitude and longitude.

    Named descriptively instead of `time` so it does not rebind the `time`
    module imported at the top of the notebook (which would break every
    later call into that module).
    """
    query = vertical_cuboid_chunk.isel(time=slice(0, 40), latitude=slice(0, 40), longitude=slice(0, 40))
    # .values converts the selection to a NumPy array before it is printed
    print(query.values)
execution_time = timeit.timeit(query_vertical_chunk_cube_40, number = 1)
print(f"Execution Time: {execution_time}")

In [None]:
# Range query for vertical cuboid chunk (index) -> vertical cuboid access pattern
# (80 time steps x 10 latitudes x 10 longitudes)
def query_vertical_chunk_vertical_80x10x10():
    """Index-based range query on `vertical_cuboid_chunk`: time 0-80, latitude 0-10, longitude 0-10.

    Named descriptively instead of `time` so it does not rebind the `time`
    module imported at the top of the notebook (which would break every
    later call into that module).
    """
    query = vertical_cuboid_chunk.isel(time=slice(0, 80), latitude=slice(0, 10), longitude=slice(0, 10))
    # .values converts the selection to a NumPy array before it is printed
    print(query.values)
execution_time = timeit.timeit(query_vertical_chunk_vertical_80x10x10, number = 1)
print(f"Execution Time: {execution_time}")

In [None]:
# Range query for vertical cuboid chunk (index) -> vertical cuboid access pattern
# (100 time steps x 40 latitudes x 40 longitudes)
def query_vertical_chunk_vertical_100x40x40():
    """Index-based range query on `vertical_cuboid_chunk`: time 0-100, latitude 0-40, longitude 0-40.

    Named descriptively instead of `time` so it does not rebind the `time`
    module imported at the top of the notebook (which would break every
    later call into that module).
    """
    query = vertical_cuboid_chunk.isel(time=slice(0, 100), latitude=slice(0, 40), longitude=slice(0, 40))
    # .values converts the selection to a NumPy array before it is printed
    print(query.values)
execution_time = timeit.timeit(query_vertical_chunk_vertical_100x40x40, number = 1)
print(f"Execution Time: {execution_time}")

In [None]:
# Range query for vertical cuboid chunk (index) -> horizontal cuboid access pattern
# (25 time steps x 100 latitudes x 100 longitudes)
def query_vertical_chunk_horizontal_25x100x100():
    """Index-based range query on `vertical_cuboid_chunk`: time 0-25, latitude 0-100, longitude 0-100.

    Named descriptively instead of `time` so it does not rebind the `time`
    module imported at the top of the notebook (which would break every
    later call into that module).
    """
    query = vertical_cuboid_chunk.isel(time=slice(0, 25), latitude=slice(0, 100), longitude=slice(0, 100))
    # .values converts the selection to a NumPy array before it is printed
    print(query.values)
execution_time = timeit.timeit(query_vertical_chunk_horizontal_25x100x100, number = 1)
print(f"Execution Time: {execution_time}")

Horizontal cuboid chunk (50x400x400)

In [None]:
# Horizontal cuboid chunk (50x400x400): 50 time indices, 400 latitude indices, 400 longitude indices
horizontal_cuboid_chunk = t2m.isel({
    'time': slice(0, 50),
    'latitude': slice(0, 400),
    'longitude': slice(0, 400),
})
print(horizontal_cuboid_chunk)

In [None]:
# Range query for horizontal cuboid chunk (index) -> cube access pattern (50x50x50)
def query_horizontal_chunk_cube_50():
    """Index-based range query on `horizontal_cuboid_chunk`: first 50 steps of time, latitude and longitude.

    Named descriptively instead of `time` so it does not rebind the `time`
    module imported at the top of the notebook (which would break every
    later call into that module).
    """
    query = horizontal_cuboid_chunk.isel(time=slice(0, 50), latitude=slice(0, 50), longitude=slice(0, 50))
    # .values converts the selection to a NumPy array before it is printed
    print(query.values)
execution_time = timeit.timeit(query_horizontal_chunk_cube_50, number = 1)
print(f"Execution Time: {execution_time}")

In [None]:
# Range query for horizontal cuboid chunk (index) -> cube access pattern (40x40x40)
def query_horizontal_chunk_cube_40():
    """Index-based range query on `horizontal_cuboid_chunk`: first 40 steps of time, latitude and longitude.

    Named descriptively instead of `time` so it does not rebind the `time`
    module imported at the top of the notebook (which would break every
    later call into that module).
    """
    query = horizontal_cuboid_chunk.isel(time=slice(0, 40), latitude=slice(0, 40), longitude=slice(0, 40))
    # .values converts the selection to a NumPy array before it is printed
    print(query.values)
execution_time = timeit.timeit(query_horizontal_chunk_cube_40, number = 1)
print(f"Execution Time: {execution_time}")

In [None]:
# Range query for horizontal cuboid chunk (index) -> vertical cuboid access pattern
# (80 time steps requested x 10 latitudes x 10 longitudes)
def query_horizontal_chunk_vertical_80x10x10():
    """Index-based range query on `horizontal_cuboid_chunk`: time 0-80, latitude 0-10, longitude 0-10.

    Named descriptively instead of `time` so it does not rebind the `time`
    module imported at the top of the notebook (which would break every
    later call into that module).
    """
    # NOTE: horizontal_cuboid_chunk is built with time=slice(0, 50), so this
    # slice(0, 80) request is clipped to the 50 time steps the chunk holds.
    query = horizontal_cuboid_chunk.isel(time=slice(0, 80), latitude=slice(0, 10), longitude=slice(0, 10))
    # .values converts the selection to a NumPy array before it is printed
    print(query.values)
execution_time = timeit.timeit(query_horizontal_chunk_vertical_80x10x10, number = 1)
print(f"Execution Time: {execution_time}")

In [None]:
# Range query for horizontal cuboid chunk (index) -> vertical cuboid access pattern
# (100 time steps requested x 40 latitudes x 40 longitudes)
def query_horizontal_chunk_vertical_100x40x40():
    """Index-based range query on `horizontal_cuboid_chunk`: time 0-100, latitude 0-40, longitude 0-40.

    Named descriptively instead of `time` so it does not rebind the `time`
    module imported at the top of the notebook (which would break every
    later call into that module).
    """
    # NOTE: horizontal_cuboid_chunk is built with time=slice(0, 50), so this
    # slice(0, 100) request is clipped to the 50 time steps the chunk holds.
    query = horizontal_cuboid_chunk.isel(time=slice(0, 100), latitude=slice(0, 40), longitude=slice(0, 40))
    # .values converts the selection to a NumPy array before it is printed
    print(query.values)
execution_time = timeit.timeit(query_horizontal_chunk_vertical_100x40x40, number = 1)
print(f"Execution Time: {execution_time}")

In [None]:
# Range query for horizontal cuboid chunk (index) -> horizontal cuboid access pattern
# (25 time steps x 100 latitudes x 100 longitudes)
def query_horizontal_chunk_horizontal_25x100x100():
    """Index-based range query on `horizontal_cuboid_chunk`: time 0-25, latitude 0-100, longitude 0-100.

    Named descriptively instead of `time` so it does not rebind the `time`
    module imported at the top of the notebook (which would break every
    later call into that module).
    """
    query = horizontal_cuboid_chunk.isel(time=slice(0, 25), latitude=slice(0, 100), longitude=slice(0, 100))
    # .values converts the selection to a NumPy array before it is printed
    print(query.values)
execution_time = timeit.timeit(query_horizontal_chunk_horizontal_25x100x100, number = 1)
print(f"Execution Time: {execution_time}")

extra testing

In [None]:
# Point query for vertical cuboid chunk (800x100x100)
def point_query_vertical_chunk():
    """Label-based point query on `vertical_cuboid_chunk` at 1984-01-01, latitude 90.0, longitude -180.0.

    Named descriptively instead of `time` so it does not rebind the `time`
    module imported at the top of the notebook (which would break every
    later call into that module).
    """
    query = vertical_cuboid_chunk.sel(time='1984-01-01', latitude=90.0, longitude=-180.0)
    # .values converts the selection to a NumPy array before it is printed
    print("here",query.values)
execution_time = timeit.timeit(point_query_vertical_chunk, number = 1)
print(f"Execution Time: {execution_time}")

In [None]:
# Range query for cube chunk (values)
def range_query_cube_chunk_by_value():
    """Label-based range query on `cube_chunk` over a time, latitude and longitude window.

    Named descriptively instead of `time` so it does not rebind the `time`
    module imported at the top of the notebook (which would break every
    later call into that module).
    """
    time_range = slice('1984-01-01', '1985-08-01')
    # Bounds run from 90.0 down to 75.0 because the latitude coordinate is
    # stored in descending order (90.0, 89.75, ...).
    latitude_range = slice(90.0, 75.0)
    longitude_range = slice(-180.0, -150.0)
    query = cube_chunk.sel(time=time_range, latitude=latitude_range, longitude=longitude_range)
    # .values converts the selection to a NumPy array before it is printed
    print("here",query.values)
execution_time = timeit.timeit(range_query_cube_chunk_by_value, number = 1)
print(f"Execution Time: {execution_time}")

In [None]:
# Point query for horizontal cuboid chunk
# (the chunk is defined in this section as 50 time x 400 latitude x 400 longitude indices)
def point_query_horizontal_chunk():
    """Label-based point query on `horizontal_cuboid_chunk` at 1984-01-01, latitude 90.0, longitude -180.0.

    Named descriptively instead of `time` so it does not rebind the `time`
    module imported at the top of the notebook (which would break every
    later call into that module).
    """
    query = horizontal_cuboid_chunk.sel(time='1984-01-01', latitude=90.0, longitude=-180.0)
    # .values converts the selection to a NumPy array before it is printed
    print("here",query.values)
execution_time = timeit.timeit(point_query_horizontal_chunk, number = 1)
print(f"Execution Time: {execution_time}")

In [None]:
# Point query for cube chunk
def point_query_cube_chunk():
    """Label-based point query on `cube_chunk` at 1984-01-01, latitude 90.0, longitude -180.0.

    Named descriptively instead of `time` so it does not rebind the `time`
    module imported at the top of the notebook (which would break every
    later call into that module).
    """
    query = cube_chunk.sel(time='1984-01-01', latitude=90.0, longitude=-180.0)
    # .values converts the selection to a NumPy array before it is printed
    print("here",query.values)
execution_time = timeit.timeit(point_query_cube_chunk, number = 1)
print(f"Execution Time: {execution_time}")

In [None]:
# 4. Horizontal cuboid-shaped chunk: time indices 0-4, latitude and longitude slices 0-4000
def test():
    """Slice `t2m` into a thin, wide slab, print it, then evaluate the values of its first row."""
    slab = t2m.isel({
        'time': slice(0, 4),
        'latitude': slice(0, 4000),
        'longitude': slice(0, 4000),
    })
    print(slab)
    # Result is intentionally discarded; the access itself is what gets timed
    slab[0][0].values
execution_time = timeit.timeit(test, number=1)
print(f"Execution Time: {execution_time}")

In [None]:
# Range query (values) for vertical cuboid chunk
# (the chunk is defined in this section as 800 time x 100 latitude x 100 longitude indices)
def range_query_vertical_chunk_by_value():
    """Label-based range query on `vertical_cuboid_chunk` over a time, latitude and longitude window.

    Named descriptively instead of `time` so it does not rebind the `time`
    module imported at the top of the notebook (which would break every
    later call into that module).
    """
    time_range = slice('1984-01-01', '1985-08-01')
    # Bounds run from 90.0 down to 75.0 because the latitude coordinate is
    # stored in descending order (90.0, 89.75, ...).
    latitude_range = slice(90.0, 75.0)
    longitude_range = slice(-180.0, -150.0)
    query = vertical_cuboid_chunk.sel(time=time_range, latitude=latitude_range, longitude=longitude_range)
    # .values converts the selection to a NumPy array before it is printed
    print("here",query.values)
execution_time = timeit.timeit(range_query_vertical_chunk_by_value, number = 1)
print(f"Execution Time: {execution_time}")