In [36]:
import xarray as xr
import timeit
import time
import os

In [2]:
# Open the NetCDF file using xarray
file_path = 'D:/UMN/iHarpv/2m_temperature-day-mean.nc'
ds = xr.open_dataset(file_path)
ds

In [3]:
# Access the variable (e.g., 't2m')
t2m = ds['t2m']
t2m

Cube Chunk (200x200x200)

In [4]:
#First cube chunk (0, 200)
cube_chunk1 = t2m.isel(latitude=slice(0, 200), longitude=slice(0, 200), time=slice(0, 200)).compute()
cube_chunk1
cube_chunk1.to_netcdf('cube1.nc')

In [6]:
#Second cube chunk (200, 400)
cube_chunk2 = t2m.isel(latitude=slice(200, 400), longitude=slice(200, 400), time=slice(200, 400)).compute()
cube_chunk2
cube_chunk2.to_netcdf('cube2.nc')

In [7]:
#Third cube chunk (400, 600)
cube_chunk3 = t2m.isel(latitude=slice(400, 600), longitude=slice(400, 600), time=slice(400, 600)).compute()
cube_chunk3
cube_chunk3.to_netcdf('cube3.nc')

In [11]:
#Size of all cube chuncks

for i in range (3):
    file_path= f"cube{i+1}.nc"
    file_size_bytes = os.path.getsize(file_path)
    print(f"Cube {i+1} size on disk: {file_size_bytes / (1024**2):.2f} MB")
    
    


Cube 1 size on disk: 15.27 MB
Cube 2 size on disk: 15.27 MB
Cube 3 size on disk: 15.27 MB


In [7]:
#Range query using the cube access pattern on the first cube chunk
start = time.time()
q1_ds = xr.open_dataset('cube1.nc')
q1 = q1_ds.isel(time=slice(0, 50), latitude=slice(0, 50), longitude=slice(0, 50)).compute()
end = time.time()
execution_time = end - start
print(q1)
print(f"Execution time: {execution_time} s ")
#Calculating the size of the accessed data
accessed_size_bytes = q1.nbytes
accessed_size_mb = accessed_size_bytes / (1024**2)
print(f"Accessed data size: {accessed_size_mb:.2f} MB")

<xarray.Dataset> Size: 1MB
Dimensions:    (longitude: 50, latitude: 50, time: 50)
Coordinates:
  * longitude  (longitude) float32 200B -180.0 -179.8 -179.5 ... -168.0 -167.8
  * latitude   (latitude) float32 200B 90.0 89.75 89.5 ... 78.25 78.0 77.75
  * time       (time) datetime64[ns] 400B 1984-01-01 1984-01-02 ... 1984-02-19
Data variables:
    t2m        (time, latitude, longitude) float64 1MB 248.7 248.7 ... 245.6


0.021802902221679688

In [None]:
#Range query using the cube access pattern on the second cube chunk
start = time.time()
q1_ds = xr.open_dataset('cube2.nc')
q1 = q1_ds.isel(time=slice(0, 50), latitude=slice(0, 50), longitude=slice(0, 50)).compute()
end = time.time()
execution_time = end - start
print(q1)
print(f"Execution time: {execution_time} s ")
#Calculating the size of the accessed data
accessed_size_bytes = q1.nbytes
accessed_size_mb = accessed_size_bytes / (1024**2)
print(f"Accessed data size: {accessed_size_mb:.2f} MB")

<xarray.Dataset> Size: 1MB
Dimensions:    (longitude: 50, latitude: 50, time: 50)
Coordinates:
  * longitude  (longitude) float32 200B -130.0 -129.8 -129.5 ... -118.0 -117.8
  * latitude   (latitude) float32 200B 40.0 39.75 39.5 ... 28.25 28.0 27.75
  * time       (time) datetime64[ns] 400B 1984-07-19 1984-07-20 ... 1984-09-06
Data variables:
    t2m        (time, latitude, longitude) float64 1MB 287.7 287.7 ... 295.7
Execution time: 0.008825302124023438 s 
Accessed data size: 0.95 MB


In [9]:
#Range query using the cube access pattern on the third cube chunk
start = time.time()
q1_ds = xr.open_dataset('cube3.nc')
q1 = q1_ds.isel(time=slice(0, 50), latitude=slice(0, 50), longitude=slice(0, 50)).compute()
end = time.time()
execution_time = end - start
print(q1)
print(f"Execution time: {execution_time} s ")
#Calculating the size of the accessed data
accessed_size_bytes = q1.nbytes
accessed_size_mb = accessed_size_bytes / (1024**2)
print(f"Accessed data size: {accessed_size_mb:.2f} MB")

<xarray.Dataset> Size: 1MB
Dimensions:    (longitude: 50, latitude: 50, time: 50)
Coordinates:
  * longitude  (longitude) float32 200B -80.0 -79.75 -79.5 ... -68.0 -67.75
  * latitude   (latitude) float32 200B -10.0 -10.25 -10.5 ... -22.0 -22.25
  * time       (time) datetime64[ns] 400B 1985-02-04 1985-02-05 ... 1985-03-25
Data variables:
    t2m        (time, latitude, longitude) float64 1MB 296.4 296.1 ... 276.1


0.02696967124938965

In [None]:
#Merge queries from the same file
start = time.time()
cube_ds_list = []
cube_ds = xr.open_dataset('cube1.nc')
for i in range(1, 50):
    cube_query = cube_ds.isel(time=slice(0, i), latitude=slice(0, i), longitude=slice(0, i)).compute()
    cube_ds_list.append(cube_query)
cube_result = xr.merge(cube_ds_list).compute()
end = time.time()
execution_time = end - start
print(cube_result)
execution_time

In [None]:
#Merge query results from different files
start = time.time()
cube_ds_list = []
cube_files = ['cube1.nc', 'cube2.nc', 'cube3.nc']
for file in cube_files:
    cube_ds = xr.open_dataset(file)
    cube_query = cube_ds.isel(time=slice(0, 50), latitude=slice(0, 50), longitude=slice(0, 50)).compute()
    cube_ds_list.append(cube_query)
merge_cube_result = xr.merge(cube_ds_list).compute()
end = time.time()
execution_time = end - start
print(merge_cube_result)
execution_time

In [None]:
merge_cube_result

NAN values because of different dimension values.
 
For example, if one chunk has latitude values from 0 to 199, and another chunk has latitude values from 200 to 399, xarray will not automatically align them, leading to NaN values where the coordinates do not match.

In [41]:
#Range query using the vertical cuboid access pattern on the first cube chunk
#(10x10x40)
start = time.time()
q2_ds = xr.open_dataset('cube1.nc')
q2 = q2_ds.isel(time=slice(0, 40), latitude=slice(0, 10), longitude=slice(0, 10)).compute()
end = time.time()
execution_time = end - start
print(q2)
print(f"Execution time: {execution_time} s ")
#Calculating the size of the accessed data
accessed_size_bytes = q2.nbytes
accessed_size_mb = accessed_size_bytes / (1024**2)
print(f"Accessed data size: {accessed_size_mb:.2f} MB")

<xarray.Dataset> Size: 64kB
Dimensions:    (longitude: 80, latitude: 10, time: 10)
Coordinates:
  * longitude  (longitude) float32 320B -180.0 -179.8 -179.5 ... -160.5 -160.2
  * latitude   (latitude) float32 40B 90.0 89.75 89.5 89.25 ... 88.25 88.0 87.75
  * time       (time) datetime64[ns] 80B 1984-01-01 1984-01-02 ... 1984-01-10
Data variables:
    t2m        (time, latitude, longitude) float64 64kB 248.7 248.7 ... 244.0


0.015228986740112305

In [42]:
#Range query using the vertical cuboid access pattern on the first cube chunk
#(40x40x100)
start = time.time()
q2_ds = xr.open_dataset('cube1.nc')
q2 = q2_ds.isel(time=slice(0, 100), latitude=slice(0, 40), longitude=slice(0, 40)).compute()
end = time.time()
execution_time = end - start
print(q2)
print(f"Execution time: {execution_time} s ")
#Calculating the size of the accessed data
accessed_size_bytes = q2.nbytes
accessed_size_mb = accessed_size_bytes / (1024**2)
print(f"Accessed data size: {accessed_size_mb:.2f} MB")

<xarray.Dataset> Size: 1MB
Dimensions:    (longitude: 100, latitude: 40, time: 40)
Coordinates:
  * longitude  (longitude) float32 400B -180.0 -179.8 -179.5 ... -155.5 -155.2
  * latitude   (latitude) float32 160B 90.0 89.75 89.5 ... 80.75 80.5 80.25
  * time       (time) datetime64[ns] 320B 1984-01-01 1984-01-02 ... 1984-02-09
Data variables:
    t2m        (time, latitude, longitude) float64 1MB 248.7 248.7 ... 246.4


0.004216194152832031

In [None]:
#Range query using the vertical cuboid access pattern on the second cube chunk
#(10x10x40)
start = time.time()
q2_ds = xr.open_dataset('cube2.nc')
q2 = q2_ds.isel(time=slice(0, 40), latitude=slice(0, 10), longitude=slice(0, 10)).compute()
end = time.time()
execution_time = end - start
print(q2)
print(f"Execution time: {execution_time} s ")
#Calculating the size of the accessed data
accessed_size_bytes = q2.nbytes
accessed_size_mb = accessed_size_bytes / (1024**2)
print(f"Accessed data size: {accessed_size_mb:.2f} MB")

<xarray.Dataset> Size: 32kB
Dimensions:    (longitude: 10, latitude: 10, time: 40)
Coordinates:
  * longitude  (longitude) float32 40B -130.0 -129.8 -129.5 ... -128.0 -127.8
  * latitude   (latitude) float32 40B 40.0 39.75 39.5 39.25 ... 38.25 38.0 37.75
  * time       (time) datetime64[ns] 320B 1984-07-19 1984-07-20 ... 1984-08-27
Data variables:
    t2m        (time, latitude, longitude) float64 32kB 287.7 287.7 ... 290.6
Execution time: 0.010287046432495117 s 
Accessed data size: 0.03 MB


In [39]:
#Range query using the vertical cuboid access pattern on the second cube chunk
#(40x40x100)
start = time.time()
q2_ds = xr.open_dataset('cube2.nc')
q2 = q2_ds.isel(time=slice(0, 100), latitude=slice(0, 40), longitude=slice(0, 40)).compute()
end = time.time()
execution_time = end - start
print(q2)
print(f"Execution time: {execution_time} s ")
#Calculating the size of the accessed data
accessed_size_bytes = q2.nbytes
accessed_size_mb = accessed_size_bytes / (1024**2)
print(f"Accessed data size: {accessed_size_mb:.2f} MB")

<xarray.Dataset> Size: 1MB
Dimensions:    (longitude: 40, latitude: 40, time: 100)
Coordinates:
  * longitude  (longitude) float32 160B -130.0 -129.8 -129.5 ... -120.5 -120.2
  * latitude   (latitude) float32 160B 40.0 39.75 39.5 ... 30.75 30.5 30.25
  * time       (time) datetime64[ns] 800B 1984-07-19 1984-07-20 ... 1984-10-26
Data variables:
    t2m        (time, latitude, longitude) float64 1MB 287.7 287.7 ... 290.8
Execution time: 0.08109664916992188 s 
Accessed data size: 1.22 MB


In [None]:
#Range query using the horizontal cuboid access pattern on the first cube chunk
#(100x100x25)
start = time.time()
q3_ds = xr.open_dataset('cube1.nc')
q3 = q3_ds.isel(time=slice(0, 25), latitude=slice(0, 100), longitude=slice(0, 100)).compute()
end = time.time()
execution_time = end - start
print(q3)
print(f"Execution time: {execution_time} s ")
#Calculating the size of the accessed data
accessed_size_bytes = q3.nbytes
accessed_size_mb = accessed_size_bytes / (1024**2)
print(f"Accessed data size: {accessed_size_mb:.2f} MB")

In [40]:
#Range query using the horizontal cuboid access pattern on the second cube chunk
#(100x100x25)
start = time.time()
q3_ds = xr.open_dataset('cube2.nc')
q3 = q3_ds.isel(time=slice(0, 25), latitude=slice(0, 100), longitude=slice(0, 100)).compute()
end = time.time()
execution_time = end - start
print(q3)
print(f"Execution time: {execution_time} s ")
#Calculating the size of the accessed data
accessed_size_bytes = q3.nbytes
accessed_size_mb = accessed_size_bytes / (1024**2)
print(f"Accessed data size: {accessed_size_mb:.2f} MB")

<xarray.Dataset> Size: 2MB
Dimensions:    (longitude: 100, latitude: 100, time: 25)
Coordinates:
  * longitude  (longitude) float32 400B -130.0 -129.8 -129.5 ... -105.5 -105.2
  * latitude   (latitude) float32 400B 40.0 39.75 39.5 ... 15.75 15.5 15.25
  * time       (time) datetime64[ns] 200B 1984-07-19 1984-07-20 ... 1984-08-12
Data variables:
    t2m        (time, latitude, longitude) float64 2MB 287.7 287.7 ... 300.2
Execution time: 0.010188579559326172 s 
Accessed data size: 1.91 MB


Vertical Cuboid (100x100x800)

In [13]:
#First cuboid chunk 
cuboidv_chunk1 = t2m.isel(latitude=slice(0, 100), longitude=slice(0, 100), time=slice(0, 800)).compute()
cuboidv_chunk1
cuboidv_chunk1.to_netcdf('cuboidv1.nc')

In [14]:
#Second cuboid chunk 
cuboidv_chunk1 = t2m.isel(latitude=slice(100, 200), longitude=slice(100, 200), time=slice(800, 1600)).compute()
cuboidv_chunk1
cuboidv_chunk1.to_netcdf('cuboidv2.nc')

In [16]:
#Size of all cuboid chuncks

for i in range (2):
    file_path= f"cuboidv{i+1}.nc"
    file_size_bytes = os.path.getsize(file_path)
    print(f"Cuboid {i+1} size on disk: {file_size_bytes / (1024**2):.2f} MB")

Cuboid 1 size on disk: 15.27 MB
Cuboid 2 size on disk: 15.27 MB


In [None]:
#Range query using the cube access pattern on the first vertical cuboid chunk
start = time.time()
q1_ds = xr.open_dataset('cuboidv1.nc')
q1 = q1_ds.isel(time=slice(0, 50), latitude=slice(0, 50), longitude=slice(0, 50)).compute()
end = time.time()
execution_time = end - start
print(q1)
print(f"Execution time: {execution_time} s ")
#Calculating the size of the accessed data
accessed_size_bytes = q1.nbytes
accessed_size_mb = accessed_size_bytes / (1024**2)
print(f"Accessed data size: {accessed_size_mb:.2f} MB")

<xarray.Dataset> Size: 1MB
Dimensions:    (longitude: 50, latitude: 50, time: 50)
Coordinates:
  * longitude  (longitude) float32 200B -180.0 -179.8 -179.5 ... -168.0 -167.8
  * latitude   (latitude) float32 200B 90.0 89.75 89.5 ... 78.25 78.0 77.75
  * time       (time) datetime64[ns] 400B 1984-01-01 1984-01-02 ... 1984-02-19
Data variables:
    t2m        (time, latitude, longitude) float64 1MB 248.7 248.7 ... 245.6
Accessed data size: 0.95 MB


In [None]:
#Range query using the cube access pattern on the second vertical cuboid chunk
start = time.time()
q1_ds = xr.open_dataset('cuboidv2.nc')
q1 = q1_ds.isel(time=slice(0, 50), latitude=slice(0, 50), longitude=slice(0, 50)).compute()
end = time.time()
execution_time = end - start
print(q1)
print(f"Execution time: {execution_time} s ")
#Calculating the size of the accessed data
accessed_size_bytes = q1.nbytes
accessed_size_mb = accessed_size_bytes / (1024**2)
print(f"Accessed data size: {accessed_size_mb:.2f} MB")

<xarray.Dataset> Size: 1MB
Dimensions:    (longitude: 50, latitude: 50, time: 50)
Coordinates:
  * longitude  (longitude) float32 200B -155.0 -154.8 -154.5 ... -143.0 -142.8
  * latitude   (latitude) float32 200B 65.0 64.75 64.5 ... 53.25 53.0 52.75
  * time       (time) datetime64[ns] 400B 1986-03-11 1986-03-12 ... 1986-04-29
Data variables:
    t2m        (time, latitude, longitude) float64 1MB 253.9 254.0 ... 279.0
Accessed data size: 0.95 MB


In [31]:
#Range query using the vertical cuboid access pattern on the first vertical cuboid chunk
#(10x10x40)
start = time.time()
q2_ds = xr.open_dataset('cuboidv1.nc')
q2 = q2_ds.isel(time=slice(0, 40), latitude=slice(0, 10), longitude=slice(0, 10)).compute()
end = time.time()
execution_time = end - start
print(q2)
print(f"Execution time: {execution_time} s ")
#Calculating the size of the accessed data
accessed_size_bytes = q2.nbytes
accessed_size_mb = accessed_size_bytes / (1024**2)
print(f"Accessed data size: {accessed_size_mb:.2f} MB")

<xarray.Dataset> Size: 32kB
Dimensions:    (longitude: 10, latitude: 10, time: 40)
Coordinates:
  * longitude  (longitude) float32 40B -180.0 -179.8 -179.5 ... -178.0 -177.8
  * latitude   (latitude) float32 40B 90.0 89.75 89.5 89.25 ... 88.25 88.0 87.75
  * time       (time) datetime64[ns] 320B 1984-01-01 1984-01-02 ... 1984-02-09
Data variables:
    t2m        (time, latitude, longitude) float64 32kB 248.7 248.7 ... 243.9
Accessed data size: 0.95 MB


In [None]:
#Range query using the vertical cuboid access pattern on the first vertical cuboid chunk
#(40x40x100)
start = time.time()
q2_ds = xr.open_dataset('cuboidv1.nc')
q2 = q2_ds.isel(time=slice(0, 100), latitude=slice(0, 40), longitude=slice(0, 40)).compute()
end = time.time()
execution_time = end - start
print(q2)
print(f"Execution time: {execution_time} s ")
#Calculating the size of the accessed data
accessed_size_bytes = q2.nbytes
accessed_size_mb = accessed_size_bytes / (1024**2)
print(f"Accessed data size: {accessed_size_mb:.2f} MB")

<xarray.Dataset> Size: 1MB
Dimensions:    (longitude: 40, latitude: 40, time: 100)
Coordinates:
  * longitude  (longitude) float32 160B -180.0 -179.8 -179.5 ... -170.5 -170.2
  * latitude   (latitude) float32 160B 90.0 89.75 89.5 ... 80.75 80.5 80.25
  * time       (time) datetime64[ns] 800B 1984-01-01 1984-01-02 ... 1984-04-09
Data variables:
    t2m        (time, latitude, longitude) float64 1MB 248.7 248.7 ... 247.3
Accessed data size: 0.95 MB


In [None]:
#Range query using the vertical cuboid access pattern on the second vertical cuboid chunk
#(10x10x40)
start = time.time()
q2_ds = xr.open_dataset('cuboidv2.nc')
q2 = q2_ds.isel(time=slice(0, 40), latitude=slice(0, 10), longitude=slice(0, 10)).compute()
end = time.time()
execution_time = end - start
print(q2)
print(f"Execution time: {execution_time} s ")
#Calculating the size of the accessed data
accessed_size_bytes = q2.nbytes
accessed_size_mb = accessed_size_bytes / (1024**2)
print(f"Accessed data size: {accessed_size_mb:.2f} MB")

<xarray.Dataset> Size: 32kB
Dimensions:    (longitude: 10, latitude: 10, time: 40)
Coordinates:
  * longitude  (longitude) float32 40B -155.0 -154.8 -154.5 ... -153.0 -152.8
  * latitude   (latitude) float32 40B 65.0 64.75 64.5 64.25 ... 63.25 63.0 62.75
  * time       (time) datetime64[ns] 320B 1986-03-11 1986-03-12 ... 1986-04-19
Data variables:
    t2m        (time, latitude, longitude) float64 32kB 253.9 254.0 ... 268.0
Accessed data size: 0.95 MB


In [None]:
#Range query using the vertical cuboid access pattern on the second vertical cuboid chunk
#(40x40x100)
start = time.time()
q2_ds = xr.open_dataset('cuboidv2.nc')
q2 = q2_ds.isel(time=slice(0, 100), latitude=slice(0, 40), longitude=slice(0, 40)).compute()
end = time.time()
execution_time = end - start
print(q2)
print(f"Execution time: {execution_time} s ")
#Calculating the size of the accessed data
accessed_size_bytes = q2.nbytes
accessed_size_mb = accessed_size_bytes / (1024**2)
print(f"Accessed data size: {accessed_size_mb:.2f} MB")

<xarray.Dataset> Size: 1MB
Dimensions:    (longitude: 40, latitude: 40, time: 100)
Coordinates:
  * longitude  (longitude) float32 160B -155.0 -154.8 -154.5 ... -145.5 -145.2
  * latitude   (latitude) float32 160B 65.0 64.75 64.5 ... 55.75 55.5 55.25
  * time       (time) datetime64[ns] 800B 1986-03-11 1986-03-12 ... 1986-06-18
Data variables:
    t2m        (time, latitude, longitude) float64 1MB 253.9 254.0 ... 281.0
Accessed data size: 0.95 MB


In [27]:
#Range query using the horizontal cuboid access pattern on the first vertical cuboid chunk
#(100x100x25)
start = time.time()
q3_ds = xr.open_dataset('cuboidv1.nc')
q3 = q3_ds.isel(time=slice(0, 25), latitude=slice(0, 100), longitude=slice(0, 100)).compute()
end = time.time()
execution_time = end - start
print(q3)
print(f"Execution time: {execution_time} s ")
#Calculating the size of the accessed data
accessed_size_bytes = q3.nbytes
accessed_size_mb = accessed_size_bytes / (1024**2)
print(f"Accessed data size: {accessed_size_mb:.2f} MB")

<xarray.Dataset> Size: 2MB
Dimensions:    (longitude: 100, latitude: 100, time: 25)
Coordinates:
  * longitude  (longitude) float32 400B -180.0 -179.8 -179.5 ... -155.5 -155.2
  * latitude   (latitude) float32 400B 90.0 89.75 89.5 ... 65.75 65.5 65.25
  * time       (time) datetime64[ns] 200B 1984-01-01 1984-01-02 ... 1984-01-25
Data variables:
    t2m        (time, latitude, longitude) float64 2MB 248.7 248.7 ... 235.8
Accessed data size: 0.95 MB


In [26]:
#Range query using the horizontal cuboid access pattern on the second vertical cuboid chunk
#(100x100x25)
start = time.time()
q3_ds = xr.open_dataset('cuboidv2.nc')
q3 = q3_ds.isel(time=slice(0, 25), latitude=slice(0, 100), longitude=slice(0, 100)).compute()
end = time.time()
execution_time = end - start
print(q3)
print(f"Execution time: {execution_time} s ")
#Calculating the size of the accessed data
accessed_size_bytes = q3.nbytes
accessed_size_mb = accessed_size_bytes / (1024**2)
print(f"Accessed data size: {accessed_size_mb:.2f} MB")

<xarray.Dataset> Size: 2MB
Dimensions:    (longitude: 100, latitude: 100, time: 25)
Coordinates:
  * longitude  (longitude) float32 400B -155.0 -154.8 -154.5 ... -130.5 -130.2
  * latitude   (latitude) float32 400B 65.0 64.75 64.5 ... 40.75 40.5 40.25
  * time       (time) datetime64[ns] 200B 1986-03-11 1986-03-12 ... 1986-04-04
Data variables:
    t2m        (time, latitude, longitude) float64 2MB 253.9 254.0 ... 283.7
Accessed data size: 0.95 MB


Horizontal Cuboid (400x400x50)

In [17]:
#First cuboid chunk 
cuboidv_chunk1 = t2m.isel(latitude=slice(0, 400), longitude=slice(0, 400), time=slice(0, 50)).compute()
cuboidv_chunk1
cuboidv_chunk1.to_netcdf('cuboidh1.nc')

In [18]:
#Second cuboid chunk 
#For the latitude variable it's only 741 values so will have less tha 400 values
cuboidv_chunk1 = t2m.isel(latitude=slice(400, 800), longitude=slice(400, 800), time=slice(50, 100)).compute()
cuboidv_chunk1
cuboidv_chunk1.to_netcdf('cuboidh2.nc')

In [19]:
#Size of all cuboid chuncks

for i in range (2):
    file_path= f"cuboidh{i+1}.nc"
    file_size_bytes = os.path.getsize(file_path)
    print(f"Cuboid {i+1} size on disk: {file_size_bytes / (1024**2):.2f} MB")

Cuboid 1 size on disk: 15.27 MB
Cuboid 2 size on disk: 12.26 MB


In [None]:
#Range query using the cube access pattern on the first horizontal cuboid chunk
start = time.time()
q1_ds = xr.open_dataset('cuboidh1.nc')
q1 = q1_ds.isel(time=slice(0, 50), latitude=slice(0, 50), longitude=slice(0, 50)).compute()
end = time.time()
execution_time = end - start
print(q1)
print(f"Execution time: {execution_time} s ")
#Calculating the size of the accessed data
accessed_size_bytes = q1.nbytes
accessed_size_mb = accessed_size_bytes / (1024**2)
print(f"Accessed data size: {accessed_size_mb:.2f} MB")

<xarray.Dataset> Size: 1MB
Dimensions:    (longitude: 50, latitude: 50, time: 50)
Coordinates:
  * longitude  (longitude) float32 200B -180.0 -179.8 -179.5 ... -168.0 -167.8
  * latitude   (latitude) float32 200B 90.0 89.75 89.5 ... 78.25 78.0 77.75
  * time       (time) datetime64[ns] 400B 1984-01-01 1984-01-02 ... 1984-02-19
Data variables:
    t2m        (time, latitude, longitude) float64 1MB 248.7 248.7 ... 245.6
Accessed data size: 0.95 MB


In [None]:
#Range query using the cube access pattern on the second horizontal cuboid chunk
start = time.time()
q1_ds = xr.open_dataset('cuboidh2.nc')
q1 = q1_ds.isel(time=slice(0, 50), latitude=slice(0, 50), longitude=slice(0, 50)).compute()
end = time.time()
execution_time = end - start
print(q1)
print(f"Execution time: {execution_time} s ")
#Calculating the size of the accessed data
accessed_size_bytes = q1.nbytes
accessed_size_mb = accessed_size_bytes / (1024**2)
print(f"Accessed data size: {accessed_size_mb:.2f} MB")

<xarray.Dataset> Size: 1MB
Dimensions:    (longitude: 50, latitude: 50, time: 50)
Coordinates:
  * longitude  (longitude) float32 200B -80.0 -79.75 -79.5 ... -68.0 -67.75
  * latitude   (latitude) float32 200B -10.0 -10.25 -10.5 ... -22.0 -22.25
  * time       (time) datetime64[ns] 400B 1984-02-20 1984-02-21 ... 1984-04-09
Data variables:
    t2m        (time, latitude, longitude) float64 1MB 296.6 296.4 ... 274.5
Accessed data size: 0.95 MB


In [22]:
#Range query using the vertical cuboid access pattern on the first horizontal cuboid chunk
#(10x10x40)
start = time.time()
q2_ds = xr.open_dataset('cuboidh1.nc')
q2 = q2_ds.isel(time=slice(0, 40), latitude=slice(0, 10), longitude=slice(0, 10)).compute()
end = time.time()
execution_time = end - start
print(q2)
print(f"Execution time: {execution_time} s ")
#Calculating the size of the accessed data
accessed_size_bytes = q2.nbytes
accessed_size_mb = accessed_size_bytes / (1024**2)
print(f"Accessed data size: {accessed_size_mb:.2f} MB")

<xarray.Dataset> Size: 32kB
Dimensions:    (longitude: 10, latitude: 10, time: 40)
Coordinates:
  * longitude  (longitude) float32 40B -180.0 -179.8 -179.5 ... -178.0 -177.8
  * latitude   (latitude) float32 40B 90.0 89.75 89.5 89.25 ... 88.25 88.0 87.75
  * time       (time) datetime64[ns] 320B 1984-01-01 1984-01-02 ... 1984-02-09
Data variables:
    t2m        (time, latitude, longitude) float64 32kB 248.7 248.7 ... 243.9
Accessed data size: 0.95 MB


In [23]:
#Range query using the vertical cuboid access pattern on the second horizontal cuboid chunk
#(10x10x40)
start = time.time()
q2_ds = xr.open_dataset('cuboidh2.nc')
q2 = q2_ds.isel(time=slice(0, 40), latitude=slice(0, 10), longitude=slice(0, 10)).compute()
end = time.time()
execution_time = end - start
print(q2)
print(f"Execution time: {execution_time} s ")
#Calculating the size of the accessed data
accessed_size_bytes = q2.nbytes
accessed_size_mb = accessed_size_bytes / (1024**2)
print(f"Accessed data size: {accessed_size_mb:.2f} MB")

<xarray.Dataset> Size: 32kB
Dimensions:    (longitude: 10, latitude: 10, time: 40)
Coordinates:
  * longitude  (longitude) float32 40B -80.0 -79.75 -79.5 ... -78.0 -77.75
  * latitude   (latitude) float32 40B -10.0 -10.25 -10.5 ... -11.75 -12.0 -12.25
  * time       (time) datetime64[ns] 320B 1984-02-20 1984-02-21 ... 1984-03-30
Data variables:
    t2m        (time, latitude, longitude) float64 32kB 296.6 296.4 ... 294.6
Accessed data size: 0.95 MB


In [24]:
#Range query using the horizontal cuboid access pattern on the first horizontal cuboid chunk
#(100x100x25)
start = time.time()
q3_ds = xr.open_dataset('cuboidh1.nc')
q3 = q3_ds.isel(time=slice(0, 25), latitude=slice(0, 100), longitude=slice(0, 100)).compute()
end = time.time()
execution_time = end - start
print(q3)
print(f"Execution time: {execution_time} s ")
#Calculating the size of the accessed data
accessed_size_bytes = q3.nbytes
accessed_size_mb = accessed_size_bytes / (1024**2)
print(f"Accessed data size: {accessed_size_mb:.2f} MB")

<xarray.Dataset> Size: 2MB
Dimensions:    (longitude: 100, latitude: 100, time: 25)
Coordinates:
  * longitude  (longitude) float32 400B -180.0 -179.8 -179.5 ... -155.5 -155.2
  * latitude   (latitude) float32 400B 90.0 89.75 89.5 ... 65.75 65.5 65.25
  * time       (time) datetime64[ns] 200B 1984-01-01 1984-01-02 ... 1984-01-25
Data variables:
    t2m        (time, latitude, longitude) float64 2MB 248.7 248.7 ... 235.8
Accessed data size: 0.95 MB


In [25]:
#Range query using the horizontal cuboid access pattern on the second horizontal cuboid chunk
#(100x100x25)
start = time.time()
q3_ds = xr.open_dataset('cuboidh2.nc')
q3 = q3_ds.isel(time=slice(0, 25), latitude=slice(0, 100), longitude=slice(0, 100)).compute()
end = time.time()
execution_time = end - start
print(q3)
print(f"Execution time: {execution_time} s ")
#Calculating the size of the accessed data
accessed_size_bytes = q3.nbytes
accessed_size_mb = accessed_size_bytes / (1024**2)
print(f"Accessed data size: {accessed_size_mb:.2f} MB")

<xarray.Dataset> Size: 2MB
Dimensions:    (longitude: 100, latitude: 100, time: 25)
Coordinates:
  * longitude  (longitude) float32 400B -80.0 -79.75 -79.5 ... -55.5 -55.25
  * latitude   (latitude) float32 400B -10.0 -10.25 -10.5 ... -34.5 -34.75
  * time       (time) datetime64[ns] 200B 1984-02-20 1984-02-21 ... 1984-03-15
Data variables:
    t2m        (time, latitude, longitude) float64 2MB 296.6 296.4 ... 294.8
Accessed data size: 0.95 MB


Old code for reference

In [49]:
# 1. Time Slice (All latitudes and longitudes at a specific time)
time_slice = t2m.isel(time=0)  # First time step
print(f"\nTime Slice (t=0):\n{time_slice}")
time_slice[0][0].values


Time Slice (t=0):
<xarray.DataArray 't2m' (latitude: 721, longitude: 1440)> Size: 8MB
[1038240 values with dtype=float64]
Coordinates:
  * longitude  (longitude) float32 6kB -180.0 -179.8 -179.5 ... 179.5 179.8
  * latitude   (latitude) float32 3kB 90.0 89.75 89.5 ... -89.5 -89.75 -90.0
    time       datetime64[ns] 8B 1984-01-01
Attributes:
    units:      K
    long_name:  2 metre temperature


array(248.72569322)

In [12]:
#Cube chunk (200x200x200)
cube_chunk = t2m.isel(time=slice(0, 200), latitude=slice(0, 200), longitude=slice(0, 200))
print(cube_chunk)
#cube_chunk[0][0].values

<xarray.DataArray 't2m' (time: 200, latitude: 200, longitude: 200)> Size: 64MB
[8000000 values with dtype=float64]
Coordinates:
  * longitude  (longitude) float32 800B -180.0 -179.8 -179.5 ... -130.5 -130.2
  * latitude   (latitude) float32 800B 90.0 89.75 89.5 ... 40.75 40.5 40.25
  * time       (time) datetime64[ns] 2kB 1984-01-01 1984-01-02 ... 1984-07-18
Attributes:
    units:      K
    long_name:  2 metre temperature


In [139]:
#Range query for cube chunk (index) -> cube access pattern
def time():
    query = cube_chunk.isel(time=slice(0, 50), latitude=slice(0, 50), longitude=slice(0, 50))  # First time step
    print("here",query.values)
execution_time = timeit.timeit(time, number = 1)
print(f"Execution Time: {execution_time}")

here [[[248.72569322 248.72569322 248.72569322 ... 248.72569322 248.72569322
   248.72569322]
  [248.65922374 248.65922374 248.65922374 ... 248.6650887  248.6650887
   248.6650887 ]
  [248.63771891 248.63771891 248.63576393 ... 248.59275427 248.59275427
   248.59079928]
  ...
  [240.5303978  240.56558752 240.60077725 ... 242.76299021 242.82359473
   242.88419925]
  [240.45219842 240.48347817 240.51475792 ... 242.71607058 242.79035999
   242.86464941]
  [240.33489934 240.36422411 240.38768393 ... 242.64569113 242.71802556
   242.79231498]]

 [[246.48528087 246.48528087 246.48528087 ... 246.48528087 246.48528087
   246.48528087]
  [246.24481777 246.24481777 246.24481777 ... 246.215493   246.215493
   246.215493  ]
  [246.27609752 246.27414254 246.27414254 ... 246.28978241 246.28978241
   246.2917374 ]
  ...
  [242.282064   242.29574889 242.31138877 ... 244.1725341  244.23704859
   244.30156308]
  [242.15303501 242.16476492 242.17844981 ... 244.28396822 244.37194253
   244.46187182]
  [24

In [162]:
#Range query for cube chunk (index) -> cube access pattern
def time():
    query = cube_chunk.isel(time=slice(0, 40), latitude=slice(0, 40), longitude=slice(0, 40))  # First time step
    print("here",query.values)
execution_time = timeit.timeit(time, number = 1)
print(f"Execution Time: {execution_time}")

here [[[248.72569322 248.72569322 248.72569322 ... 248.72569322 248.72569322
   248.72569322]
  [248.65922374 248.65922374 248.65922374 ... 248.66313371 248.66313371
   248.66313371]
  [248.63771891 248.63771891 248.63576393 ... 248.60643916 248.60448417
   248.60252919]
  ...
  [242.19213471 242.2312344  242.2683791  ... 243.84018672 243.88319638
   243.92816102]
  [241.95753655 241.99859123 242.03964591 ... 243.69160789 243.73461755
   243.77762721]
  [241.7229384  241.75812813 241.79136286 ... 243.51174931 243.57235383
   243.63295835]]

 [[246.48528087 246.48528087 246.48528087 ... 246.48528087 246.48528087
   246.48528087]
  [246.24481777 246.24481777 246.24481777 ... 246.22331294 246.22135795
   246.22135795]
  [246.27609752 246.27414254 246.27414254 ... 246.27609752 246.27805251
   246.27805251]
  ...
  [245.47846381 245.44913904 245.41003935 ... 244.90369834 244.91738323
   244.93106812]
  [245.1109267  245.08746689 245.06400707 ... 244.69451499 244.70037994
   244.70819988]
  

In [151]:
#Range query for cube chunk (index) -> vertical cuboid access pattern
def time():
    query = cube_chunk.isel(time=slice(0, 80), latitude=slice(0, 10), longitude=slice(0, 10))  # First time step
    print(query.values)
execution_time = timeit.timeit(time, number = 1)
print(f"Execution Time: {execution_time}")

[[[248.72569322 248.72569322 248.72569322 ... 248.72569322 248.72569322
   248.72569322]
  [248.65922374 248.65922374 248.65922374 ... 248.65726876 248.65726876
   248.65726876]
  [248.63771891 248.63771891 248.63576393 ... 248.63185396 248.63185396
   248.63185396]
  ...
  [248.25649692 248.2584519  248.26040688 ... 248.26627184 248.26822682
   248.27018181]
  [248.07272836 248.07663833 248.0805483  ... 248.09814316 248.10205313
   248.1059631 ]
  [247.8518151  247.85768006 247.86354501 ... 247.88895981 247.89286978
   247.89677975]]

 [[246.48528087 246.48528087 246.48528087 ... 246.48528087 246.48528087
   246.48528087]
  [246.24481777 246.24481777 246.24481777 ... 246.24286278 246.24286278
   246.24286278]
  [246.27609752 246.27414254 246.27414254 ... 246.27414254 246.27218755
   246.27218755]
  ...
  [246.86454788 246.85672795 246.84695302 ... 246.80003339 246.79221345
   246.78243853]
  [247.01899167 247.00921674 246.99944182 ... 246.94861222 246.93688231
   246.92710739]
  [247.

In [156]:
#Range query for cube chunk (index) -> vertical cuboid access pattern
def time():
    query = cube_chunk.isel(time=slice(0, 100), latitude=slice(0, 40), longitude=slice(0, 40))  # First time step
    print(query.values)
execution_time = timeit.timeit(time, number = 1)
print(f"Execution Time: {execution_time}")

[[[248.72569322 248.72569322 248.72569322 ... 248.72569322 248.72569322
   248.72569322]
  [248.65922374 248.65922374 248.65922374 ... 248.66313371 248.66313371
   248.66313371]
  [248.63771891 248.63771891 248.63576393 ... 248.60643916 248.60448417
   248.60252919]
  ...
  [242.19213471 242.2312344  242.2683791  ... 243.84018672 243.88319638
   243.92816102]
  [241.95753655 241.99859123 242.03964591 ... 243.69160789 243.73461755
   243.77762721]
  [241.7229384  241.75812813 241.79136286 ... 243.51174931 243.57235383
   243.63295835]]

 [[246.48528087 246.48528087 246.48528087 ... 246.48528087 246.48528087
   246.48528087]
  [246.24481777 246.24481777 246.24481777 ... 246.22331294 246.22135795
   246.22135795]
  [246.27609752 246.27414254 246.27414254 ... 246.27609752 246.27805251
   246.27805251]
  ...
  [245.47846381 245.44913904 245.41003935 ... 244.90369834 244.91738323
   244.93106812]
  [245.1109267  245.08746689 245.06400707 ... 244.69451499 244.70037994
   244.70819988]
  [244.

In [161]:
#Range query for cube chunk (index) -> horizontal cuboid access pattern
def time():
    query = cube_chunk.isel(time=slice(0, 25), latitude=slice(0, 100), longitude=slice(0, 100))  # First time step
    print(query.values)
execution_time = timeit.timeit(time, number = 1)
print(f"Execution Time: {execution_time}")

[[[248.72569322 248.72569322 248.72569322 ... 248.72569322 248.72569322
   248.72569322]
  [248.65922374 248.65922374 248.65922374 ... 248.66704368 248.66704368
   248.66899866]
  [248.63771891 248.63771891 248.63576393 ... 248.55169959 248.55169959
   248.55169959]
  ...
  [239.16777354 242.37199329 245.14416144 ... 249.16947472 249.28872878
   249.00330103]
  [238.46593407 241.70338856 244.77075938 ... 248.88600195 249.00134605
   248.89773186]
  [239.88916285 241.53525988 244.50683646 ... 248.76088294 248.880137
   248.91923669]]

 [[246.48528087 246.48528087 246.48528087 ... 246.48528087 246.48528087
   246.48528087]
  [246.24481777 246.24481777 246.24481777 ... 246.24872774 246.25263771
   246.25459269]
  [246.27609752 246.27414254 246.27414254 ... 246.38948663 246.3933966
   246.39926155]
  ...
  [240.42091866 242.9545787  245.84991088 ... 254.17814525 254.21724494
   253.66789427]
  [240.63596697 243.19113183 245.83036103 ... 253.10290372 252.73145665
   252.30917998]
  [240.747

Vertical cuboid chunk (800x100x100)

In [135]:
#Vertical cuboid chunk (800x100x100)
vertical_cuboid_chunk = t2m.isel(time=slice(0, 800), latitude=slice(0, 100), longitude=slice(0, 100))
print(vertical_cuboid_chunk)

<xarray.DataArray 't2m' (time: 800, latitude: 100, longitude: 100)> Size: 64MB
[8000000 values with dtype=float64]
Coordinates:
  * longitude  (longitude) float32 400B -180.0 -179.8 -179.5 ... -155.5 -155.2
  * latitude   (latitude) float32 400B 90.0 89.75 89.5 ... 65.75 65.5 65.25
  * time       (time) datetime64[ns] 6kB 1984-01-01 1984-01-02 ... 1986-03-10
Attributes:
    units:      K
    long_name:  2 metre temperature


In [140]:
#Range query for vertical cuboid chunk (index) -> cube access pattern
def time():
    query = vertical_cuboid_chunk.isel(time=slice(0, 50), latitude=slice(0, 50), longitude=slice(0, 50))  # First time step
    print(query.values)
execution_time = timeit.timeit(time, number = 1)
print(f"Execution Time: {execution_time}")

[[[248.72569322 248.72569322 248.72569322 ... 248.72569322 248.72569322
   248.72569322]
  [248.65922374 248.65922374 248.65922374 ... 248.6650887  248.6650887
   248.6650887 ]
  [248.63771891 248.63771891 248.63576393 ... 248.59275427 248.59275427
   248.59079928]
  ...
  [240.5303978  240.56558752 240.60077725 ... 242.76299021 242.82359473
   242.88419925]
  [240.45219842 240.48347817 240.51475792 ... 242.71607058 242.79035999
   242.86464941]
  [240.33489934 240.36422411 240.38768393 ... 242.64569113 242.71802556
   242.79231498]]

 [[246.48528087 246.48528087 246.48528087 ... 246.48528087 246.48528087
   246.48528087]
  [246.24481777 246.24481777 246.24481777 ... 246.215493   246.215493
   246.215493  ]
  [246.27609752 246.27414254 246.27414254 ... 246.28978241 246.28978241
   246.2917374 ]
  ...
  [242.282064   242.29574889 242.31138877 ... 244.1725341  244.23704859
   244.30156308]
  [242.15303501 242.16476492 242.17844981 ... 244.28396822 244.37194253
   244.46187182]
  [242.031

In [163]:
#Range query for vertical cuboid chunk (index) -> cube access pattern
def time():
    query = vertical_cuboid_chunk.isel(time=slice(0, 40), latitude=slice(0, 40), longitude=slice(0, 40))  # First time step
    print(query.values)
execution_time = timeit.timeit(time, number = 1)
print(f"Execution Time: {execution_time}")

[[[248.72569322 248.72569322 248.72569322 ... 248.72569322 248.72569322
   248.72569322]
  [248.65922374 248.65922374 248.65922374 ... 248.66313371 248.66313371
   248.66313371]
  [248.63771891 248.63771891 248.63576393 ... 248.60643916 248.60448417
   248.60252919]
  ...
  [242.19213471 242.2312344  242.2683791  ... 243.84018672 243.88319638
   243.92816102]
  [241.95753655 241.99859123 242.03964591 ... 243.69160789 243.73461755
   243.77762721]
  [241.7229384  241.75812813 241.79136286 ... 243.51174931 243.57235383
   243.63295835]]

 [[246.48528087 246.48528087 246.48528087 ... 246.48528087 246.48528087
   246.48528087]
  [246.24481777 246.24481777 246.24481777 ... 246.22331294 246.22135795
   246.22135795]
  [246.27609752 246.27414254 246.27414254 ... 246.27609752 246.27805251
   246.27805251]
  ...
  [245.47846381 245.44913904 245.41003935 ... 244.90369834 244.91738323
   244.93106812]
  [245.1109267  245.08746689 245.06400707 ... 244.69451499 244.70037994
   244.70819988]
  [244.

In [150]:
#Range query for vertical cuboid chunk (index) -> vertical cuboid access pattern
def time():
    query = vertical_cuboid_chunk.isel(time=slice(0, 80), latitude=slice(0, 10), longitude=slice(0, 10))  # First time step
    print(query.values)
execution_time = timeit.timeit(time, number = 1)
print(f"Execution Time: {execution_time}")

[[[248.72569322 248.72569322 248.72569322 ... 248.72569322 248.72569322
   248.72569322]
  [248.65922374 248.65922374 248.65922374 ... 248.65726876 248.65726876
   248.65726876]
  [248.63771891 248.63771891 248.63576393 ... 248.63185396 248.63185396
   248.63185396]
  ...
  [248.25649692 248.2584519  248.26040688 ... 248.26627184 248.26822682
   248.27018181]
  [248.07272836 248.07663833 248.0805483  ... 248.09814316 248.10205313
   248.1059631 ]
  [247.8518151  247.85768006 247.86354501 ... 247.88895981 247.89286978
   247.89677975]]

 [[246.48528087 246.48528087 246.48528087 ... 246.48528087 246.48528087
   246.48528087]
  [246.24481777 246.24481777 246.24481777 ... 246.24286278 246.24286278
   246.24286278]
  [246.27609752 246.27414254 246.27414254 ... 246.27414254 246.27218755
   246.27218755]
  ...
  [246.86454788 246.85672795 246.84695302 ... 246.80003339 246.79221345
   246.78243853]
  [247.01899167 247.00921674 246.99944182 ... 246.94861222 246.93688231
   246.92710739]
  [247.

In [155]:
#Range query for vertical cuboid chunk (index) -> vertical cuboid access pattern
def time():
    query = vertical_cuboid_chunk.isel(time=slice(0, 100), latitude=slice(0, 40), longitude=slice(0, 40))  # First time step
    print(query.values)
execution_time = timeit.timeit(time, number = 1)
print(f"Execution Time: {execution_time}")

[[[248.72569322 248.72569322 248.72569322 ... 248.72569322 248.72569322
   248.72569322]
  [248.65922374 248.65922374 248.65922374 ... 248.66313371 248.66313371
   248.66313371]
  [248.63771891 248.63771891 248.63576393 ... 248.60643916 248.60448417
   248.60252919]
  ...
  [242.19213471 242.2312344  242.2683791  ... 243.84018672 243.88319638
   243.92816102]
  [241.95753655 241.99859123 242.03964591 ... 243.69160789 243.73461755
   243.77762721]
  [241.7229384  241.75812813 241.79136286 ... 243.51174931 243.57235383
   243.63295835]]

 [[246.48528087 246.48528087 246.48528087 ... 246.48528087 246.48528087
   246.48528087]
  [246.24481777 246.24481777 246.24481777 ... 246.22331294 246.22135795
   246.22135795]
  [246.27609752 246.27414254 246.27414254 ... 246.27609752 246.27805251
   246.27805251]
  ...
  [245.47846381 245.44913904 245.41003935 ... 244.90369834 244.91738323
   244.93106812]
  [245.1109267  245.08746689 245.06400707 ... 244.69451499 244.70037994
   244.70819988]
  [244.

In [160]:
#Range query for vertical cuboid chunk (index) -> horizontal cuboid access pattern
def time():
    query = vertical_cuboid_chunk.isel(time=slice(0, 25), latitude=slice(0, 100), longitude=slice(0, 100))  # First time step
    print(query.values)
execution_time = timeit.timeit(time, number = 1)
print(f"Execution Time: {execution_time}")

[[[248.72569322 248.72569322 248.72569322 ... 248.72569322 248.72569322
   248.72569322]
  [248.65922374 248.65922374 248.65922374 ... 248.66704368 248.66704368
   248.66899866]
  [248.63771891 248.63771891 248.63576393 ... 248.55169959 248.55169959
   248.55169959]
  ...
  [239.16777354 242.37199329 245.14416144 ... 249.16947472 249.28872878
   249.00330103]
  [238.46593407 241.70338856 244.77075938 ... 248.88600195 249.00134605
   248.89773186]
  [239.88916285 241.53525988 244.50683646 ... 248.76088294 248.880137
   248.91923669]]

 [[246.48528087 246.48528087 246.48528087 ... 246.48528087 246.48528087
   246.48528087]
  [246.24481777 246.24481777 246.24481777 ... 246.24872774 246.25263771
   246.25459269]
  [246.27609752 246.27414254 246.27414254 ... 246.38948663 246.3933966
   246.39926155]
  ...
  [240.42091866 242.9545787  245.84991088 ... 254.17814525 254.21724494
   253.66789427]
  [240.63596697 243.19113183 245.83036103 ... 253.10290372 252.73145665
   252.30917998]
  [240.747

Horizontal cuboid chunk (50x400x400)

In [137]:
#Horizontal cuboid chunk (50x400x400)
horizontal_cuboid_chunk = t2m.isel(time=slice(0, 50), latitude=slice(0, 400), longitude=slice(0, 400))
print(horizontal_cuboid_chunk)

<xarray.DataArray 't2m' (time: 50, latitude: 400, longitude: 400)> Size: 64MB
[8000000 values with dtype=float64]
Coordinates:
  * longitude  (longitude) float32 2kB -180.0 -179.8 -179.5 ... -80.5 -80.25
  * latitude   (latitude) float32 2kB 90.0 89.75 89.5 89.25 ... -9.25 -9.5 -9.75
  * time       (time) datetime64[ns] 400B 1984-01-01 1984-01-02 ... 1984-02-19
Attributes:
    units:      K
    long_name:  2 metre temperature


In [141]:
#Range query for horizontal cuboid chunk (index) -> cube access pattern
def time():
    query = horizontal_cuboid_chunk.isel(time=slice(0, 50), latitude=slice(0, 50), longitude=slice(0, 50))  # First time step
    print(query.values)
execution_time = timeit.timeit(time, number = 1)
print(f"Execution Time: {execution_time}")

[[[248.72569322 248.72569322 248.72569322 ... 248.72569322 248.72569322
   248.72569322]
  [248.65922374 248.65922374 248.65922374 ... 248.6650887  248.6650887
   248.6650887 ]
  [248.63771891 248.63771891 248.63576393 ... 248.59275427 248.59275427
   248.59079928]
  ...
  [240.5303978  240.56558752 240.60077725 ... 242.76299021 242.82359473
   242.88419925]
  [240.45219842 240.48347817 240.51475792 ... 242.71607058 242.79035999
   242.86464941]
  [240.33489934 240.36422411 240.38768393 ... 242.64569113 242.71802556
   242.79231498]]

 [[246.48528087 246.48528087 246.48528087 ... 246.48528087 246.48528087
   246.48528087]
  [246.24481777 246.24481777 246.24481777 ... 246.215493   246.215493
   246.215493  ]
  [246.27609752 246.27414254 246.27414254 ... 246.28978241 246.28978241
   246.2917374 ]
  ...
  [242.282064   242.29574889 242.31138877 ... 244.1725341  244.23704859
   244.30156308]
  [242.15303501 242.16476492 242.17844981 ... 244.28396822 244.37194253
   244.46187182]
  [242.031

In [165]:
#Range query for horizontal cuboid chunk (index) -> cube access pattern
def time():
    query = horizontal_cuboid_chunk.isel(time=slice(0, 40), latitude=slice(0, 40), longitude=slice(0, 40))  # First time step
    print(query.values)
execution_time = timeit.timeit(time, number = 1)
print(f"Execution Time: {execution_time}")

[[[248.72569322 248.72569322 248.72569322 ... 248.72569322 248.72569322
   248.72569322]
  [248.65922374 248.65922374 248.65922374 ... 248.66313371 248.66313371
   248.66313371]
  [248.63771891 248.63771891 248.63576393 ... 248.60643916 248.60448417
   248.60252919]
  ...
  [242.19213471 242.2312344  242.2683791  ... 243.84018672 243.88319638
   243.92816102]
  [241.95753655 241.99859123 242.03964591 ... 243.69160789 243.73461755
   243.77762721]
  [241.7229384  241.75812813 241.79136286 ... 243.51174931 243.57235383
   243.63295835]]

 [[246.48528087 246.48528087 246.48528087 ... 246.48528087 246.48528087
   246.48528087]
  [246.24481777 246.24481777 246.24481777 ... 246.22331294 246.22135795
   246.22135795]
  [246.27609752 246.27414254 246.27414254 ... 246.27609752 246.27805251
   246.27805251]
  ...
  [245.47846381 245.44913904 245.41003935 ... 244.90369834 244.91738323
   244.93106812]
  [245.1109267  245.08746689 245.06400707 ... 244.69451499 244.70037994
   244.70819988]
  [244.

In [149]:
#Range query for horizontal cuboid chunk (index) -> vertical cuboid access pattern
def time():
    query = horizontal_cuboid_chunk.isel(time=slice(0, 80), latitude=slice(0, 10), longitude=slice(0, 10))  # First time step
    print(query.values)
execution_time = timeit.timeit(time, number = 1)
print(f"Execution Time: {execution_time}")

[[[248.72569322 248.72569322 248.72569322 ... 248.72569322 248.72569322
   248.72569322]
  [248.65922374 248.65922374 248.65922374 ... 248.65726876 248.65726876
   248.65726876]
  [248.63771891 248.63771891 248.63576393 ... 248.63185396 248.63185396
   248.63185396]
  ...
  [248.25649692 248.2584519  248.26040688 ... 248.26627184 248.26822682
   248.27018181]
  [248.07272836 248.07663833 248.0805483  ... 248.09814316 248.10205313
   248.1059631 ]
  [247.8518151  247.85768006 247.86354501 ... 247.88895981 247.89286978
   247.89677975]]

 [[246.48528087 246.48528087 246.48528087 ... 246.48528087 246.48528087
   246.48528087]
  [246.24481777 246.24481777 246.24481777 ... 246.24286278 246.24286278
   246.24286278]
  [246.27609752 246.27414254 246.27414254 ... 246.27414254 246.27218755
   246.27218755]
  ...
  [246.86454788 246.85672795 246.84695302 ... 246.80003339 246.79221345
   246.78243853]
  [247.01899167 247.00921674 246.99944182 ... 246.94861222 246.93688231
   246.92710739]
  [247.

In [154]:
#Range query for horizontal cuboid chunk (index) -> vertical cuboid access pattern
def time():
    query = horizontal_cuboid_chunk.isel(time=slice(0, 100), latitude=slice(0, 40), longitude=slice(0, 40))  # First time step
    print(query.values)
execution_time = timeit.timeit(time, number = 1)
print(f"Execution Time: {execution_time}")

[[[248.72569322 248.72569322 248.72569322 ... 248.72569322 248.72569322
   248.72569322]
  [248.65922374 248.65922374 248.65922374 ... 248.66313371 248.66313371
   248.66313371]
  [248.63771891 248.63771891 248.63576393 ... 248.60643916 248.60448417
   248.60252919]
  ...
  [242.19213471 242.2312344  242.2683791  ... 243.84018672 243.88319638
   243.92816102]
  [241.95753655 241.99859123 242.03964591 ... 243.69160789 243.73461755
   243.77762721]
  [241.7229384  241.75812813 241.79136286 ... 243.51174931 243.57235383
   243.63295835]]

 [[246.48528087 246.48528087 246.48528087 ... 246.48528087 246.48528087
   246.48528087]
  [246.24481777 246.24481777 246.24481777 ... 246.22331294 246.22135795
   246.22135795]
  [246.27609752 246.27414254 246.27414254 ... 246.27609752 246.27805251
   246.27805251]
  ...
  [245.47846381 245.44913904 245.41003935 ... 244.90369834 244.91738323
   244.93106812]
  [245.1109267  245.08746689 245.06400707 ... 244.69451499 244.70037994
   244.70819988]
  [244.

In [159]:
#Range query for horizontal cuboid chunk (index) -> horizontal cuboid access pattern
def time():
    query = horizontal_cuboid_chunk.isel(time=slice(0, 25), latitude=slice(0, 100), longitude=slice(0, 100))  # First time step
    print(query.values)
execution_time = timeit.timeit(time, number = 1)
print(f"Execution Time: {execution_time}")

[[[248.72569322 248.72569322 248.72569322 ... 248.72569322 248.72569322
   248.72569322]
  [248.65922374 248.65922374 248.65922374 ... 248.66704368 248.66704368
   248.66899866]
  [248.63771891 248.63771891 248.63576393 ... 248.55169959 248.55169959
   248.55169959]
  ...
  [239.16777354 242.37199329 245.14416144 ... 249.16947472 249.28872878
   249.00330103]
  [238.46593407 241.70338856 244.77075938 ... 248.88600195 249.00134605
   248.89773186]
  [239.88916285 241.53525988 244.50683646 ... 248.76088294 248.880137
   248.91923669]]

 [[246.48528087 246.48528087 246.48528087 ... 246.48528087 246.48528087
   246.48528087]
  [246.24481777 246.24481777 246.24481777 ... 246.24872774 246.25263771
   246.25459269]
  [246.27609752 246.27414254 246.27414254 ... 246.38948663 246.3933966
   246.39926155]
  ...
  [240.42091866 242.9545787  245.84991088 ... 254.17814525 254.21724494
   253.66789427]
  [240.63596697 243.19113183 245.83036103 ... 253.10290372 252.73145665
   252.30917998]
  [240.747

extra testing

In [108]:
#Point query for vertical cuboid chunk (800x100x100)
def time():
    query = vertical_cuboid_chunk.sel(time='1984-01-01', latitude=90.0, longitude=-180.0)  # First time step
    print("here",query.values)
execution_time = timeit.timeit(time, number = 1)
print(f"Execution Time: {execution_time}")

here 248.725693217924
Execution Time: 0.003773000091314316


In [100]:
#Range query for cube chunk (values)
def time():
    time_range = slice('1984-01-01', '1985-08-01')
    latitude_range = slice(90.0, 75.0)
    longitude_range = slice(-180.0, -150.0)
    query = cube_chunk.sel(time=time_range, latitude=latitude_range, longitude=longitude_range)  # First time step
    print("here",query.values)
execution_time = timeit.timeit(time, number = 1)
print(f"Execution Time: {execution_time}")

here [[[248.72569322 248.72569322 248.72569322 ... 248.72569322 248.72569322
   248.72569322]
  [248.65922374 248.65922374 248.65922374 ... 248.68659353 248.68659353
   248.68854851]
  [248.63771891 248.63771891 248.63576393 ... 248.56538448 248.56538448
   248.56538448]
  ...
  [239.67802452 239.70734929 239.74058402 ... 243.47655958 243.46287469
   243.44527983]
  [239.64674476 239.67020458 239.69561938 ... 243.33384571 243.296701
   243.25955629]
  [239.73276409 239.7562239  239.78359369 ... 242.9917234  242.9408938
   242.9037491 ]]

 [[246.48528087 246.48528087 246.48528087 ... 246.48528087 246.48528087
   246.48528087]
  [246.24481777 246.24481777 246.24481777 ... 246.30737728 246.30933226
   246.31128724]
  [246.27609752 246.27414254 246.27414254 ... 246.49310081 246.49701078
   246.50287573]
  ...
  [241.15990284 241.17163275 241.1774977  ... 243.05819288 243.0816527
   243.0933826 ]
  [240.96635937 240.9741793  240.98199924 ... 243.02495814 243.02691313
   243.02886811]
  [240

In [112]:
#Point query for horizontal cuboid chunk (100x800x800)
def time():
    query = horizontal_cuboid_chunk.sel(time='1984-01-01', latitude=90.0, longitude=-180.0)  # First time step
    print("here",query.values)
execution_time = timeit.timeit(time, number = 1)
print(f"Execution Time: {execution_time}")

here 248.725693217924
Execution Time: 0.0030366999562829733


In [105]:
#Point query for cube chunk
def time():
    query = cube_chunk.sel(time='1984-01-01', latitude=90.0, longitude=-180.0)  # First time step
    print("here",query.values)
execution_time = timeit.timeit(time, number = 1)
print(f"Execution Time: {execution_time}")

here <bound method Mapping.values of <xarray.Dataset> Size: 24B
Dimensions:    ()
Coordinates:
    longitude  float32 4B -180.0
    latitude   float32 4B 90.0
    time       datetime64[ns] 8B 1984-01-01
Data variables:
    t2m        float64 8B ...
Attributes:
    Conventions:  CF-1.6
    history:      2024-07-30 17:50:29 GMT by grib_to_netcdf-2.28.1: /opt/ecmw...>
Execution Time: 0.003676600055769086


In [46]:
# 4. Horizontal Cuboid-Shaped Chunk (5 time steps, 5 latitudes, 5 longitudes)
def test():
    cube_chunk = t2m.isel(time=slice(0, 4), latitude=slice(0, 4000), longitude=slice(0, 4000))
    print(cube_chunk)
    cube_chunk[0][0].values
execution_time = timeit.timeit(test, number = 1)
print(f"Execution Time: {execution_time}")

<xarray.DataArray 't2m' (time: 4, latitude: 721, longitude: 1440)> Size: 33MB
[4152960 values with dtype=float64]
Coordinates:
  * longitude  (longitude) float32 6kB -180.0 -179.8 -179.5 ... 179.5 179.8
  * latitude   (latitude) float32 3kB 90.0 89.75 89.5 ... -89.5 -89.75 -90.0
  * time       (time) datetime64[ns] 32B 1984-01-01 1984-01-02 ... 1984-01-04
Attributes:
    units:      K
    long_name:  2 metre temperature
Execution Time: 0.008126299944706261


In [114]:
#Range query for vertical cuboid chunk (10000x80x80)
def time():
    time_range = slice('1984-01-01', '1985-08-01')
    latitude_range = slice(90.0, 75.0)
    longitude_range = slice(-180.0, -150.0)
    query = vertical_cuboid_chunk.sel(time=time_range, latitude=latitude_range, longitude=longitude_range)  # First time step
    print("here",query.values)
execution_time = timeit.timeit(time, number = 1)
print(f"Execution Time: {execution_time}")

here [[[248.72569322 248.72569322 248.72569322 ... 248.72569322 248.72569322
   248.72569322]
  [248.65922374 248.65922374 248.65922374 ... 248.66704368 248.66704368
   248.66899866]
  [248.63771891 248.63771891 248.63576393 ... 248.55169959 248.55169959
   248.55169959]
  ...
  [239.67802452 239.70734929 239.74058402 ... 243.40031518 243.41986503
   243.43941488]
  [239.64674476 239.67020458 239.69561938 ... 243.24196143 243.27519617
   243.30843091]
  [239.73276409 239.7562239  239.78359369 ... 243.16180706 243.18526688
   243.20286174]]

 [[246.48528087 246.48528087 246.48528087 ... 246.48528087 246.48528087
   246.48528087]
  [246.24481777 246.24481777 246.24481777 ... 246.24872774 246.25263771
   246.25459269]
  [246.27609752 246.27414254 246.27414254 ... 246.38948663 246.3933966
   246.39926155]
  ...
  [241.15990284 241.17163275 241.1774977  ... 243.169627   243.15007716
   243.13052731]
  [240.96635937 240.9741793  240.98199924 ... 243.0308231  243.05037294
   243.06992279]
  [