# Video 5: SSoT and Lazy Expressions

#### Step 0: Move Data to Server

In [None]:
# Install necessary libraries
!pip install caterva2 blosc2 blosc2_grok matplotlib "xarray[complete]>=2025.1.2" "zarr>=3.0.4" requests aiohttp fsspec
# Imports
import os.path
import time

import blosc2
import numpy as np
import xarray as xr

import caterva2 as cat2

In [None]:
# Get data from dynamical.org
dir_path = "weather-data"
ds = xr.open_zarr("https://data.dynamical.org/noaa/gfs/analysis-hourly/latest.zarr")
# Spatial window, applied to every variable below.
lat = slice(70, 0)
long = slice(0, 70)
cparams = {"codec": blosc2.Codec.ZSTD, "clevel": 6}

# (variable name in the remote store, short name used for the local file)
datasets = [
    ("precipitation_surface", "precip"),
    ("temperature_2m", "temp"),
    ("wind_u_10m", "windu"),
    ("wind_v_10m", "windv"),
]

# (file-name suffix, first day, last day) for each download.
# Label-based time slices include both endpoints, and a date-only string
# selects that whole day. The base year therefore has to stop on 2023-12-31:
# ending it on 2024-01-01 would fetch that day twice (once here and once in
# the extra month), and the duplicated day would be appended to the server
# copy later on.
time_ranges = [
    ("", "2023-01-01", "2023-12-31"),  # base year, saved as <short>.b2nd
    ("Extra", "2024-01-01", "2024-01-31"),  # extra month to append later, saved as <short>Extra.b2nd
]

# Download data locally
os.makedirs(dir_path, exist_ok=True)

for suffix, datestart, dateend in time_ranges:
    for dset, short in datasets:
        print(f"Fetching dataset {dset} from dynamical.org...")
        arr = ds[dset].sel(time=slice(datestart, dateend), latitude=lat, longitude=long)
        # mode="w" overwrites any file left over from a previous run.
        blosc2.asarray(arr.values, urlpath=f"{dir_path}/{short}{suffix}.b2nd", mode="w", cparams=cparams)

In [None]:
# Setup client connection to server.
# The server URL and credentials can be overridden through environment
# variables, so a real account never has to be written into the notebook.
# The defaults are the values this notebook originally hardcoded.
# (`os` is in scope through the `import os.path` at the top of the notebook.)
server_url = os.environ.get("CATERVA2_URL", "https://cat2.cloud/demo")
username = os.environ.get("CATERVA2_USERNAME", "user@example.com")
password = os.environ.get("CATERVA2_PASSWORD", "foobar11")
client = cat2.Client(server_url, (username, password))
client.get_roots()  # not the last expression of the cell, so the result is not displayed
myroot = client.get("@shared")

# Upload the files to the server, keeping the same relative path under the root
list_of_datasets = []
for _, short in datasets:
    uploadaddress = f"{dir_path}/{short}.b2nd"
    apath = client.upload(uploadaddress, myroot.name + "/" + uploadaddress)
    list_of_datasets.append(apath)

In [None]:
# Build the list of files under weather-data/ as paths relative to the root,
# leaving out names that start with "m" or "s" (the mean_*/std_*/sum_* lazy
# expressions that get moved into this directory further down).
remote_dir = myroot.name + "/" + "weather-data"
derived_prefixes = ("weather-data/m", "weather-data/s")

list_of_datasets = []
for name in client.get_list(remote_dir):
    relpath = "weather-data/" + name
    if relpath.startswith(derived_prefixes):
        continue
    list_of_datasets.append(relpath)
list_of_datasets

#### Step 1: Save Lazy Expressions on Server

In [None]:
# Lazy Expressions, saved in @personal
# For every dataset, create one lazy expression per reduction along axis 0
# and keep a handle to each of them, keyed "<reduction>_<short name>".
list_of_stats = []
for dset in list_of_datasets:
    apath = myroot[dset].path
    # Short name: text before the first "." in the path, last "/" component
    # (a path ending in "precip.b2nd" gives "precip").
    short = str(apath).split(".")[0].split("/")[-1]
    locdict = {}
    for reduction in ("mean", "std", "sum"):
        expr_name = reduction + "_" + short
        expr_path = client.lazyexpr(expr_name, f"{reduction}(a, axis = 0)", {"a": apath})
        locdict[expr_name] = client.get(expr_path)
    list_of_stats.append(locdict)

In [None]:
# Move Lazy Expressions to @shared to enable collaboration
# Each handle is replaced by a fresh one fetched from its new location.
target_dir = myroot.name + "/weather-data/"
for stat in list_of_stats:
    for key in list(stat):  # snapshot of the keys; only the values are replaced
        expr = stat[key]
        basename = str(expr.path).split("/")[-1]
        moved_path = client.move(expr.path, target_dir + basename)
        stat[key] = client.get(moved_path)

#### Step 2: Query Lazy Expressions

In [None]:
# Read a single element of the mean-precipitation lazy expression.
# NOTE(review): assumes the first entry of list_of_stats is the precip one.
mean_precip = list_of_stats[0]["mean_precip"]
value = mean_precip[20, 100]  # Return numpy array
# fractional=False makes precision count significant digits, so this prints 4 of them.
ans = np.format_float_positional(value, precision=4, unique=False, fractional=False, trim="k")
print(f"mean_precip[20, 100]: {ans}")

#### Step 3: Append extra month of data to the server-hosted datasets

In [None]:
# Append each local <short>Extra.b2nd file to its server-hosted <short>.b2nd
# dataset and report the achieved bandwidth.
# Each run of this cell issues one more append per dataset, so run it once only.
mybytes = 0
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# intervals; time.time() can jump if the system clock is adjusted.
t1 = time.perf_counter()
for _, short in datasets:
    locfile = f"{dir_path}/{short}Extra.b2nd"
    uploadaddress = f"{dir_path}/{short}.b2nd"
    mydataset = myroot[uploadaddress]
    arr = blosc2.open(locfile, mode="r")
    mybytes += arr.schunk.nbytes  # the schunk's nbytes, summed over all datasets
    mydataset.append(arr)
t2 = time.perf_counter()
numMB = mybytes / 2**20
dt = t2 - t1
print(f"Appended {numMB:.2f} MB in {dt:.2f} seconds. Bandwidth: {numMB / dt:.2f} MB/s.")

#### Step 4: Requery Lazy Expressions

In [None]:
# Same query as in Step 2, repeated after the append so the two printed values
# can be compared.
precip_stats = list_of_stats[0]
sample = precip_stats["mean_precip"][20, 100]  # Return numpy array
ans = np.format_float_positional(
    sample,
    precision=4,
    unique=False,
    fractional=False,
    trim="k",
)
print(f"mean_precip[20, 100]: {ans}")

In [None]:
# Clean up: remove every file in @shared whose path starts with
# "weather-data/m" or "weather-data/s" (the mean_*/std_*/sum_* expressions
# moved there earlier). Other files under weather-data/ are left in place.
derived_prefixes = ("weather-data/m", "weather-data/s")
for relpath in client.get("@shared").file_list:
    if relpath.startswith(derived_prefixes):
        client.remove("@shared/" + relpath)