## Example processing Sentinel-2 data with Dask (remote cluster)

In [None]:
import datacube
from dask_gateway import Gateway
from dask.distributed import Client
import matplotlib.pyplot as plt
import time

In [None]:
# Initialise datacube
# No arguments are passed, so connection settings come from whatever default
# datacube configuration is available in this environment.

dc = datacube.Datacube()

In [None]:
# Area of interest (Central NSW) and time window.
# The x/y bounds are projected coordinates in EPSG:3577, the CRS given to
# dc.load via its `crs` argument. Each pair is ordered (min, max) so the
# variable names match their values.
x_min, x_max = 1_200_000, 1_300_000
y_min, y_max = -3_700_000, -3_600_000
# NOTE(review): 2024 is a leap year, so 29 February is excluded — confirm intended.
date_range = ("2024-01-01", "2024-02-28")

In [None]:
# Build a lazy, Dask-backed load of the selected product.
# Nothing is computed here; dask_chunks makes dc.load return chunked arrays.

product = "ga_s2bm_ard_3"  # Sentinel-2 B
measurements = ["nbart_red", "nbart_blue", "oa_s2cloudless_mask"]
output_crs = "EPSG:3577"
resolution = [-30, 30]

# One time slice per chunk, tiled 500 x 500 in space.
dask_chunks = {"time": 1, "y": 500, "x": 500}

ds = dc.load(
    product=product,
    measurements=measurements,
    # CRS in which the x/y bounds below are expressed (independent of the
    # output grid, so it is intentionally not tied to `output_crs`).
    crs="EPSG:3577",
    x=(x_min, x_max),
    y=(y_min, y_max),
    time=date_range,
    output_crs=output_crs,
    resolution=resolution,
    dask_chunks=dask_chunks,
    # Keep only datasets whose maturity is "final".
    dataset_predicate=lambda dataset: dataset.metadata.dataset_maturity == "final",
    skip_broken_datasets=True,  # Important!
)

In [None]:
# Define the computation (evaluated later via .compute()):
# the temporal mean of the red / blue band ratio.

# Keep only pixels where the s2cloudless mask equals 1 (treated here as
# cloud-free); all other pixels are masked out.
no_clouds_ds = ds.where(ds.oa_s2cloudless_mask == 1)

# Per-pixel, per-time-step band ratio.
ratio_ds = no_clouds_ds.nbart_red / no_clouds_ds.nbart_blue

# Collapse the time dimension, ignoring masked (NaN) values.
mean_ratio_ds = ratio_ds.mean("time", skipna=True)

In [None]:
# Start a remote Dask cluster through Dask Gateway

# Number of workers to request and to wait for
num_workers = 16

gateway = Gateway()

# Print the clusters this gateway currently reports (optional)
print(gateway.list_clusters())

# Per-worker resources requested for the new cluster
options = gateway.cluster_options()
options.worker_cores = 1
options.worker_threads = 1
options.worker_memory = 1  # (GB)

# Create a new cluster and request a fixed number of workers
cluster = gateway.new_cluster(cluster_options=options)
cluster.scale(num_workers)  # or .adapt(minimum=4, maximum=16)

# Connect a client to it and print the dashboard link (optional)
client = Client(cluster)
print(client.dashboard_link)

# Block until all requested workers have joined
# NOTE(review): no timeout is given, so this waits indefinitely if the
# gateway cannot supply `num_workers` workers.
client.wait_for_workers(n_workers=num_workers)

In [None]:
%%time
# Trigger evaluation of the lazily defined mean ratio and rebind the name to
# the computed result (presumably executed on the cluster connected above).
mean_ratio_ds = mean_ratio_ds.compute()

In [None]:
# Clean-up
# Close the client first, then the cluster handle. shutdown=True is passed so
# the remote cluster itself is asked to stop rather than being left running
# (confirm against the dask-gateway docs for the installed version).
client.close()
cluster.close(shutdown=True)

In [None]:
# Show the result object as the cell output
mean_ratio_ds

In [None]:
# Visualise mean ratio dataset

band = mean_ratio_ds

# Draw onto an explicit Axes using xarray's wrapper around matplotlib
fig, ax = plt.subplots()
band.plot.imshow(ax=ax, cmap="viridis")  # or cmap='gray', 'RdYlGn', etc.

# Override the labels xarray sets by default
ax.set_title("Result")
ax.set_xlabel("x")
ax.set_ylabel("y")
plt.show()