From 0ffbfd5d691fff4c96e5c2c0e67be0907d126137 Mon Sep 17 00:00:00 2001 From: Stephan Hoyer Date: Tue, 30 Sep 2025 12:27:16 -0700 Subject: [PATCH] Add docs for xbeam.Dataset PiperOrigin-RevId: 813378676 --- docs/api.md | 26 +++- docs/conf.py | 48 +++++- docs/high-level.ipynb | 322 ++++++++++++++++++++++++++++++++++++++++ docs/index.md | 1 + docs/read-write.ipynb | 175 ++++++++-------------- docs/requirements.txt | 2 +- xarray_beam/__init__.py | 2 +- 7 files changed, 454 insertions(+), 122 deletions(-) create mode 100644 docs/high-level.ipynb diff --git a/docs/api.md b/docs/api.md index bcf50a6..acd646d 100644 --- a/docs/api.md +++ b/docs/api.md @@ -78,4 +78,28 @@ split_chunks split_variables in_memory_rechunk -``` \ No newline at end of file +``` + +## High-level interface + +```{warning} +The high-level interface is experimental and offers no backwards compatibility +guarantees. +``` + +```{eval-rst} +.. autosummary:: + :toctree: _autosummary + + Dataset + Dataset.from_xarray + Dataset.from_zarr + Dataset.to_zarr + Dataset.collect_with_direct_runner + Dataset.map_blocks + Dataset.rechunk + Dataset.split_variables + Dataset.consolidate_variables + Dataset.head + Dataset.pipe +``` diff --git a/docs/conf.py b/docs/conf.py index 981a44d..fe705e6 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -14,12 +14,15 @@ # import sys # sys.path.insert(0, os.path.abspath('.')) -# Print Python environment info for easier debugging on ReadTheDocs - +import inspect +import operator +import os import sys import subprocess + import xarray_beam # verify this works +# Print Python environment info for easier debugging on ReadTheDocs print("python exec:", sys.executable) print("sys.path:", sys.path) print("pip environment:") @@ -42,6 +45,7 @@ extensions = [ 'sphinx.ext.autodoc', 'sphinx.ext.autosummary', + 'sphinx.ext.linkcode', 'sphinx.ext.napoleon', 'myst_nb', ] @@ -63,7 +67,14 @@ # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. # -html_theme = 'sphinx_rtd_theme' +html_theme = 'furo' # https://pradyunsg.me/furo/quickstart/ + +html_theme_options = { + 'source_repository': 'https://github.com/google/xarray-beam/', + 'source_branch': 'main', + 'source_directory': 'docs/', + 'sidebar_hide_name': False, +} # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. 
They are copied after the builtin static files,
@@ -81,3 +92,34 @@
 # https://stackoverflow.com/a/66295922/809705
 autodoc_typehints = "description"
+
+
+# Customize code links via sphinx.ext.linkcode
+# Borrowed from JAX: https://github.com/google/jax/pull/20961
+
+
+def linkcode_resolve(domain, info):
+  if domain != 'py':
+    return None
+  if not info['module']:
+    return None
+  if not info['fullname']:
+    return None
+  try:
+    mod = sys.modules.get(info['module'])
+    obj = operator.attrgetter(info['fullname'])(mod)
+    if isinstance(obj, property):
+      obj = obj.fget
+    while hasattr(obj, '__wrapped__'):  # decorated functions
+      obj = obj.__wrapped__
+    filename = inspect.getsourcefile(obj)
+    source, linenum = inspect.getsourcelines(obj)
+    print(f'found source code for: {info}')
+  except Exception as e:
+    print(f'did not find source code for: {info}: {e}')
+    return None
+  filename = os.path.relpath(
+      filename, start=os.path.dirname(xarray_beam.__file__)
+  )
+  lines = f'#L{linenum}-L{linenum + len(source)}' if linenum else ''
+  return f'https://github.com/google/xarray-beam/blob/main/xarray_beam/{filename}{lines}'
diff --git a/docs/high-level.ipynb b/docs/high-level.ipynb
new file mode 100644
index 0000000..128564e
--- /dev/null
+++ b/docs/high-level.ipynb
@@ -0,0 +1,322 @@
+{
+ "cells": [
+ {
+ "metadata": {
+ "id": "ZQCqHh_IxEf6"
+ },
+ "cell_type": "markdown",
+ "source": [
+ "# High-level datasets"
+ ]
+ },
+ {
+ "metadata": {
+ "id": "z8MqtGzHxL8F"
+ },
+ "cell_type": "markdown",
+ "source": [
+ "{py:class}`xarray_beam.Dataset` is a new (as of September 2025) high-level interface for Xarray-Beam.\n",
+ "\n",
+ "It requires less boilerplate code than the current (explicit) interface, and accordingly should be an easier-to-use tool, especially for non-expert users. 
You still need to think about how your data is divided into chunks, but the data model of `Dataset` keeps track of the high-level structure of your data, avoiding the need to manually build templates for {py:class}`~xarray_beam.ChunksToZarr`.\n",
+ "\n",
+ "```{warning}\n",
+ "The `Dataset` interface is experimental, and currently offers no backwards compatibility guarantees.\n",
+ "```"
+ ]
+ },
+ {
+ "metadata": {
+ "id": "Ht5100QIRpyd",
+ "tags": [
+ "hide-cell"
+ ]
+ },
+ "cell_type": "code",
+ "source": [
+ "# small formatting improvements\n",
+ "import contextlib\n",
+ "\n",
+ "@contextlib.contextmanager\n",
+ "def print_error():\n",
+ "  try:\n",
+ "    yield\n",
+ "  except Exception as e:\n",
+ "    print(f'{type(e).__name__}: {e}')"
+ ],
+ "outputs": [],
+ "execution_count": 2
+ },
+ {
+ "metadata": {
+ "id": "EkH9Na4ezuTi"
+ },
+ "cell_type": "markdown",
+ "source": [
+ "## Data model\n",
+ "\n",
+ "Dataset is a wrapper over a series of Beam transformations, adding metadata describing the corresponding `xarray.Dataset` and how it is distributed with Beam:\n",
+ "\n",
+ "- `ptransform` is the wrapped `beam.PTransform` to compute the chunks of the dataset.\n",
+ "- `template` is a lazily-computed `xarray.Dataset` indicating the structure of the overall dataset.\n",
+ "- `chunks` is a dictionary mapping from dimension names to integer chunk sizes, indicating the size of each chunk.\n",
+ "- `split_vars` is a boolean indicating whether `ptransform` elements each contain only a single variable from the dataset, rather than all variables.\n",
+ "\n",
+ "This information is surfaced via `xbeam.Dataset.__repr__()`:"
+ ]
+ },
+ {
+ "metadata": {
+ "id": "3N0wXjXRy9cK"
+ },
+ "cell_type": "code",
+ "source": [
+ "import apache_beam as beam\n",
+ "import xarray_beam as xbeam\n",
+ "import xarray\n",
+ "import numpy as np\n",
+ "import pandas as pd\n",
+ "\n",
+ "xarray_ds = xarray.Dataset(\n",
+ "    {'temperature': (('time', 'longitude', 'latitude'), np.random.randn(365, 180, 90))},\n",
+ "    coords={'time': pd.date_range('2025-01-01', freq='1D', periods=365)},\n",
+ ")\n",
+ "chunks = {'time': 100, 'longitude': 90, 'latitude': 90}\n",
+ "xbeam_ds = xbeam.Dataset.from_xarray(xarray_ds, chunks)\n",
+ "xbeam_ds"
+ ],
+ "outputs": [],
+ "execution_count": 3
+ },
+ {
+ "metadata": {
+ "id": "k5U2hsNr51FJ"
+ },
+ "cell_type": "markdown",
+ "source": [
+ "Xarray-Beam pipelines typically read and write data to [Zarr](https://zarr.dev/), so we'll start by writing our example data to a Zarr file (with plain Xarray/Dask) for us to read later:"
+ ]
+ },
+ {
+ "metadata": {
+ "id": "cqDLwyBG51FJ"
+ },
+ "cell_type": "code",
+ "source": [
+ "xarray_ds.chunk(chunks).to_zarr('example_data.zarr', mode='w')"
+ ],
+ "outputs": [],
+ "execution_count": 4
+ },
+ {
+ "metadata": {
+ "id": "fb-Wx7dl1ScK"
+ },
+ "cell_type": "markdown",
+ "source": [
+ "## Writing pipelines\n",
+ "\n",
+ "Most Xarray-Beam pipelines can be written via a handful of Dataset methods:\n",
+ "\n",
+ "- {py:meth}`~xarray_beam.Dataset.from_zarr`: Load a dataset from a Zarr store.\n",
+ "- {py:meth}`~xarray_beam.Dataset.rechunk`: Adjust chunks on a dataset.\n",
+ "- {py:meth}`~xarray_beam.Dataset.map_blocks`: Map a function over every chunk of this dataset independently.\n",
+ "- {py:meth}`~xarray_beam.Dataset.to_zarr`: Write a dataset to a Zarr store.\n",
+ "\n",
+ "All non-trivial computation happens via the embarrassingly parallel `map_blocks` method.\n",
+ "\n",
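+ "For instance, here is a minimal sketch of a per-chunk computation, assuming a hypothetical `xbeam.Dataset` named `ds` with temperatures stored in units of Kelvin:\n",
+ "\n",
+ "```python\n",
+ "# Hypothetical example: convert temperatures from Kelvin to Celsius.\n",
+ "# The function is applied independently to every chunk of the dataset.\n",
+ "ds_celsius = ds.map_blocks(lambda chunk: chunk - 273.15)\n",
+ "```\n",
+ "\n",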
+ "In order for `map_blocks` to work, data needs to be appropriately chunked. Here are a few typical chunking patterns that work well for most needs:\n",
+ "\n",
+ "- \"Pencil\" chunks, which group together all times, and parallelize over space. These long and skinny chunks look like a box of pencils:"
+ ]
+ },
+ {
+ "metadata": {
+ "id": "EpOpwzZS9Rte"
+ },
+ "cell_type": "code",
+ "source": [
+ "xarray_ds.temperature.chunk({'time': -1, 'latitude': 20, 'longitude': 20}).data"
+ ],
+ "outputs": [],
+ "execution_count": 5
+ },
+ {
+ "metadata": {
+ "id": "74t0dh3E9kny"
+ },
+ "cell_type": "markdown",
+ "source": [
+ "- \"Pancake\" chunks, which group together all spatial locations, and parallelize over time. These flat and wide chunks look like a stack of pancakes:"
+ ]
+ },
+ {
+ "metadata": {
+ "id": "fho4ub-69mD5"
+ },
+ "cell_type": "code",
+ "source": [
+ "xarray_ds.temperature.chunk({'time': 1, 'latitude': -1, 'longitude': -1}).data"
+ ],
+ "outputs": [],
+ "execution_count": 6
+ },
+ {
+ "metadata": {
+ "id": "t3APp6uB9yjT"
+ },
+ "cell_type": "markdown",
+ "source": [
+ "Weather/climate datasets are typically generated and stored in pancake chunks, but pencil chunks are more useful for most analytics queries, which require long histories of weather at a single location. Intermediate \"compromise\" chunks can sometimes be a good idea, although if performance and flexibility are critical it may be worth storing multiple copies of your data in different formats.\n",
+ "\n",
+ "Using the right chunks is *absolutely essential* for efficient operations with Xarray-Beam and Zarr. For example, reading data from a single location across all times (a \"pencil\" query) is extremely inefficient for a dataset stored in \"pancake\" chunks -- it would require loading the entire dataset from disk!\n",
+ "\n",
+ "Rechunking is fundamentally an expensive operation (it requires multiple complete reads and writes of a dataset on disk), but in Xarray-Beam it's straightforward, via {py:meth}`~xarray_beam.Dataset.rechunk`.\n",
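+ "\n",
+ "As a minimal sketch (again using the hypothetical `ds` from above), converting a dataset to \"pencil\" chunks looks like:\n",
+ "\n",
+ "```python\n",
+ "# Hypothetical example: rechunk to \"pencil\" chunks. A chunk size of -1\n",
+ "# means using the full extent of that dimension within each chunk.\n",
+ "pencil_ds = ds.rechunk({'time': -1, 'latitude': 20, 'longitude': 20})\n",
+ "```"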
+ ] + }, + { + "metadata": { + "id": "-SXovp9B6LwH" + }, + "cell_type": "markdown", + "source": [ + "### Example 1: Climatology\n", + "\n", + "Here we need to group together all time points in the same chunk (\"pencil chunks\"), parallelizing over space:" + ] + }, + { + "metadata": { + "id": "WIOFiC1z40mh" + }, + "cell_type": "code", + "source": [ + "with beam.Pipeline() as p:\n", + " p | (\n", + " xbeam.Dataset.from_zarr('example_data.zarr')\n", + " .rechunk({'time': -1, 'latitude': 30, 'longitude': 30})\n", + " .map_blocks(lambda ds: ds.groupby('time.month').mean())\n", + " .to_zarr('example_climatology.zarr')\n", + " )\n", + "xarray.open_zarr('example_climatology.zarr')" + ], + "outputs": [], + "execution_count": 7 + }, + { + "metadata": { + "id": "O14EmS3J8e6H" + }, + "cell_type": "markdown", + "source": [ + "### Example 2: Regridding over space\n", + "\n", + "Here we need to group all space points in the same chunk (\"pancake chunks\"), parallelizing over time:" + ] + }, + { + "metadata": { + "id": "kms-N92ZzWxy" + }, + "cell_type": "code", + "source": [ + "with beam.Pipeline() as p:\n", + " p | (\n", + " xbeam.Dataset.from_zarr('example_data.zarr')\n", + " .rechunk({'time': 10, 'latitude': -1, 'longitude': -1})\n", + " .map_blocks(lambda ds: ds.coarsen(latitude=2, longitude=2).mean())\n", + " .to_zarr('example_regrid.zarr')\n", + " )\n", + "xarray.open_zarr('example_regrid.zarr')" + ], + "outputs": [], + "execution_count": 8 + }, + { + "metadata": { + "id": "FpJBZ6XsB_oE" + }, + "cell_type": "markdown", + "source": [ + "## Limitations of map_blocks\n", + "\n", + "In the examples above, {py:meth}`~xarray_beam.Dataset.map_blocks` somehow automatically knew the appropriate structure of the output `template`, without evaluating any chunked data. How could this work?\n", + "\n", + "For building templates, Xarray-Beam relies on lazy evaluation with [Dask arrays](https://docs.dask.org/en/stable/array.html). This requires that applied functions are Dask compatible. Almost all built-in Xarray operations are Dask compatible, but if your applied function is _not_ Dask compatible (e.g., because it loads array values into memory), Xarray-Beam will show an informative error:" + ] + }, + { + "metadata": { + "id": "4TB_pzjTDLfS" + }, + "cell_type": "code", + "source": [ + "with print_error():\n", + " (\n", + " xbeam.Dataset.from_zarr('example_data.zarr')\n", + " .map_blocks(lambda ds: ds.compute()) # load into memory\n", + " )" + ], + "outputs": [], + "execution_count": 9 + }, + { + "metadata": { + "id": "vCjZK9fmEeEq" + }, + "cell_type": "markdown", + "source": [ + "You can avoid these errors by explicitly [creating a template](creating_templates):" + ] + }, + { + "metadata": { + "id": "yOGT81UYC2-0" + }, + "cell_type": "code", + "source": [ + "ds_beam = xbeam.Dataset.from_zarr('example_data.zarr')\n", + "ds_beam.map_blocks(lambda ds: ds.compute(), template=ds_beam.template)" + ], + "outputs": [], + "execution_count": 10 + }, + { + "metadata": { + "id": "75IG-22cKcuE" + }, + "cell_type": "markdown", + "source": [ + "In other situations, you might want to perform an operation that returns something other than an `xarray.Dataset`, e.g., to write all chunks as individual files to disk. 
In these situations, you can switch to the lower-level Xarray-Beam [data model](data-model), and use raw Beam operations:" + ] + }, + { + "metadata": { + "id": "SlnFfo2YKZUN" + }, + "cell_type": "code", + "source": [ + "def to_netcdf(key: xbeam.Key, chunk: xarray.Dataset):\n", + " path = f\"{chunk.indexes['time'][0]:%Y-%m-%d}.nc\"\n", + " chunk.to_netcdf(path)\n", + "\n", + "with beam.Pipeline() as p:\n", + " p | (\n", + " xbeam.Dataset.from_zarr('example_data.zarr')\n", + " .rechunk({'latitude': -1, 'longitude': -1})\n", + " .ptransform\n", + " ) | beam.MapTuple(to_netcdf)\n", + "\n", + "%ls *.nc" + ], + "outputs": [], + "execution_count": 14 + } + ], + "metadata": { + "colab": { + "private_outputs": true + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/docs/index.md b/docs/index.md index bc25f06..19f254a 100644 --- a/docs/index.md +++ b/docs/index.md @@ -15,5 +15,6 @@ data-model.ipynb read-write.ipynb aggregation.ipynb rechunking.ipynb +high-level.ipynb api.md ``` \ No newline at end of file diff --git a/docs/read-write.ipynb b/docs/read-write.ipynb index 8be67cc..3ae07d1 100644 --- a/docs/read-write.ipynb +++ b/docs/read-write.ipynb @@ -2,30 +2,24 @@ "cells": [ { "cell_type": "markdown", - "id": "c54a0838", - "metadata": { - "id": "b4sQqohzBS45" - }, + "id": "b4sQqohzBS45", + "metadata": {}, "source": [ "# Reading and writing data" ] }, { "cell_type": "markdown", - "id": "ed512674", - "metadata": { - "id": "DFgXiyk0BS45" - }, + "id": "DFgXiyk0BS45", + "metadata": {}, "source": [ "## Read datasets into chunks" ] }, { "cell_type": "markdown", - "id": "30edb09d", - "metadata": { - "id": "9u-o9LjvBS45" - }, + "id": "9u-o9LjvBS45", + "metadata": {}, "source": [ "There are two main options for loading an `xarray.Dataset` into Xarray-Beam. You can either [create the dataset](data-model.ipynb) from scratch or use the {py:class}`~xarray_beam.DatasetToChunks` transform starting at the root of a Beam pipeline:" ] @@ -33,10 +27,8 @@ { "cell_type": "code", "execution_count": 1, - "id": "427d32c2", - "metadata": { - "id": "9sNXzAtkBS45" - }, + "id": "9sNXzAtkBS45", + "metadata": {}, "outputs": [], "source": [ "import apache_beam as beam\n", @@ -49,10 +41,8 @@ { "cell_type": "code", "execution_count": 2, - "id": "6fe9fabe", - "metadata": { - "id": "5dvq3RHbBS45" - }, + "id": "5dvq3RHbBS45", + "metadata": {}, "outputs": [], "source": [ "ds = xarray.tutorial.load_dataset('air_temperature')" @@ -61,10 +51,8 @@ { "cell_type": "code", "execution_count": 3, - "id": "28eb3b6e", - "metadata": { - "id": "ZEhCjJXDBS45" - }, + "id": "ZEhCjJXDBS45", + "metadata": {}, "outputs": [ { "data": { @@ -114,10 +102,8 @@ }, { "cell_type": "markdown", - "id": "d7c825b8", - "metadata": { - "id": "k-PFVdDzBS45" - }, + "id": "k-PFVdDzBS45", + "metadata": {}, "source": [ "Importantly, xarray datasets fed into `DatasetToChunks` **can be lazy**, with data not already loaded eagerly into NumPy arrays. 
When you feed lazy datasets into `DatasetToChunks`, each individual chunk will be indexed and evaluated separately on Beam workers.\n",
 "\n",
@@ -126,20 +112,16 @@
 },
 {
 "cell_type": "markdown",
- "id": "70f09baa",
- "metadata": {
- "id": "Pl6UGVhyBS45"
- },
+ "id": "Pl6UGVhyBS45",
+ "metadata": {},
 "source": [
 "## Reading data from Zarr"
 ]
 },
 {
 "cell_type": "markdown",
- "id": "f7229f50",
- "metadata": {
- "id": "3FvrNiCjBS45"
- },
+ "id": "3FvrNiCjBS45",
+ "metadata": {},
 "source": [
 "[Zarr](https://zarr.readthedocs.io/) is the preferred file format for reading and writing data with Xarray-Beam, due to its excellent scalability and support inside Xarray.\n",
 "\n",
@@ -152,10 +134,8 @@
 {
 "cell_type": "code",
 "execution_count": 4,
- "id": "10c17dc3",
- "metadata": {
- "id": "pfQRSu_iBS45"
- },
+ "id": "pfQRSu_iBS45",
+ "metadata": {},
 "outputs": [
 {
 "name": "stdout",
@@ -199,10 +179,8 @@
 },
 {
 "cell_type": "markdown",
- "id": "83a1833a",
- "metadata": {
- "id": "_vDX8aenBS45"
- },
+ "id": "_vDX8aenBS45",
+ "metadata": {},
 "source": [
 "Conveniently, this is exactly the information you need for feeding into {py:class}`~xarray_beam.DatasetToChunks` to write an Xarray-Beam pipeline:"
 ]
 },
 {
 "cell_type": "code",
 "execution_count": 5,
- "id": "7b76ba13",
- "metadata": {
- "id": "U6imtj5CBS45"
- },
+ "id": "U6imtj5CBS45",
+ "metadata": {},
 "outputs": [
 {
 "name": "stdout",
@@ -232,31 +208,26 @@
 },
 {
 "cell_type": "markdown",
- "id": "169f838c",
- "metadata": {
- "id": "y7H5uTsxBS45"
- },
+ "id": "y7H5uTsxBS45",
+ "metadata": {},
 "source": [
 "## Writing data to Zarr"
 ]
 },
 {
 "cell_type": "markdown",
- "id": "5bcf8ad1",
- "metadata": {
- "id": "re3c6-NTBS45"
- },
+ "id": "re3c6-NTBS45",
+ "metadata": {},
 "source": [
 "{py:class}`~xarray_beam.ChunksToZarr` is Xarray-Beam's API for saving chunks into a Zarr store."
 ]
 },
 {
 "cell_type": "markdown",
- "id": "012d88ee",
- "metadata": {
- "id": "qFsuz2R1BS45"
- },
+ "id": "qFsuz2R1BS45",
+ "metadata": {},
 "source": [
+ "(creating_templates)=\n",
 "### Creating templates\n",
 "\n",
 "The `template` argument to {py:class}`ChunksToZarr` prespecifies the structure of the full on-disk dataset in the form of a lazy `xarray.Dataset`, so it can be filled in one chunk at a time in parallel using Beam. Lazy templates specify the structure of the array data that will be written by the PTransform. 
Array values that may be written as part of the Beam pipeline are indicated by using lazily computed Dask arrays to store the data.\n", @@ -267,10 +238,8 @@ { "cell_type": "code", "execution_count": 7, - "id": "bbf65917", - "metadata": { - "id": "M2TMQm9hBS45" - }, + "id": "M2TMQm9hBS45", + "metadata": {}, "outputs": [ { "name": "stdout", @@ -301,10 +270,8 @@ }, { "cell_type": "markdown", - "id": "0b21fded", - "metadata": { - "id": "25vzYlbWBS45" - }, + "id": "25vzYlbWBS45", + "metadata": {}, "source": [ "```{tip}\n", "Under the covers, {py:func}`~xarray_beam.make_template` has a very simple implementation, equivalent to `xarray.zeros_like(ds.chunk(-1))`.\n", @@ -316,10 +283,8 @@ { "cell_type": "code", "execution_count": 8, - "id": "18eb8a29", - "metadata": { - "id": "q2ns7Ep3BS45" - }, + "id": "q2ns7Ep3BS45", + "metadata": {}, "outputs": [], "source": [ "# remove the \"time\" dimension, and insert a new \"sample\" dimension\n", @@ -329,10 +294,8 @@ { "cell_type": "code", "execution_count": 9, - "id": "eecfc041", - "metadata": { - "id": "izu99U1XBS45" - }, + "id": "izu99U1XBS45", + "metadata": {}, "outputs": [], "source": [ "# setup a template for spatially regridding along latitude and longitude\n", @@ -343,10 +306,8 @@ }, { "cell_type": "markdown", - "id": "e3079b5d", - "metadata": { - "id": "UJNKRbvUBS45" - }, + "id": "UJNKRbvUBS45", + "metadata": {}, "source": [ "### End to end examples\n", "\n", @@ -356,10 +317,8 @@ { "cell_type": "code", "execution_count": 10, - "id": "b6bd8cb7", - "metadata": { - "id": "i3Sd5CnKBS45" - }, + "id": "i3Sd5CnKBS45", + "metadata": {}, "outputs": [], "source": [ "ds_on_disk, chunks = xbeam.open_zarr('example-data.zarr')\n", @@ -377,10 +336,8 @@ }, { "cell_type": "markdown", - "id": "d06f806b", - "metadata": { - "id": "Uu-S6fehBS45" - }, + "id": "Uu-S6fehBS45", + "metadata": {}, "source": [ "If you don't have an existing Dataset to start with, a common pattern is to reuse the same function you'll use to load data for each chunk. In such cases, {py:func}`xarray_beam.replace_template_dims` is helpful for creating the full template:" ] @@ -388,10 +345,8 @@ { "cell_type": "code", "execution_count": 11, - "id": "5e161959", - "metadata": { - "id": "WQkUVWfwBS45" - }, + "id": "WQkUVWfwBS45", + "metadata": {}, "outputs": [ { "name": "stderr", @@ -429,30 +384,24 @@ }, { "cell_type": "markdown", - "id": "5d5c2e0f", - "metadata": { - "id": "upG6zLQ5BS45" - }, + "id": "upG6zLQ5BS45", + "metadata": {}, "source": [ "For more examples of how to manipulate templates and read/write data with Zarr, see the end-to-end [ERA5 climatology](https://github.com/google/xarray-beam/blob/main/examples/era5_climatology.py) and [ERA5 rechunk](https://github.com/google/xarray-beam/blob/main/examples/era5_rechunk.py) examples." 
]
 },
 {
 "cell_type": "markdown",
- "id": "9613b48c",
- "metadata": {
- "id": "CQoRNyAiBS45"
- },
+ "id": "CQoRNyAiBS45",
+ "metadata": {},
 "source": [
 "## Tips for custom data loaders"
 ]
 },
 {
 "cell_type": "markdown",
- "id": "d2500f6e",
- "metadata": {
- "id": "CVthTvlIBS45"
- },
+ "id": "CVthTvlIBS45",
+ "metadata": {},
 "source": [
 "If you use Xarray's file opening utilities instead of {py:class}`xarray_beam.open_zarr`, you need to take some care to get good performance when processing very large numbers of chunks (hundreds of thousands).\n",
 "\n",
@@ -463,10 +412,8 @@
 },
 {
 "cell_type": "markdown",
- "id": "7d3ec100",
- "metadata": {
- "id": "f6SEr2VhBS45"
- },
+ "id": "f6SEr2VhBS45",
+ "metadata": {},
 "source": [
 "Alternatively, you can pass in lazy datasets [using dask](http://xarray.pydata.org/en/stable/user-guide/dask.html). In this case, you don't need to explicitly supply `chunks` to `DatasetToChunks`:"
 ]
 },
 {
 "cell_type": "code",
 "execution_count": 12,
- "id": "d3f4f0a0",
- "metadata": {
- "id": "4MZtgkz0BS45"
- },
+ "id": "4MZtgkz0BS45",
+ "metadata": {},
 "outputs": [
 {
 "name": "stdout",
@@ -498,10 +443,8 @@
 },
 {
 "cell_type": "markdown",
- "id": "1a9a5810",
- "metadata": {
- "id": "30mhyfQrBS45"
- },
+ "id": "30mhyfQrBS45",
+ "metadata": {},
 "source": [
 "Dask's lazy evaluation system is much more general than Xarray's lazy indexing, so as long as the resulting dataset can be independently evaluated in each chunk, using Dask can be a very convenient way to set up computation for Xarray-Beam.\n",
 "\n",
diff --git a/docs/requirements.txt b/docs/requirements.txt
index cf23412..f7de0f4 100644
--- a/docs/requirements.txt
+++ b/docs/requirements.txt
@@ -1,9 +1,9 @@
 # doc requirements
+furo==2025.9.25
 Jinja2==3.1.6
 myst-nb==1.3.0
 myst-parser==4.0.1
 sphinx==8.2.3
-sphinx_rtd_theme==3.0.2
 scipy==1.16.2
 matplotlib==3.10.6
 
diff --git a/xarray_beam/__init__.py b/xarray_beam/__init__.py
index 62abd96..135209e 100644
--- a/xarray_beam/__init__.py
+++ b/xarray_beam/__init__.py
@@ -52,4 +52,4 @@
 DatasetToZarr,
 )
 
-__version__ = '0.9.3'  # automatically synchronized to pyproject.toml
+__version__ = '0.10.0'  # automatically synchronized to pyproject.toml