In [2]:
!pip install xarray

[0m

In [10]:
# Import the array libraries, then report the versions this notebook runs against.
import numpy as np
import xarray as xr

print("numpy version=", np.__version__)
print("xarray version=", xr.__version__)

numpy version= 1.22.1
xarray version= 2023.6.0


In [6]:
from IPython.display import HTML, display

# Inject CSS that forces `.container` elements to 77% width
# (`!important` overrides the stylesheet's own width rule).
display(HTML("<style>.container { width:77% !important; }</style>"))

# For users

## Overview: why xarray?

- Xarray adds labels (e.g., dimensions, coordinates, attributes) on top of numpy arrays

- Numpy already supports N-dimensional arrays (aka tensors), but identifies their axes and elements only by integer position
- Xarray allows you to
    - apply operations over dimensions by name
    - select values by label (instead of integer location)
    - vectorize operations across dimensions based on dimension names (not shape)
    - group data with the split-apply-combine paradigm
    - use dimension names (e.g., `dim='time'` vs `axis=0`)
    - write less code
    
- `DataArray` = labeled N-dimensional array
    - generalizes `pd.Series`
    - attaches labels to `np.ndarray`

- `Dataset` = dict-like container of `DataArray` objects
    - similar to `pd.DataFrame`
    - arrays in a `Dataset` can have different numbers of dimensions

## Quick overview

From https://docs.xarray.dev/en/stable/getting-started-guide/quick-overview.html

### Create DataArray

In [14]:
# Build a 2x3 DataArray of random values:
# - the two axes are named "x" and "y"
# - the two positions along "x" get the coordinate labels 10 and 20
# - "y" is left without coordinate labels
data = xr.DataArray(
    data=np.random.rand(2, 3),
    dims=("x", "y"),
    coords={"x": [10, 20]},
)
data

In [15]:
# Show the plain-text representation: dimensions, values, and coordinates
print(data)

<xarray.DataArray (x: 2, y: 3)>
array([[0.14130229, 0.16855454, 0.53897536],
       [0.03800943, 0.49883363, 0.71189534]])
Coordinates:
  * x        (x) int64 10 20
Dimensions without coordinates: y


In [16]:
# The underlying numpy array, without dimension names or coordinates
data.values

array([[0.14130229, 0.16855454, 0.53897536],
       [0.03800943, 0.49883363, 0.71189534]])

In [17]:
# Tuple of dimension names, in axis order
data.dims

('x', 'y')

In [18]:
# Coordinate labels attached to the dimensions (only `x` has labels here)
data.coords

Coordinates:
  * x        (x) int64 10 20

In [19]:
# Metadata dictionary; empty because no attributes were set at creation
data.attrs

{}

### Indexing

In [20]:
# Positional indexing by integer position (like numpy):
# first entry along `x`, everything along `y`
data[0, :]

In [23]:
# loc, "location": dimensions are given by position, values by coordinate label (like pandas)
# Get data along the first dimension at the coordinate label `10`
data.loc[10]

In [25]:
# isel, "integer select": by dimension name and integer position
# Get data along the dimension `x` at integer position 0 (the first entry)
data.isel(x=0)

In [26]:
# sel, "select": by dimension name and coordinate label
# Get data along the dimension `x` at the coordinate label `10`
data.sel(x=10)

### Attributes