In [32]:
import numpy as np
import pandas as pd
import xarray as xr

rng = np.random.default_rng(seed=0)

### DataArray

In [3]:
da = xr.DataArray(
    np.ones((3, 4, 2)),
    dims={"x", "y", "z"},
    name="a",
    coords={"z": [-1, 1], "u": ("x", [0.1, 1.2, 2.3])},
    attrs={"attr": "value"}
)

A `DataArray` has two main string representation options `html` and `text`. `html` is only available from
notebooks

In [4]:
with xr.set_options(display_style='text'):
    display(da)

In [6]:
with xr.set_options(display_style='html'):
    display(da)

In [7]:
da.data

array([[[1., 1.],
        [1., 1.],
        [1., 1.],
        [1., 1.]],

       [[1., 1.],
        [1., 1.],
        [1., 1.],
        [1., 1.]],

       [[1., 1.],
        [1., 1.],
        [1., 1.],
        [1., 1.]]])

In [8]:
da.dims

('x', 'y', 'z')

In [9]:
da.coords

Coordinates:
  * z        (z) int64 -1 1
    u        (x) float64 0.1 1.2 2.3

In [10]:
type(da.data)

numpy.ndarray

In [11]:
da.attrs

{'attr': 'value'}

In [12]:
da.shape

(3, 4, 2)

##### Creating a DataArraz containing coordinates

In [14]:
# coords.x and coords.y match the number of rows and columns of the numpy array respectively.
# coords.u is an additional dimiension along "x" with its own metadata
da = xr.DataArray(
    np.ones((3, 4)),
    dims=("x", "y"),
    coords={
        "x": ["a", "b", "c"],
        "y": np.arange(4),
        "u": ("x", np.arange(3), {"attr": 0})
    },
)
da

##### Excersice

Create a `DataArray` named "height" from random data

 1. with dimensions named "latitude" and "longitude"

In [20]:
arr = rng.random((180, 360)) * 400
height = xr.DataArray(arr, dims=('latitude', 'longitude'))
height

2. With dimension coordinates:
- "latitude": -90 to 90 with step size 1
- "longitude": -180 to 180 with step size 1

In [24]:
height = xr.DataArray(
    arr,
    dims=('latitude', 'longitude'),
    coords={
        'latitude': np.linspace(-90, 90, num=180),
        'longitude': np.linspace(-180, 180, num=360)
    }
)
height

3. With metadata for both data and coordinates:

- height: "type": "ellipsoid"
- latitude: "type": "geodetic"
- longitude: "prime_meridian": "greenwich"

In [27]:
height = xr.DataArray(
    arr,
    dims=('latitude', 'longitude'),
    coords={
        'latitude': np.linspace(-90, 90, num=180),
        'longitude': np.linspace(-180, 180, num=360)
    },
    name='height',
    attrs={
        'height': { 'type': 'ellipsoid' },
        'latitude': { 'type': 'geodetic' },
        'longitude': { 'prime_meridian': 'greenwich' }
    }
)
height

In [26]:
height.attrs

{'height': {'type': 'ellipsoid'},
 'latitude': {'type': 'geodetic'},
 'longitude': {'prime_meridian': 'greenwich'}}

### Dataset

`Dataset` objects collect multiple data variables, each can habe different dimensions

The constructor of `Dataset` takes three parameters:

- `data_vars`: dict-like mapping names to values. Similar to coordinates in `DataArray` but using either objects or the tuple sintax used to provide dimension
- `coords`: same as for `DataArray`
- `attrs`: same as for `DataArray`

The following `Dataset` contains two variables:

In [30]:
ds = xr.Dataset(
    data_vars={
        'a': (('x', 'y'), np.ones((3, 4))),
        'b': ('t', np.full((8,), 3), {'attr': 'value'})
    },
    coords={'x': [-1, 0, 1],},
    attrs={'attr': 'value'}
)
ds

In [34]:
xr.Dataset(
    data_vars= {
        'a': (('x', 'y'), np.ones((3, 4))),
        'b': (('t', 'x'), np.full((8, 3), 3))
    },
    coords={
        'x': ['a', 'b', 'c'],
        'y': np.arange(4),
        't': pd.date_range('2020-07-05', periods=8, freq='D') # 8 days from 2020-07-25
    },
)

In [37]:
# The tuple sintax doesn't work if two objects with the same dimensions have non-matching shapes
# In this case DataArray objects have to be used

x_a = np.arange(1, 4)
x_b = np.arange(-1, 3)

a = xr.DataArray(np.linspace(0, 1, 3), dims='x', coords={'x': x_a})
b = xr.DataArray(np.zeros(4), dims='x', coords={'x': x_b})

xr.Dataset(data_vars={'a': a, 'b': b})

##### Excercises

1. create a Dataset with two variables along `latitude` and `longitude`: `height` and `gravity_anomaly`


In [39]:
height = rng.random((180, 360)) * 400
gravity_anomaly = rng.random((180, 360)) * 400 - 200

xr.Dataset(
    data_vars={
        'height': (('latitude', 'longitude'), height),
        'gravity_anomaly': (('latitude', 'longitude'), gravity_anomaly)
    }
)

2. add coordinates to `latitude` and `longitude`:
- `latitude`: from -90 to 90 with step size 1
- `longitude`: from -180 to 180 with step size 1

In [41]:
xr.Dataset(
    data_vars={
        'height': (('latitude', 'longitude'), height),
        'gravity_anomaly': (('latitude', 'longitude'), gravity_anomaly)
    },
    coords={
        'latitude': np.arange(-90, 90),
        'longitude': np.arange(-180, 180)
    }
)

3. add metadata to coordinates and variables
- `latitude`: "type": "generic"
- `longitude`: "prime_meridian": "greenwich"
- `height`: "ellipsoid": "wgs84"
- `gravity_anomaly`: "ellipsoid": "grs80"

In [50]:
ds = xr.Dataset(
    data_vars={
        'height': (
            ('latitude', 'longitude'), 
            height, 
            {'latitude': {'type': 'generic'}, 'longitude': {'prime_meridian': 'greenwich'}, 'ellipsoid': 'wgs84'}
        ),
        'gravity_anomaly': (
            ('latitude', 'longitude'), 
            gravity_anomaly, 
            {'latitude': {'type': 'generic'}, 'longitude': {'prime_meridian': 'greenwich'}, 'ellipsoid': 'grs80'}
        )
    },
    coords={
        'latitude': ('latitude', np.arange(-90, 90), {'type': 'generic'}),
        'longitude': ('longitude', np.arange(-180, 180), {'prime_meridian': 'greenwich'})
    }
)
ds

In [62]:
ds.variables['height'].values

array([[386.07529837, 179.02970377, 228.25957506, ..., 379.99256392,
        281.57477967, 310.84352388],
       [353.4034526 , 192.65838344,  93.49341533, ..., 225.87821852,
        391.18615172,  65.58503322],
       [204.2967594 , 121.54915116, 374.26835767, ..., 394.50376586,
        390.41370402, 180.95328545],
       ...,
       [133.31364126, 308.42485343, 114.27836112, ..., 370.56212016,
        398.95614822,  49.78601545],
       [187.25374319, 259.83962302, 169.4764494 , ..., 314.26182972,
        119.98483324, 332.83021087],
       [181.1893609 ,   8.17463794,  13.33950606, ..., 272.39776632,
        333.18204102,  24.12761496]])

In [63]:
ds.variables['gravity_anomaly'].values

array([[-186.45308128, -112.00726214,  -40.45131512, ...,  116.35388169,
         185.05054367, -120.47858773],
       [  37.64840474,  -65.33717994,   83.89118403, ...,  125.11509026,
        -101.3088592 ,  126.69426325],
       [-142.64519684,  181.10134378, -160.39200864, ...,  101.58808138,
        -149.43774291,  140.2747501 ],
       ...,
       [-192.5662972 ,   40.51177602,   -8.81897539, ..., -194.03991407,
          70.89763259,   68.92599807],
       [ -16.11992875,   68.54517733, -182.80268429, ..., -113.31267938,
          56.25922254, -132.10755195],
       [ 133.9030059 ,   98.54710121,   29.92920942, ..., -146.05184466,
        -125.21702421,   41.6784466 ]])