In [17]:
# Show the result of every top-level expression in a cell, not only the last one
# (the multi-expression cells below rely on this to display all their values).
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

In [1]:
import xarray as xr
import pandas as pd
import numpy as np

## DataArray
A labelled, multidimensional array.

In [5]:
# build a 2-D DataArray: rows labelled 0..9 along 'index', columns 'a'..'e' along 'field'

arr = np.ones(shape=(10, 5))
idx = range(10)
cols = ['a', 'b', 'c', 'd', 'e']

darr = xr.DataArray(data=arr, dims=('index', 'field'), coords=[idx, cols])
darr

In [6]:
# a DataArray can have any number of dimensions -- here three

arr = np.ones(shape=(10, 5, 5))
idx = range(10)
cols = ['a', 'b', 'c', 'd', 'e']
groups = ['x', 'y', 'z', 'w', 'v']

darr = xr.DataArray(
    data=arr,
    dims=('index', 'field', 'class'),
    coords=[idx, cols, groups],
)
darr

In [7]:
darr.sel(field='b')  # label-based selection: keep the entries whose 'field' coordinate is 'b'

In [11]:
# coords need not match dims one-to-one: a coordinate can also be a scalar
# constant ('x') or a variable defined along only some of the dims ('y' along 'index')

arr = np.ones(shape=(10, 5))
idx = range(10)
cols = ['a', 'b', 'c', 'd', 'e']

darr = xr.DataArray(
    data=arr,
    dims=('index', 'field'),
    coords=dict(
        index=idx,
        field=cols,
        x=10,
        y=('index', range(2, 12)),
    ),
)
darr

In [14]:
# a DataArray can be created from a DataFrame; the index and columns axes
# are named first ('index' / 'field')

df = pd.DataFrame(data=arr, index=idx, columns=cols)
df.columns.name = 'field'
df.index.name = 'index'
xr.DataArray(df)

In [19]:
# core attributes of a DataArray (plain attribute access, nothing is called)
darr.values  # the underlying numpy array
darr.dims    # tuple of dimension names
darr.coords  # the coordinates, including non-dimension ones such as x and y
darr.attrs   # metadata dict (empty here)

array([[1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.]])

('index', 'field')

Coordinates:
  * index    (index) int64 0 1 2 3 4 5 6 7 8 9
  * field    (field) <U1 'a' 'b' 'c' 'd' 'e'
    x        int64 10
    y        (index) int64 2 3 4 5 6 7 8 9 10 11

{}

In [22]:
# a coordinate can be read through .coords or by indexing the DataArray directly with its name

darr.coords['y']
darr['y']

In [24]:
darr['index']  # the returned coordinate carries along every other coordinate aligned with it

In [27]:
darr['field']  # returns 'field' plus the coordinates aligned with it (a scalar coordinate is aligned with everything)

In [29]:
darr['field']['field']  # idempotent: indexing a coordinate by its own name gives the same coordinate back

In [54]:
darr.loc[2, 'b']  # label-based access via .loc: label 2 along the first dim, 'b' along the second

In [55]:
darr  # display the full DataArray again for reference

In [57]:
# 3-D array where only 'x' gets a dimension coordinate; 'yz' is a 2-D
# coordinate spanning the 'y' and 'z' dimensions
dd = xr.DataArray(
    data=np.ones(shape=(3, 4, 5)),
    dims=('x', 'y', 'z'),
    coords=dict(
        x=range(3),
        yz=(['y', 'z'], np.zeros(shape=(4, 5))),
    ),
)

## Dataset
The equivalent of a multidimensional pandas DataFrame.
A DataArray holds just one multidimensional array (its coordinates are for alignment and indexing, not for storing values);
a Dataset can hold multiple arrays.

In [34]:
# build a Dataset holding two variables, x (3-D) and y (2-D), that share
# the 'index' and 'columns' dimensions

x = np.ones(shape=(3, 4, 2))
y = np.ones(shape=(3, 2))

idx = ['a', 'b', 'c']
cols = ['X', 'Y']
# NOTE(review): `fields` is defined but not passed to `coords` below, so the
# 'fields' dimension gets no explicit coordinate -- confirm this is intended.
fields = range(4)

ds = xr.Dataset(
    data_vars=dict(
        x=(['index', 'fields', 'columns'], x),
        y=(['index', 'columns'], y),
    ),
    coords=dict(
        index=idx,
        columns=cols,
        plane=(['index', 'fields'], np.zeros(shape=(3, 4))),
    ),
)
ds

In [33]:
# 'fields' is a dimension without an explicit coordinate; indexing by its name
# still returns a DataArray (presumably a default 0..n-1 range -- verify)
ds['fields']

In [36]:
ds['x']  # indexing by a data-variable name returns that variable as a DataArray

In [39]:
# a DataArray can be assigned as a new data variable, here 'z' over (fields, columns)
ds['z'] = xr.DataArray(data=np.ones(shape=(4, 2)), dims=('fields', 'columns'))
ds

In [43]:
# list the data variables currently stored in the Dataset
# (type `ds.` + TAB in a live session to explore its other attributes)
ds.data_vars

In [46]:
darr.to_dataset(name='arr')  # wrap a single DataArray in a Dataset, stored under the variable name 'arr'

In [51]:
ds.sel(index=ds['index'].isin(['b', 'c']))  # boolean indexing: .isin on a DataArray builds the mask passed to .sel

In [61]:
ds.dims  # mapping from dimension name to its length

Frozen(SortedKeysDict({'columns': 2, 'index': 3, 'fields': 4}))

In [64]:
# add data variable 'w' with dims (fields, groups)
ds['w'] = xr.DataArray(data=np.zeros(shape=(4, 10)), dims=('fields', 'groups'))
ds

In [65]:
ds  # display the Dataset in its current state

In [69]:
ds.sel(fields=2)  # select along the 'fields' dimension using keyword syntax

In [70]:
ds.loc[{'fields': 2}]  # .loc form of the selection: takes a dict keyed by dimension name