In [1]:
import numpy as np 
import pandas as pd 
import xarray as xr

Easy introduction: https://www.ecmwf.int/sites/default/files/elibrary/2017/17837-xarray-n-d-labeled-arrays-and-datasets-python.pdf


## Create a DataArray 

- Build the data from a numpy array or a list, optionally supplying dimension names and coordinates

In [6]:
# Build a 2x3 DataArray of standard-normal samples. Dimension 'x' is labelled
# with the coordinates 'a' and 'b'; dimension 'y' is given no coordinate.
data = xr.DataArray(
    np.random.randn(2, 3),
    coords={'x': ['a', 'b']},
    dims=('x', 'y'),
)
# End the cell on the bare name so the notebook displays the array's repr
# (an assignment on its own produces no cell output).
data




<xarray.DataArray (x: 2, y: 3)>
array([[-0.251043,  0.718491,  0.067735],
       [ 1.093429, -0.72558 , -0.053311]])
Coordinates:
  * x        (x) <U1 'a' 'b'
Dimensions without coordinates: y

## Indexing 

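xarray supports four kinds of indexing, and it works like (and is as fast as) pandas. The cell below demonstrates three of them; the fourth, indexing by dimension name and coordinate label, uses `.sel`.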

In [44]:
# 1) positional indexing with integer positions, numpy-style
print(data[[0, 1]].values)
print()

# 2) positional indexing with coordinate labels, pandas-style
print(data.loc['a':'b'].values)
print()

# 3) indexing by dimension name with integer positions
print(data.isel(y=slice(None, 3)).values)

[[-0.25104301  0.71849113  0.06773485]
 [ 1.09342918 -0.72558031 -0.05331089]]

[[-0.25104301  0.71849113  0.06773485]
 [ 1.09342918 -0.72558031 -0.05331089]]

[[-0.25104301  0.71849113  0.06773485]
 [ 1.09342918 -0.72558031 -0.05331089]]


## Computation 

Computation works the same way as with numpy arrays.

In [50]:
# scalar arithmetic is applied to every element
print(data + 10.0, "\n-------------")

# numpy ufuncs accept a DataArray and return a DataArray
print(np.sin(data), "\n-------------")

# transposing swaps the order of the dimensions
print(data.T, "\n-------------")

# summing with no arguments reduces the array to a single value
print(data.sum(), "\n-------------")

<xarray.DataArray (x: 2, y: 3)>
array([[ 9.748957, 10.718491, 10.067735],
       [11.093429,  9.27442 ,  9.946689]])
Coordinates:
  * x        (x) <U1 'a' 'b'
Dimensions without coordinates: y 
-------------
<xarray.DataArray (x: 2, y: 3)>
array([[-0.248414,  0.65825 ,  0.067683],
       [ 0.888208, -0.66357 , -0.053286]])
Coordinates:
  * x        (x) <U1 'a' 'b'
Dimensions without coordinates: y 
-------------
<xarray.DataArray (y: 3, x: 2)>
array([[-0.251043,  1.093429],
       [ 0.718491, -0.72558 ],
       [ 0.067735, -0.053311]])
Coordinates:
  * x        (x) <U1 'a' 'b'
Dimensions without coordinates: y 
-------------
<xarray.DataArray ()>
array(0.849721) 
-------------


## Adding two dimensions together: 

- arithmetic operations broadcast based on dimension name

In [55]:
# `a`: three random values whose only coordinate is whatever
# `data.coords['y']` returns, so it lines up with the 'y' dimension of `data`.
a = xr.DataArray(np.random.randn(3), coords=[data.coords['y']])
# `b`: four random values along a separate dimension named 'z'.
b = xr.DataArray(np.random.randn(4), dims='z')

In [57]:
# display the raw numpy values of both 1-D arrays as a tuple
(a.values, b.values)

(array([ 0.1352864 , -0.57512182,  0.47335528]),
 array([ 0.37147218,  0.03194342,  0.33572492, -0.63229357]))

In [61]:
# Because `a` and `b` live on different dimensions, adding them pairs every
# element of `b` with every element of `a`.
(b + a).values

array([[ 0.50675858, -0.20364965,  0.84482746],
       [ 0.16722982, -0.5431784 ,  0.5052987 ],
       [ 0.47101132, -0.2393969 ,  0.8090802 ],
       [-0.49700718, -1.2074154 , -0.15893829]])

## Convert from xarrays to pandas and Datasets 

- xarray objects can easily be converted to pandas objects
- Datasets (dict-like containers of aligned DataArray objects) are easy to create

## Clarifying the difference between dimensions and coordinates 

- **dimensions** are names for the axes of the array, used by the array functions. Operations along a dimension will therefore always try to return the full array in that direction

- **coordinates** are the equivalent of indexes in pandas




In [128]:
# A 100 x 100 x 100 cube of standard-normal samples. Each of the dimensions
# 'x', 'y' and 'z' is labelled with the integer coordinates 0..99.
data = xr.DataArray(
    np.random.randn(100, 100, 100),
    coords={
        'x': np.arange(100),
        'y': np.arange(100),
        'z': np.arange(100),
    },
    dims=('x', 'y', 'z'),
)

# In my use case the individual coordinates do not need names: naming the
# dimensions is enough, and we can then slice along a dimension using its
# coordinates.


In [129]:
# inspect the underlying numpy array
data.values

array([[[ 9.18726021e-01, -3.74928809e-01,  6.92485618e-01, ...,
          9.89194743e-01,  2.99878839e-01,  1.23478785e-01],
        [ 5.69304042e-01,  1.20027195e-02, -4.97617050e-01, ...,
         -1.16407075e-02, -1.89260115e+00, -7.39161483e-01],
        [-5.58809220e-01,  4.63862116e-01, -8.67753670e-01, ...,
         -1.76223624e+00,  6.15407743e-01,  1.51660082e+00],
        ...,
        [ 2.98299300e-01,  5.89452413e-01, -5.65392669e-01, ...,
          5.29663316e-01,  7.24257612e-01, -9.85817790e-01],
        [ 5.60724301e-01, -1.18464811e-01, -1.13196521e+00, ...,
          7.74477375e-02,  8.30383815e-01,  1.67172358e+00],
        [-3.87412227e-01,  1.88590778e+00,  1.10297175e+00, ...,
          3.18953023e-01, -3.56593521e-01, -4.26090748e-02]],

       [[ 6.34940091e-01, -2.07699817e+00, -6.31818449e-01, ...,
          8.07657910e-01, -1.51299007e-01,  7.36067277e-01],
        [-1.71216361e+00, -1.26122480e+00,  6.92428795e-01, ...,
          4.01985218e-01,  6.30319522e

In [130]:
# inspect the coordinates attached to each dimension
data.coords

Coordinates:
  * x        (x) int64 0 1 2 3 4 5 6 7 8 9 10 ... 90 91 92 93 94 95 96 97 98 99
  * y        (y) int64 0 1 2 3 4 5 6 7 8 9 10 ... 90 91 92 93 94 95 96 97 98 99
  * z        (z) int64 0 1 2 3 4 5 6 7 8 9 10 ... 90 91 92 93 94 95 96 97 98 99

In [131]:
# Example slice to see how `isel` works: keep the first two positions along
# the 'x' dimension and every element along the 'y' and 'z' dimensions.
data.isel(x=slice(None, 2))

<xarray.DataArray (x: 2, y: 100, z: 100)>
array([[[ 0.918726, -0.374929, ...,  0.299879,  0.123479],
        [ 0.569304,  0.012003, ..., -1.892601, -0.739161],
        ...,
        [ 0.560724, -0.118465, ...,  0.830384,  1.671724],
        [-0.387412,  1.885908, ..., -0.356594, -0.042609]],

       [[ 0.63494 , -2.076998, ..., -0.151299,  0.736067],
        [-1.712164, -1.261225, ...,  0.63032 ,  1.576484],
        ...,
        [ 0.48616 ,  1.459981, ..., -0.675072,  0.557939],
        [-0.385151,  0.59414 , ..., -1.18513 , -0.339615]]])
Coordinates:
  * x        (x) int64 0 1
  * y        (y) int64 0 1 2 3 4 5 6 7 8 9 10 ... 90 91 92 93 94 95 96 97 98 99
  * z        (z) int64 0 1 2 3 4 5 6 7 8 9 10 ... 90 91 92 93 94 95 96 97 98 99

In [132]:
# Coordinate-based lookup with `.loc`: every 'x' entry at the coordinate
# labels y=0 and z=0.
data.loc[:, 0, 0]

<xarray.DataArray (x: 100)>
array([ 0.918726,  0.63494 ,  1.513771,  1.176929,  1.850477,  1.899534,
       -1.059031,  0.345758, -2.05903 , -0.809691, -0.83651 , -0.330935,
       -0.784653, -1.180967,  1.953103, -1.552595, -0.479478, -0.160214,
        0.68258 ,  0.891143, -1.45566 , -0.882108, -1.064798, -0.329636,
        0.915908, -0.470417, -0.261359, -0.186707, -1.487307, -1.687321,
       -0.791828,  0.201099,  0.805674, -1.516007, -0.660584,  0.646293,
        1.050131, -2.129005,  1.22172 ,  0.935798,  0.305109,  0.026836,
       -1.53567 ,  0.71506 , -1.110271,  0.296964,  0.481104,  1.435986,
       -0.243788,  0.608281, -1.072874, -0.230489, -0.123245, -0.691321,
        0.527827, -1.58756 ,  0.152132,  0.208152, -0.941483, -0.460581,
        0.185482,  0.753864,  1.205263,  0.456756, -0.182047, -1.057044,
       -0.399228, -1.332571,  0.67068 , -0.201549,  0.456885, -0.09173 ,
       -1.435661,  2.951403,  0.129482, -2.052067,  0.755702, -0.528773,
       -0.902203, -1.83