# Criando um xarray Dataset a partir de dados sintéticos

## Importando os módulos principais

In [1]:
import numpy as np

In [2]:
import xarray as xr

In [4]:
import pandas as pd

## Criando as coordenadas temporal e espacial

### Latitudes

In [27]:
# 320 evenly spaced latitudes covering both poles (endpoints -90 and 90 included).
lats = np.linspace(start=-90, stop=90, num=320)

In [29]:
print("lats:\n",lats)

lats:
 [-90.         -89.43573668 -88.87147335 -88.30721003 -87.74294671
 -87.17868339 -86.61442006 -86.05015674 -85.48589342 -84.92163009
 -84.35736677 -83.79310345 -83.22884013 -82.6645768  -82.10031348
 -81.53605016 -80.97178683 -80.40752351 -79.84326019 -79.27899687
 -78.71473354 -78.15047022 -77.5862069  -77.02194357 -76.45768025
 -75.89341693 -75.32915361 -74.76489028 -74.20062696 -73.63636364
 -73.07210031 -72.50783699 -71.94357367 -71.37931034 -70.81504702
 -70.2507837  -69.68652038 -69.12225705 -68.55799373 -67.99373041
 -67.42946708 -66.86520376 -66.30094044 -65.73667712 -65.17241379
 -64.60815047 -64.04388715 -63.47962382 -62.9153605  -62.35109718
 -61.78683386 -61.22257053 -60.65830721 -60.09404389 -59.52978056
 -58.96551724 -58.40125392 -57.8369906  -57.27272727 -56.70846395
 -56.14420063 -55.5799373  -55.01567398 -54.45141066 -53.88714734
 -53.32288401 -52.75862069 -52.19435737 -51.63009404 -51.06583072
 -50.5015674  -49.93730408 -49.37304075 -48.80877743 -48.24451411
 -4

### Longitudes

In [31]:
# 641 evenly spaced longitudes from 0 to 360 degrees, both endpoints included.
lons = np.linspace(start=0, stop=360, num=641)

In [32]:
print("lons:\n",lons)

lons:
 [  0.       0.5625   1.125    1.6875   2.25     2.8125   3.375    3.9375
   4.5      5.0625   5.625    6.1875   6.75     7.3125   7.875    8.4375
   9.       9.5625  10.125   10.6875  11.25    11.8125  12.375   12.9375
  13.5     14.0625  14.625   15.1875  15.75    16.3125  16.875   17.4375
  18.      18.5625  19.125   19.6875  20.25    20.8125  21.375   21.9375
  22.5     23.0625  23.625   24.1875  24.75    25.3125  25.875   26.4375
  27.      27.5625  28.125   28.6875  29.25    29.8125  30.375   30.9375
  31.5     32.0625  32.625   33.1875  33.75    34.3125  34.875   35.4375
  36.      36.5625  37.125   37.6875  38.25    38.8125  39.375   39.9375
  40.5     41.0625  41.625   42.1875  42.75    43.3125  43.875   44.4375
  45.      45.5625  46.125   46.6875  47.25    47.8125  48.375   48.9375
  49.5     50.0625  50.625   51.1875  51.75    52.3125  52.875   53.4375
  54.      54.5625  55.125   55.6875  56.25    56.8125  57.375   57.9375
  58.5     59.0625  59.625   60.1875  60.75 

### Tempos

In [33]:
# Four consecutive daily timestamps starting on 2014-11-01.
times = pd.date_range(start="2014-11-01", periods=4, freq="D")

In [34]:
print("times:\n",times)

times:
 DatetimeIndex(['2014-11-01', '2014-11-02', '2014-11-03', '2014-11-04'], dtype='datetime64[ns]', freq='D')


## Criando um array de dados sintéticos

Aqui devemos criar um array cujas dimensões correspondam às coordenadas temporal e espaciais definidas anteriormente. Para isso, utilizamos o tamanho de cada um desses arrays:

In [44]:
# Synthetic field of normally distributed noise (mean 15, standard deviation 8),
# with one axis per coordinate array, in the order (time, lon, lat).
tmpvar = 15 + 8 * np.random.standard_normal((len(times), len(lons), len(lats)))

In [45]:
print("tmpvar:\n",tmpvar)

tmpvar:
 [[[20.23152403 16.46161902 12.50700463 ...  3.87581107 -1.89963194
    6.30090415]
  [17.86166355  8.25916669 21.529279   ... 16.58651488  8.45888622
    9.72509464]
  [ 4.48669903 18.1619333   8.10038706 ...  8.81936472  9.5004539
    7.07441812]
  ...
  [34.15020166  9.0029629  17.62032037 ... 21.3348045  24.62480801
   12.719267  ]
  [20.64384799 11.02374536  6.94788123 ...  6.92821607 24.60789144
   29.5228278 ]
  [26.20239932 31.26233518  5.7119261  ... 28.79821822 21.4898335
   20.46938951]]

 [[ 9.93329189  3.72693704 -0.8489019  ... 20.1043803  18.28204737
   18.15168154]
  [ 2.24452202 28.14332436 21.3437838  ... 14.76646335 16.75857444
   18.24294792]
  [ 9.49173585 21.53624132 11.60377171 ... 17.4012787  17.61161162
   21.1078093 ]
  ...
  [12.6195886   5.34606521 21.84627509 ... 15.14686589 -5.95480991
   19.9461389 ]
  [13.7841958   7.13996328 12.73249644 ... 21.87749518 14.22054288
   12.96184267]
  [16.25322792 19.61231346 17.40041273 ...  0.92241689  4.05707269

Ou seja, o array "tmpvar" possui dimensões "len(times)" x "len(lons)" x "len(lats)", isto é, 4 x 641 x 320.

### Criando o Dataset

Inicializando o Dataset:

In [51]:
# Start from an empty Dataset; data variables, coordinates and attributes
# are attached one at a time in the following cells.
ds = xr.Dataset()

In [52]:
ds

<xarray.Dataset>
Dimensions:  ()
Data variables:
    *empty*

Adicionando a variável "tmpvar" ao Dataset. Observe que os nomes das dimensões devem ser informados na mesma ordem dos eixos do array "tmpvar" (tempo, longitude, latitude):

In [54]:
# Attach tmpvar as a data variable. The dimension names are given in the
# same order as the array's axes: (time, lon, lat).
ds["tmpvar"] = (("time","lon","lat"),tmpvar)

In [55]:
ds

<xarray.Dataset>
Dimensions:  (lat: 320, lon: 641, time: 4)
Dimensions without coordinates: lat, lon, time
Data variables:
    tmpvar   (time, lon, lat) float64 20.23 16.46 12.51 ... 11.56 19.2 10.09

Adicionando as coordenadas espaciais:

In [56]:
# Attach the latitude values as the coordinate of the "lat" dimension.
ds.coords["lat"] = ("lat",lats)

In [57]:
ds

<xarray.Dataset>
Dimensions:  (lat: 320, lon: 641, time: 4)
Coordinates:
  * lat      (lat) float64 -90.0 -89.44 -88.87 -88.31 ... 88.31 88.87 89.44 90.0
Dimensions without coordinates: lon, time
Data variables:
    tmpvar   (time, lon, lat) float64 20.23 16.46 12.51 ... 11.56 19.2 10.09

In [58]:
# Attach the longitude values as the coordinate of the "lon" dimension.
ds.coords["lon"] = ("lon",lons)

In [59]:
ds

<xarray.Dataset>
Dimensions:  (lat: 320, lon: 641, time: 4)
Coordinates:
  * lat      (lat) float64 -90.0 -89.44 -88.87 -88.31 ... 88.31 88.87 89.44 90.0
  * lon      (lon) float64 0.0 0.5625 1.125 1.688 ... 358.3 358.9 359.4 360.0
Dimensions without coordinates: time
Data variables:
    tmpvar   (time, lon, lat) float64 20.23 16.46 12.51 ... 11.56 19.2 10.09

Adicionando a coordenada temporal:

In [60]:
# Assigning the DatetimeIndex under the name "time" makes it the
# coordinate of the existing "time" dimension.
ds.coords["time"] = times

In [61]:
ds

<xarray.Dataset>
Dimensions:  (lat: 320, lon: 641, time: 4)
Coordinates:
  * lat      (lat) float64 -90.0 -89.44 -88.87 -88.31 ... 88.31 88.87 89.44 90.0
  * lon      (lon) float64 0.0 0.5625 1.125 1.688 ... 358.3 358.9 359.4 360.0
  * time     (time) datetime64[ns] 2014-11-01 2014-11-02 2014-11-03 2014-11-04
Data variables:
    tmpvar   (time, lon, lat) float64 20.23 16.46 12.51 ... 11.56 19.2 10.09

Adicionando um tempo de referência:

In [62]:
# Scalar coordinate: a single reference timestamp that is not tied to any dimension.
ds.coords["reference_time"] = pd.Timestamp("2014-10-31")

In [64]:
ds

<xarray.Dataset>
Dimensions:         (lat: 320, lon: 641, time: 4)
Coordinates:
  * lat             (lat) float64 -90.0 -89.44 -88.87 ... 88.87 89.44 90.0
  * lon             (lon) float64 0.0 0.5625 1.125 1.688 ... 358.9 359.4 360.0
  * time            (time) datetime64[ns] 2014-11-01 2014-11-02 ... 2014-11-04
    reference_time  datetime64[ns] 2014-10-31
Data variables:
    tmpvar          (time, lon, lat) float64 20.23 16.46 12.51 ... 19.2 10.09

Adicionando metadados (atributos globais do Dataset) que ajudam a descrever os dados:

In [66]:
# Dataset-level (global) attribute holding a human-readable title.
ds.attrs["title"] = "Absolute Temperature"

In [67]:
ds

<xarray.Dataset>
Dimensions:         (lat: 320, lon: 641, time: 4)
Coordinates:
  * lat             (lat) float64 -90.0 -89.44 -88.87 ... 88.87 89.44 90.0
  * lon             (lon) float64 0.0 0.5625 1.125 1.688 ... 358.9 359.4 360.0
  * time            (time) datetime64[ns] 2014-11-01 2014-11-02 ... 2014-11-04
    reference_time  datetime64[ns] 2014-10-31
Data variables:
    tmpvar          (time, lon, lat) float64 20.23 16.46 12.51 ... 19.2 10.09
Attributes:
    title:    Absolute Temperature

In [68]:
# Dataset-level (global) attribute recording the unit as free text.
# NOTE(review): this is attached to the whole Dataset rather than to "tmpvar",
# so it also nominally covers variables added later (e.g. "tmpvar2") - confirm
# that is intended.
ds.attrs["unit"] = "Degrees Celsius"

In [69]:
ds

<xarray.Dataset>
Dimensions:         (lat: 320, lon: 641, time: 4)
Coordinates:
  * lat             (lat) float64 -90.0 -89.44 -88.87 ... 88.87 89.44 90.0
  * lon             (lon) float64 0.0 0.5625 1.125 1.688 ... 358.9 359.4 360.0
  * time            (time) datetime64[ns] 2014-11-01 2014-11-02 ... 2014-11-04
    reference_time  datetime64[ns] 2014-10-31
Data variables:
    tmpvar          (time, lon, lat) float64 20.23 16.46 12.51 ... 19.2 10.09
Attributes:
    title:    Absolute Temperature
    unit:     Degrees Celsius

In [76]:
# Longitude grid spacing in degrees. `lons` spans 0..360 with both endpoints
# included, so the number of intervals is len(lons) - 1.
# Fix: the original referenced `lon`, which is never defined in this notebook
# (the array is named `lons`) and raised NameError on a fresh kernel.
ds.attrs["resolution"] = 360 / (len(lons) - 1)

In [77]:
ds

<xarray.Dataset>
Dimensions:         (lat: 320, lon: 641, time: 4)
Coordinates:
  * lat             (lat) float64 -90.0 -89.44 -88.87 ... 88.87 89.44 90.0
  * lon             (lon) float64 0.0 0.5625 1.125 1.688 ... 358.9 359.4 360.0
  * time            (time) datetime64[ns] 2014-11-01 2014-11-02 ... 2014-11-04
    reference_time  datetime64[ns] 2014-10-31
Data variables:
    tmpvar          (time, lon, lat) float64 20.23 16.46 12.51 ... 19.2 10.09
Attributes:
    title:       Absolute Temperature
    unit:        Degrees Celsius
    resolution:  0.5625

### Acrescentando uma nova variável (com as mesmas dimensões)

In [91]:
# Second synthetic field with the same (time, lon, lat) shape as tmpvar:
# standard-normal noise shifted to a mean of 1005.
# Fix: the original used `time`, `lon` and `lat`, which are never defined in
# this notebook; the coordinate arrays are `times`, `lons` and `lats`.
tmpvar2 = 1005 + np.random.randn(len(times), len(lons), len(lats))

In [92]:
print("tmpvar2:\n",tmpvar2)

tmpvar2:
 [[[1004.99613901 1006.12102585 1005.93411186 ... 1003.33184502
   1005.13032353 1003.61297746]
  [1004.66057644 1005.31397722 1004.4772732  ... 1003.45443012
   1005.86832166 1005.79097357]
  [1003.40748908 1006.22100942 1003.90138814 ... 1004.58410498
   1005.73083301 1003.44634031]
  ...
  [1005.48058873 1004.37132758 1003.27431544 ... 1004.63357604
   1004.8140959  1004.74494668]
  [1004.37045553 1003.43721821 1004.05936659 ... 1005.84680362
   1005.13260767 1005.71334181]
  [1004.3046657  1005.59621221 1005.10679608 ... 1005.43970557
   1006.60466972 1005.07145339]]

 [[1004.60794255 1004.83629149 1004.82933918 ... 1003.3305867
   1004.33912859 1005.39198463]
  [1005.72224311 1004.81543447 1004.03886741 ... 1002.20669754
   1005.61096706 1006.90580757]
  [1005.22257837 1003.19380201 1004.99345206 ... 1004.92968103
   1004.55110556 1006.82302846]
  ...
  [1004.30591427 1004.17217913 1004.83182652 ... 1005.25773648
   1003.9321745  1005.2568774 ]
  [1005.20840993 1004.91569

Acrescentando a variável "tmpvar2" ao Dataset "ds":

In [93]:
# Add the second array as another data variable on the same
# (time, lon, lat) dimensions already present in the Dataset.
ds["tmpvar2"] = (("time","lon","lat"),tmpvar2)

In [94]:
ds

<xarray.Dataset>
Dimensions:         (lat: 320, lon: 641, time: 4)
Coordinates:
  * lat             (lat) float64 -90.0 -89.44 -88.87 ... 88.87 89.44 90.0
  * lon             (lon) float64 0.0 0.5625 1.125 1.688 ... 358.9 359.4 360.0
  * time            (time) datetime64[ns] 2014-11-01 2014-11-02 ... 2014-11-04
    reference_time  datetime64[ns] 2014-10-31
Data variables:
    tmpvar          (time, lon, lat) float64 20.23 16.46 12.51 ... 19.2 10.09
    tmpvar2         (time, lon, lat) float64 1.005e+03 1.006e+03 ... 1.006e+03
Attributes:
    title:       Absolute Temperature
    unit:        Degrees Celsius
    resolution:  0.5625