In [None]:
#| echo: false
# Activate the project environment found in the current directory so the
# packages loaded below come from that environment.
using Pkg
Pkg.activate(".")
using YAXArrays
using Zarr
using CairoMakie
# Request PNG as the CairoMakie output type.
CairoMakie.activate!(type = "png")
using Dates

# Data Cubes

## YAXArrays


We are mostly going to work with Zarr datasets.

In [None]:
using YAXArrays, Zarr

# Location of the data cube store. The commented-out lines list alternative
# locations of the same store; enable the one that is reachable from your machine.
c1_path = "/home/gkraemer/data/DataCube/v3.0.2/esdc-8d-0.25deg-256x128x128-3.0.2.zarr"
# c1_path = "http://data.rsc4earth.de:9000/earthsystemdatacube/v3.0.2/esdc-8d-0.25deg-256x128x128-3.0.2.zarr"
# c1_path = "/work/user/gy963viny/public/EarthSystemDataCube/v3.0.2/esdc-8d-0.25deg-256x128x128-3.0.2.zarr"
# c1_path = "https://s3.uni-leipzig.de/esdc302/esdc-8d-0.25deg-256x128x128-3.0.2.zarr"

# Open the Zarr store, wrap it as a YAXArrays dataset, then build one cube from it.
c1_zarr = zopen(c1_path)
c1_dataset = open_dataset(c1_zarr)
c1 = Cube(c1_dataset)

## Alternative chunking for plotting maps

In [None]:
# Same data set as a second store (per its name, with a different chunk layout).
# The commented-out lines list alternative locations of the same store.
c2_path = "/home/gkraemer/data/DataCube/v3.0.2/esdc-8d-0.25deg-1x720x1440-3.0.2.zarr"
# c2_path = "http://data.rsc4earth.de:9000/earthsystemdatacube/v3.0.2/esdc-8d-0.25deg-1x720x1440-3.0.2.zarr"
# c2_path = "/work/user/gy963viny/public/EarthSystemDataCube/v3.0.2/esdc-8d-0.25deg-1x720x1440-3.0.2.zarr"
# c2_path = "https://s3.uni-leipzig.de/esdc302/esdc-8d-0.25deg-1x720x1440-3.0.2.zarr"

# Open the Zarr store, wrap it as a YAXArrays dataset, then build one cube from it.
c2_zarr = zopen(c2_path)
c2_dataset = open_dataset(c2_zarr)
c2 = Cube(c2_dataset)

## Subset

Subsets are lazy: no data is loaded until it is actually accessed.

In [None]:
using Dates
# Pick a single variable by name (the subset is lazy).
c1_sensible_heat = c1[variable = "sensible_heat"]
# Restrict the time axis to the range spanned by the two dates.
c1_2020 = c1[time = (Date(2020, 1, 1), Date(2021, 1, 1))]

## Axes

In [None]:
# Look up the "Variable" axis of `c1` and show the values stored on it.
getAxis("Variable", c1).values

## Access as array

In [None]:
using CairoMakie # using GLMakie for interactive plots
# Plot a 2-D slice: everything along the first two dimensions, at index 1800 of
# the third dimension and index 1 of the fourth.
# NOTE(review): presumably a lon/lat map at one time step for the first
# variable -- confirm against the axis order of `c2`.
heatmap(c2[:, :, 1800, 1])

## Access as array 2

In [None]:
# Plot a 1-D slice: the whole third dimension at position (500, 300) of the
# first two dimensions and index 1 of the fourth.
# NOTE(review): presumably a time series at a single grid cell for the first
# variable -- confirm against the axis order of `c1`.
lines(c1[500, 300, :, 1])

## Split-apply-combine

In [None]:
using Statistics

# Apply `mean` to every "lon"/"lat" slice of the air temperature variable,
# leaving one value per remaining index.
t_trend = mapslices(
    mean,
    c2[variable = "air_temperature_2m"];
    dims = ("lon", "lat"),
)

In [None]:
# Read all values of the result with `[:]` and draw them as a line plot.
lines(t_trend[:])

## More complex split apply combine

In [None]:
# Output axis with one entry per statistic, in the order the function below
# writes them: position 1 is the mean, position 2 the standard deviation.
stat_axis = CategoricalAxis("statistic", ["mean", "std"])

# For every "lon"/"lat" slice of air temperature, drop the NaN entries and store
# the mean and standard deviation of what is left along `stat_axis`.
global_stat_trends = mapCube(
    c2[variable = "air_temperature_2m"];
    indims = InDims("lon", "lat"),
    outdims = OutDims(stat_axis),
) do stats, slice
    valid = filter(x -> !isnan(x), slice)
    avg = mean(valid)
    # Hand the already computed mean to `std` so it is not computed twice.
    spread = std(valid; mean = avg)
    stats[1] = avg
    stats[2] = spread
end

## Plot it

In [None]:
# Index 2 is where the standard deviation was written ("std" on the statistic
# axis); plot it along the other dimension.
# NOTE(review): assumes the statistic axis is the first dimension of the result.
lines(global_stat_trends[2, :])

## Save it

In [None]:
# Write the result to a Zarr store (overwriting an existing one), then reopen
# the store from disk as a cube.
filename = "data/global_stat_trends.zarr"
savecube(global_stat_trends, filename; driver = :zarr, overwrite = true)
filename |> zopen |> open_dataset |> Cube

# Exercises

## Exercises {.smaller}

- The statistics in the examples are not weighted. Extend them with a correct
  weighting.
  - The area of a pixel scales approximately with $\cos(\text{lat})$
  - The package `StatsBase` contains functions for `sum`, `mean`, etc. with
    weights. Look at the documentation for how to use them.
- Measure the time the above functions take when using `c1` and when using
  `c2`. Explain why the times differ.
  - Calculate local trends in temperature
  - Identify bottlenecks for calculation
- Look at the help for `mapslices` on arrays and implement it yourself as
  `$name_mapslices`.
  - Make a PR to `git@git.sc.uni-leipzig.de:ss2023-12-geo-m-ds02/mapslices.git`
    and add the function.
  - Let's see who writes the most readable code, who writes the fastest function,
    etc.