# General temporal aggregation methods

In [None]:
# If first time running, uncomment the line below to install any additional dependencies
# !bash requirements-for-notebooks.sh

In [None]:
from earthkit.transforms import aggregate as ek_aggregate
from earthkit import data as ek_data

from earthkit.data.testing import earthkit_remote_test_data_file
# Select the "user" cache policy for earthkit-data.
# NOTE(review): presumably this keeps downloaded files cached between runs —
# confirm against the earthkit-data caching documentation.
ek_data.settings.set("cache-policy", "user")

## Load some test data

All `earthkit-transforms` methods can be called with `earthkit-data` objects (Readers and Wrappers) or with
pre-loaded `xarray` objects.

In this example we will use hourly ERA5 2m temperature data on a 0.5x0.5 spatial grid for the year 2015 as
our physical data.

First we download (if not already cached) and lazily load the ERA5 data (please see the tutorials in `earthkit-data` for more details on cache management).

We inspect the data using the `describe` method and see that we have some 2m air temperature data. For a more detailed representation of the data you can use the `to_xarray` method.

In [None]:
# Locate the demonstration ERA5 file; this could be any URL or path to an
# ERA5 GRIB or netCDF file.
remote_era5_file = earthkit_remote_test_data_file(
    "test-data",
    "era5_temperature_europe_2015.grib",
)

# Open the remote file as an earthkit-data object using the "url" source.
era5_data = ek_data.from_source("url", remote_era5_file)

# Summarise the contents; for a more detailed view use the line below instead.
era5_data.describe()
# era5_data.to_xarray()

## Reduce the ERA5 data over the time dimension

The default reduction method is `mean`; other methods can be applied using the `how` kwarg.

Note that we do not need to worry about the data format of the input array; earthkit will convert it to the required xarray format internally.

The returned object is an xarray dataset; however, this may change in future versions of the package.

### The mean over the time dimension

In [None]:
# Reduce over the time dimension. No `how` is passed, so the default
# reduction is used, i.e. this is equivalent to how="mean".
era5_t_mean = ek_aggregate.temporal.reduce(era5_data)
# Show the reduced result as the cell output.
era5_t_mean

In [None]:
# A simple matplotlib plot to view the time-mean data.
# The trailing semicolon suppresses the text repr of the object returned by
# `plot()`, so only the figure is rendered as the cell output.
era5_t_mean.t2m.plot();

### The median over the time dimension

In [None]:
# Same temporal reduction, but selecting the median through the `how` kwarg.
era5_t_median = ek_aggregate.temporal.reduce(
    era5_data,
    how="median",
)
era5_t_median

In [None]:
# A simple matplotlib plot to view the time-median data.
# The trailing semicolon suppresses the text repr of the object returned by
# `plot()`, so only the figure is rendered as the cell output.
era5_t_median.t2m.plot();

### Calling the temporal reduce method with an arbitrary function

The `temporal.reduce` method can take any function that is accepted by the xarray `reduce` method; typically this means it must accept `axis` as an argument. See the [xarray.Dataset.reduce](https://docs.xarray.dev/en/stable/generated/xarray.Dataset.reduce.html) documentation for more details.

In [None]:
import numpy as np
def my_method(array, axis=None, **kwargs):
    """Return the mean of ``array`` multiplied by its standard deviation.

    Both statistics are computed with NumPy along ``axis``. Any extra keyword
    arguments are forwarded unchanged to both ``np.mean`` and ``np.std``.
    """
    average = np.mean(array, axis=axis, **kwargs)
    spread = np.std(array, axis=axis, **kwargs)
    return average * spread

# Pass the custom callable through `how`. `how_label` appears to be appended
# to the output variable name (the result is accessed as `t2m_random` below).
era5_t_my_method = ek_aggregate.temporal.reduce(
    era5_data,
    how=my_method,
    how_label="random",
)
era5_t_my_method


In [None]:
# A simple matplotlib plot to view the custom-reduction data.
# The trailing semicolon suppresses the text repr of the object returned by
# `plot()`, so only the figure is rendered as the cell output.
era5_t_my_method.t2m_random.plot();

### Calculate a rolling mean with a 50 timestep window

There is no temporal-specific method for a rolling reduction. The general `rolling_reduce` method can do this calculation by specifying the dimension over which you would like to reduce.

In [None]:
# Roll a centred window of 50 steps along the `time` dimension.
# NOTE(review): no reduction is named here, so this relies on the function's
# default, which the section heading describes as a mean — confirm.
era5_rolling = ek_aggregate.rolling_reduce(era5_data, time=50, center=True)
era5_rolling