# CH4 dataset satellite lev2 - daily

## Import libraries

In [None]:
import flox.xarray
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import xarray as xr
from c3s_eqc_automatic_quality_control import diagnostics, download, plot, utils

# Apply matplotlib's "seaborn-v0_8-notebook" style sheet to the figures below.
plt.style.use("seaborn-v0_8-notebook")

## Define parameters

In [None]:
# Sensor/algorithm identifiers; each one is downloaded and plotted separately
sensors = [
    "iasi_metop_c_nlis",
    "iasi_metop_b_nlis",
]

# Bounding box of the region to plot (longitude range, then latitude range)
lon_slice = slice(-15, 20)
lat_slice = slice(45, 75)

# Names of the variables to plot: methane, its vertical profile, and cloud
varname, vertical_profile = "ch4", "ch4_averaging_kernel"
varname_cloud = "cfc"

## Define request

In [None]:
# Time selection shared by both requests: days 01..30 of September 2022
time_request = {
    "year": ["2022"],
    "month": ["09"],
    "day": [format(day, "02d") for day in range(1, 31)],
}

# Methane request (the time selection plus product-specific keys)
collection_id = "satellite-methane"
request = {
    **time_request,
    "processing_level": ["level_2"],
    "variable": "ch4",
    "version": ["10_2"],
}

# Cloud request (same time selection, different collection)
collection_id_cloud = "satellite-cloud-properties"
request_cloud = {
    **time_request,
    "product_family": "clara_a3",
    "origin": "eumetsat",
    "variable": ["cloud_fraction"],
    "climate_data_record_type": "interim_climate_data_record",
    "time_aggregation": "daily_mean",
}

## Define function to cache

In [None]:
def arithmetic_unweighted_average(ds, d_lon, d_lat, lon1):
    """Bin data onto a regular latitude/longitude grid with an unweighted mean.

    Values are grouped by their ``latitude`` and ``longitude`` variables into
    cells of size ``d_lat`` x ``d_lon``; each cell is reduced with flox's
    ``"mean"`` so that every value in a cell counts equally.

    Parameters
    ----------
    ds : xarray object
        Data to reduce. It must expose ``latitude`` and ``longitude``
        variables that flox can group by.
    d_lon, d_lat : int or float
        Cell size along longitude and latitude. Must be positive.
    lon1 : {180, 360}
        Upper longitude bound. ``180`` selects the range [-180, 180] and
        ``360`` selects [0, 360]. Latitude always spans [-90, 90].

    Returns
    -------
    xarray object
        The reduced data. The bin dimensions are renamed to ``latitude`` and
        ``longitude`` and labelled with the cell centres.

    Raises
    ------
    ValueError
        If ``lon1`` is neither 180 nor 360, or if a cell size is not positive.
    """
    if lon1 not in (180, 360):
        raise ValueError(f"lon1 must be 180 or 360. {lon1=}")
    lon0 = -180 if lon1 == 180 else 0

    coords = {}
    expected_groups = ()
    for name, start, stop, step in zip(
        ["latitude", "longitude"], [-90, lon0], [90, lon1], [d_lat, d_lon]
    ):
        if step <= 0:
            raise ValueError(f"Grid step for {name} must be positive. {step=}")

        # Derive the number of cells once and build both the edges and the
        # centres from it. Two independent np.arange calls with a non-integer
        # step may differ in length by one because of floating-point
        # round-off, which would make the centres and the bins inconsistent.
        n_cells = int(np.ceil(np.round((stop - start) / step, 9)))
        edges = start + step * np.arange(n_cells + 1)
        coords[name] = edges[:-1] + step / 2

        # IntervalIndex bins are closed on the right only, so widen the first
        # bin to keep values lying exactly on the lower bound.
        edges[0] -= step
        expected_groups += (pd.IntervalIndex.from_breaks(edges),)

    # Unpacking the dict passes its keys, i.e. the names of the variables to
    # group by, in the same order as expected_groups.
    ds = flox.xarray.xarray_reduce(
        ds, *coords, func="mean", expected_groups=expected_groups, keep_attrs=True
    )
    # Replace the interval-labelled "<name>_bins" dimensions with cell centres.
    ds = ds.rename({f"{coord}_bins": coord for coord in coords}).assign_coords(coords)
    # Tag every coordinate with a standard_name equal to its own name.
    for coord in ds.coords:
        ds[coord].attrs["standard_name"] = coord
    return ds


def regionalised_daily_regrid(ds, d_lon, d_lat, lon_slice, lat_slice, lon1=180):
    """Regrid a dataset to daily latitude/longitude cell means over a region.

    The time-invariant ``pressure_levels`` and ``pressure_weight`` variables
    are reduced to their first time step and become the index of
    ``level_dim`` and ``layer_dim``. The data are then averaged per day and
    per grid cell with ``arithmetic_unweighted_average``, and the result is
    passed to ``utils.regionalise`` with the requested slices.

    Parameters
    ----------
    ds : xarray.Dataset
        Dataset with a ``time`` dimension, ``level_dim`` and ``layer_dim``
        dimensions, and ``pressure_levels``, ``pressure_weight``,
        ``longitude`` and ``latitude`` variables. It is not modified.
    d_lon, d_lat : int or float
        Cell size along longitude and latitude.
    lon_slice, lat_slice : slice
        Longitude and latitude ranges forwarded to ``utils.regionalise``.
    lon1 : {180, 360}, default 180
        Upper longitude bound of the target grid.

    Returns
    -------
    xarray.Dataset
        Whatever ``utils.regionalise`` returns for the daily regridded data.

    Raises
    ------
    ValueError
        If ``pressure_levels`` or ``pressure_weight`` varies along ``time``
        (previously an ``assert``, which is skipped under ``python -O``), or
        if the grid parameters are rejected by
        ``arithmetic_unweighted_average``.
    """
    for dim, var in zip(
        ["level_dim", "layer_dim"], ["pressure_levels", "pressure_weight"]
    ):
        # Only the first time step is kept, so refuse data where that would
        # discard information.
        if not (ds[var] == ds[var].isel(time=0)).all():
            raise ValueError(f"{var} must not vary along time.")
        # assign() returns a new dataset, leaving the caller's object intact.
        ds = ds.assign({var: ds[var].isel(time=0, drop=True)})
        ds = ds.swap_dims({dim: var})

    ds = ds.set_coords(["longitude", "latitude", "pressure_levels"])
    ds_out = ds.resample(time="1D").map(
        arithmetic_unweighted_average, d_lon=d_lon, d_lat=d_lat, lon1=lon1
    )
    return utils.regionalise(ds_out, lat_slice=lat_slice, lon_slice=lon_slice)

## Download and transform data

In [None]:
# Settings shared by both downloads: the chunking passed to the downloader and
# the region forwarded to the transform functions.
chunks = {"year": 1, "month": 1}
kwargs = {"lon_slice": lon_slice, "lat_slice": lat_slice}

# Methane: one download per sensor, regridded to daily 1 x 1 cells on the
# [-180, 180] longitude range, then stacked along a new "sensor" dimension.
datasets = []
for sensor in sensors:
    print(f"{sensor = }")
    ds = download.download_and_transform(
        collection_id,
        {**request, "sensor_and_algorithm": sensor},
        transform_func=regionalised_daily_regrid,
        transform_func_kwargs={**kwargs, "d_lon": 1, "d_lat": 1, "lon1": 180},
        chunks=chunks,
    )
    datasets.append(ds.expand_dims({"sensor": [sensor]}))
ds = xr.concat(datasets, dim="sensor")

# Cloud: the only transformation is the regional selection.
print("cloud cover")
ds_cloud = download.download_and_transform(
    collection_id_cloud,
    request_cloud,
    chunks=chunks,
    transform_func=utils.regionalise,
    transform_func_kwargs=kwargs,
)

## Plot maps

In [None]:
# Facet layout reused by the plots below: one panel per time step, wrapping
# onto a new row after five columns.
plot_kwargs = {
    "col": "time",
    "col_wrap": 5,
}

# Methane maps: one figure per sensor
for sensor, ds_sensor in ds.groupby("sensor"):
    plot.projected_map(ds_sensor[varname], **plot_kwargs)
    plt.suptitle(f"{sensor = }", y=1)
    plt.show()

# Cloud maps for the same days; the result is bound to "_" so that the
# notebook does not echo the returned text object.
plot.projected_map(ds_cloud[varname_cloud], **plot_kwargs)
_ = plt.suptitle("Cloud cover", y=1)

## Boxplot

In [None]:
# For each sensor, draw one box per day from the values of all grid cells
for sensor, ds_sensor in ds.groupby("sensor"):
    da_sensor = ds_sensor[varname].squeeze()

    # Flatten the grid into rows, keep the time stamp and the values, and
    # derive the day of the month used to group the boxes.
    df_sensor = (
        da_sensor.stack(location=("latitude", "longitude"))
        .drop_vars("location")
        .to_dataframe()
        .reset_index()[["time", varname]]
        .assign(day=lambda df: df["time"].dt.day)
    )

    ax = df_sensor[["day", varname]].boxplot(by="day")
    ax.set(
        title=f"{sensor = }",
        xlabel="day",
        ylabel=f"{da_sensor.long_name} [{da_sensor.units}]",
    )
    plt.show()

## Plot vertical profiles

In [None]:
# For each sensor, plot the profile's spatial mean together with the mean
# plus/minus the spatial standard deviation, as returned by the
# diagnostics.spatial_weighted_* helpers, one panel per time step.
for sensor, da_sensor in ds[vertical_profile].groupby("sensor"):
    da_mean = diagnostics.spatial_weighted_mean(da_sensor)
    da_std = diagnostics.spatial_weighted_std(da_sensor)

    # Stack the three curves along a "label" dimension used as the line hue
    da = xr.concat(
        [
            profile.expand_dims(label=[label])
            for label, profile in (
                ("mean", da_mean),
                ("mean $+$ std", da_mean + da_std),
                ("mean $-$ std", da_mean - da_std),
            )
        ],
        dim="label",
    )

    # Logarithmic vertical axis, drawn with decreasing values upwards
    facet = da.plot(
        y="pressure_weight",
        hue="label",
        yscale="log",
        yincrease=False,
        **plot_kwargs,
    )
    for ax in facet.axs.flat:
        ax.grid()
    plt.suptitle(f"{sensor = }", y=1.01)
    plt.show()