# TCWV

## Import libraries

In [None]:
import xarray as xr
from c3s_eqc_automatic_quality_control import download, utils

## Set parameters

In [None]:
# Analysis period: first and last year to request
year_start, year_stop = 2003, 2017

# Region: longitude and latitude bounds used to cut out the area of interest
lon_slice, lat_slice = slice(-180, 180), slice(-30, 30)

## Define requests

In [None]:
# Time selection shared by every request: every calendar day (01-31) of every
# month (01-12) for each year from year_start to year_stop, both included.
time_request = {
    "year": list(map(str, range(year_start, year_stop + 1))),
    "month": [format(month, "02d") for month in range(1, 13)],
    "day": [format(day, "02d") for day in range(1, 32)],
}

# One request per collection ID; each one extends the shared time selection
# with the collection-specific keys.
requests = {
    "satellite-total-column-water-vapour-land-ocean": {
        **time_request,
        "product": "near_infrared_hoaps_combined",
        "horizontal_aggregation": ["0_5_x_0_5"],
        "temporal_aggregation": "daily",
        "variable": "all",
    },
    "satellite-cloud-properties": {
        **time_request,
        "product_family": "clara_a3",
        "origin": "eumetsat",
        "variable": ["cloud_fraction"],
        "climate_data_record_type": "thematic_climate_data_record",
        "time_aggregation": "daily_mean",
    },
    "derived-era5-single-levels-daily-statistics": {
        **time_request,
        "product_type": "reanalysis",
        "variable": ["skin_temperature"],
        "daily_statistic": "daily_mean",
        "time_zone": "utc+00:00",
        "frequency": "1_hourly",
    },
    "satellite-earth-radiation-budget": {
        **time_request,
        "product_family": "clara_a3",
        "origin": "eumetsat",
        "variable": ["outgoing_longwave_radiation"],
        "climate_data_record_type": "thematic_climate_data_record",
        "time_aggregation": "daily_mean",
    },
}

## Define functions to cache

In [None]:
def coarsen_and_regionalise(ds, lon_slice, lat_slice, slicer, coarsen):
    """Optionally subsample and coarsen a dataset, then regionalise it.

    Parameters
    ----------
    ds
        Object to transform; it must provide ``isel`` and ``coarsen``
        (e.g. an xarray Dataset).
    lon_slice, lat_slice
        Longitude and latitude slices forwarded to ``utils.regionalise``.
    slicer
        Mapping passed to ``ds.isel``; skipped when empty or otherwise falsy.
    coarsen
        Mapping passed to ``ds.coarsen``; the coarsened windows are averaged
        with attributes kept. Skipped when empty or otherwise falsy.

    Returns
    -------
    The output of ``utils.regionalise`` applied to the transformed dataset.
    """
    # Index-based selection comes first, so coarsening acts on the subsample.
    subset = ds.isel(slicer) if slicer else ds
    if coarsen:
        subset = subset.coarsen(coarsen).mean(keep_attrs=True)
    return utils.regionalise(subset, lon_slice=lon_slice, lat_slice=lat_slice)

## Download and transform

In [None]:
# Collections that need a per-collection grid adjustment before merging
# (presumably to bring every product onto the same grid -- confirm).
subsampled_collection = "derived-era5-single-levels-daily-statistics"
coarsened_collections = (
    "satellite-cloud-properties",
    "satellite-earth-radiation-budget",
)

datasets = []
for collection_id, request in requests.items():
    # Keep every second grid point, starting from index 1, in both directions.
    slicer = (
        {"latitude": slice(1, None, 2), "longitude": slice(1, None, 2)}
        if collection_id == subsampled_collection
        else {}
    )
    # Average windows of 2 points along latitude and 2 along longitude.
    coarsen = (
        {"latitude": 2, "longitude": 2}
        if collection_id in coarsened_collections
        else {}
    )

    transform_kwargs = {
        "lon_slice": lon_slice,
        "lat_slice": lat_slice,
        "slicer": slicer,
        "coarsen": coarsen,
    }
    ds = download.download_and_transform(
        collection_id,
        request,
        chunks={"year": 1, "month": 1},
        transform_func=coarsen_and_regionalise,
        transform_func_kwargs=transform_kwargs,
    )
    datasets.append(ds)

# Drop the "nv"/"bnds" dimensions (and the variables using them) wherever they
# are present, then merge all collections into a single daily dataset.
dims_to_drop = {"nv", "bnds"}
ds_daily = xr.merge(
    [
        dataset.drop_dims(dims_to_drop.intersection(dataset.dims))
        for dataset in datasets
    ]
)

## Post-processing

In [None]:
# Clear-sky greenhouse effect (GCS): sigma * skt**4 minus the outgoing
# longwave flux, stored as a new variable of the merged dataset.
stefan_boltzmann = 5.67 * 10 ** (-8)
surface_emission = stefan_boltzmann * (ds_daily["skt"] ** 4)
ds_daily["GCS"] = surface_emission - ds_daily["LW_flux"]
ds_daily["GCS"].attrs = {"long_name": "clear sky greenhouse effect", "units": "W m−2"}

# Keep clear-sky grid cells only (cloud fraction below 10, i.e. 10 % assuming
# cfc is expressed in percent -- confirm) where the retrievals are valid
# (skin temperature > 0 K and TCWV > 0 mm).
# NOTE: an additional criterion, ds_daily["GCS"] > 120, is currently disabled.
is_clear_sky = ds_daily["cfc"] < 10
has_valid_skt = ds_daily["skt"] > 0
has_valid_tcwv = ds_daily["tcwv"] > 0
mask = is_clear_sky & has_valid_skt & has_valid_tcwv

ds_daily = ds_daily.where(mask)

# Monthly means of the masked daily data.
# NOTE(review): recent pandas releases deprecate the "M" alias in favour of
# "ME" -- confirm against the pinned pandas/xarray versions.
monthly_groups = ds_daily.resample(time="M")
ds_monthly = monthly_groups.mean()