# Studying the influence of atmospheric circulation on upper tropospheric humidity

## Import libraries

In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import xarray as xr
from c3s_eqc_automatic_quality_control import download

# Apply the matplotlib "seaborn-v0_8-notebook" style sheet to the figures below.
plt.style.use("seaborn-v0_8-notebook")

## Define parameters

In [None]:
# Time period handed to the data requests (year-month strings)
start = "2007-01"
stop = "2007-01"

# Region: Degree range has to be integers because UTH has a resolution of 1x1
lat_min = -30
lat_max = 30
lon_min = 0
lon_max = 360
# all(...) is required: a bare generator expression is always truthy, so
# asserting on it directly would never fail.
assert all(isinstance(coord, int) for coord in (lat_min, lat_max, lon_min, lon_max))
# Longitudes are expressed in the 0-360 convention.
assert 0 <= lon_min <= lon_max <= 360

## Set the data request

In [None]:
# Chunking passed to the download helper for both datasets
# (presumably one request per year and month -- confirm against the helper).
chunks = dict(year=1, month=1)

# Upper tropospheric humidity: all variables from MHS on MetOp-A.
# The area is ordered [lat_max, lon_min, lat_min, lon_max].
collection_id_uth = "satellite-upper-troposphere-humidity"
request_uth = dict(
    variable="all",
    sensor_on_satellite="mhs_on_metop_a",
    area=[lat_max, lon_min, lat_min, lon_max],
)


# ERA5 vertical velocity at 500 hPa.
# "area" and "time" are not set here; they are added per request later on.
collection_id_omega500 = "reanalysis-era5-pressure-levels"
request_omega500 = dict(
    product_type="reanalysis",
    variable="vertical_velocity",
    pressure_level="500",
)

## Download UTH

In [None]:
# Add the start/stop period to the UTH request (dates are stringified),
# then download the data and open it as `ds_uth`.
request_uth_dated = download.update_request_date(
    request_uth, start, stop, stringify_dates=True
)
ds_uth = download.download_and_transform(
    collection_id_uth,
    request_uth_dated,
    chunks=chunks,
)

## Download OMEGA 500

In [None]:
# Download omega in 15-degree longitude bands (one band per hour of time
# difference), each at its own UTC hour, then join the bands along longitude.
datasets = []
first_hh = int(lon_min / 15)
last_hh = int((lon_max - 0.02) / 15)
for hh in range(first_hh, last_hh + 1):
    print(f"{hh=}")

    # UTC hour for this band: 9 minus the band index, wrapped into 0-23
    time = f"{(9 - hh) % 24}:00"

    # Band limits, clipped to the requested region. The 0.01 offsets shrink
    # the box slightly on its western and lat_max sides
    # (presumably to leave out the grid points lying on those edges -- confirm).
    west = max(lon_min, hh * 15) + 0.01
    east = min(lon_max, (hh + 1) * 15)
    area = [lat_min, west, lat_max - 0.01, east]
    # The request uses the -180..180 convention: shift values above 180
    area = [coord if coord <= 180 else coord - 360 for coord in area]

    # Download
    request = download.update_request_date(
        request_omega500 | {"area": area, "time": time},
        start,
        stop,
        stringify_dates=True,
    )
    ds = download.download_and_transform(
        collection_id_omega500,
        request,
        chunks=chunks,
    )

    # Process coordinates: use forecast_reference_time as the time axis,
    # truncated to the day, and move longitudes back to the 0-360 convention
    ds = ds.drop_vars("time")
    ds = ds.rename({"forecast_reference_time": "time"})
    ds["time"] = ds["time"].dt.floor("D")
    longitude = ds["longitude"]
    ds["longitude"] = longitude.where(longitude >= 0, longitude + 360)

    datasets.append(ds)
ds_omega500 = xr.concat(datasets, dim="longitude")

## Process OMEGA500

In [None]:
# Average non-overlapping blocks of 4x4 grid points.
# NOTE(review): presumably this brings the omega grid to the 1x1 degree
# resolution of UTH -- confirm.
ds_omega500 = ds_omega500.coarsen(latitude=4, longitude=4).mean()

# Longitudes between 150 and 180 are not well colocated in time:
# there the local day does not correspond to the UTC day.
# The values of w in that band are rolled by one time step to fix the
# colocation. The first day would otherwise receive a wrapped-around record,
# so it is set to NaN instead.
# The mask uses the dataset's own time coordinate, so this cell does not
# depend on a variable left over from another cell.
time_coord = ds_omega500["time"]
ds_omega500 = ds_omega500.where(
    (ds_omega500["longitude"] < 150) | (ds_omega500["longitude"] > 180),
    ds_omega500.roll(time=1).where(time_coord != time_coord.min()),
)

# Units: 1 Pa/s = 24 * 3600 Pa/day = 864 hPa/day
with xr.set_options(keep_attrs=True):
    ds_omega500["w"] *= 24 * 3600 / 100  # from Pa/s to hPa / day
# The unit belongs to the variable itself, not to the dataset as a whole.
ds_omega500["w"].attrs["units"] = "hPa / day"

## Make 1D dataframe

In [None]:
# Flatten both fields into 1D columns of a single dataframe.
# Each array is sorted along all of its dimensions and stacked with the
# dimensions in alphabetical order; the index is then dropped, so the two
# columns are paired by position only.
# NOTE(review): this assumes both arrays cover identical grids and time
# steps -- confirm.
series = {}
for da in (ds_uth["uth_mean_ascend"], ds_omega500["w"]):
    dims = list(da.dims)
    flattened = da.sortby(dims).stack(index=sorted(dims)).to_series()
    series[da.name] = flattened.reset_index(drop=True)
df = pd.DataFrame(series)

## Plot

In [None]:
# Bins of w: 10-unit wide intervals with edges from -105 to 65,
# labelled by their centres (-100, -90, ..., 60)
bin_edges = range(-105, 66, 10)
bin_labels = pd.Series(bin_edges).rolling(2).mean()[1:]
grouper = pd.cut(df["w"], bin_edges)

# Appearance of the box plot elements
median_style = {"linewidth": 2.5, "color": "k"}
mean_style = {
    "marker": "D",
    "markeredgecolor": "black",
    "markerfacecolor": "green",
    "markersize": 8,
}
box_style = {"color": "k", "facecolor": "silver"}

# One box of UTH per bin of w; empty bins are kept (observed=False) so the
# boxes stay aligned with the bin labels
grouped = df.groupby(grouper, observed=False)
ax = grouped.boxplot(
    subplots=False,
    column="uth_mean_ascend",
    showfliers=False,
    patch_artist=True,
    showmeans=True,
    medianprops=median_style,
    meanprops=mean_style,
    boxprops=box_style,
    xlabel="omega 500 [hPa/day]",
    ylabel="UTH [%]",
    grid=True,
)
ax.set_title("UTH as a function of vertical velocity")
_ = ax.set_xticklabels(bin_labels.astype(int))