In [9]:
from pathlib import Path

import pandas as pd
import xarray as xr
from ruamel.yaml import YAML

# Shared YAML parser instance; load_data uses it to read each folder's metadata.yml.
yaml = YAML()

## Load data

In [136]:
def load_single_file(folder, meta, custom_project_name):
    """Load one preprocessed file and return it as a labelled DataFrame.

    Parameters
    ----------
    folder : pathlib.Path
        Directory that holds the file. Only the base name of
        ``meta["filename"]`` is used, so any directory recorded in the
        metadata is ignored.
    meta : mapping
        Metadata entry describing the file. The keys ``filename``,
        ``short_name``, ``dataset`` and ``ensemble`` are read.
    custom_project_name : str
        Label stored in the ``project`` index level of the result.

    Returns
    -------
    pandas.DataFrame
        The variable named by ``meta["short_name"]``, with ``project``,
        ``dataset`` and ``ensemble`` appended to the existing index.
    """
    filename = folder / Path(meta["filename"]).name

    # The context manager releases the file handle; the dataframe is built
    # inside the block so the values are read before the file is closed.
    with xr.open_dataset(filename) as ds:
        da = ds.drop_vars(["lat", "lon"])[meta["short_name"]]
        da["project"] = custom_project_name
        da["dataset"] = meta["dataset"]
        da["ensemble"] = meta["ensemble"]

        # Extra coordinates that only some inputs carry (marked "UKCP" in the
        # original notebook); they are dropped when present.
        da = da.drop_vars(["ensemble_member", "ensemble_member_id"], errors="ignore")

        # Bug fix: this used to test the undefined name `custom_project`, which
        # only worked when a stale global of that name was left in the kernel.
        # NOTE(review): the ensemble label is overridden for cordex_cpm3,
        # presumably because its metadata differs between experiments - confirm.
        if custom_project_name == "cordex_cpm3":
            da["ensemble"] = "inconsistent"

        return da.to_dataframe().set_index(
            ["project", "dataset", "ensemble"], append=True
        )


def load_data(
    experiment="hist",
    project="CMIP5",
    base_dir="/home/peter/eucp-project/tom_data/preproc/boxplots",
):
    """Load every file listed in one experiment/project folder.

    Parameters
    ----------
    experiment : str
        Experiment part of the folder name ``box_{experiment}_{project}``.
    project : str
        Project part of the folder name; also passed on as the project label
        of every loaded file.
    base_dir : str or pathlib.Path
        Directory containing the ``box_*`` folders. The default is the
        location the notebook was originally written against; pass another
        directory to run it elsewhere.

    Returns
    -------
    pandas.DataFrame
        Concatenation of the frames returned by ``load_single_file`` for each
        entry of the folder's ``metadata.yml``.
    """
    folder = Path(base_dir) / f"box_{experiment}_{project}"

    with open(folder / "metadata.yml", "r") as f:
        metadata = yaml.load(f)

    # Only the values of the metadata mapping are used; its keys are ignored.
    return pd.concat(
        [load_single_file(folder, meta, project) for meta in metadata.values()]
    )

In [142]:
# Projects whose historical and future runs are compared.
projects = [
    "CMIP5",
    "CMIP6",
    "cordex_cpm",
    "cordex_cpm2",
    "cordex_cpm3",
    "CORDEX-EUR11",
    "UKCP-GCM",
    "UKCP-RCM",
]

all_difs = []
for project in projects:
    # CMIP6 is the only project whose future experiment is named "SSP585";
    # every other project uses "rcp85".
    exp = {"CMIP6": "SSP585"}.get(project, "rcp85")

    hist = load_data("hist", project)
    future = load_data(exp, project)

    # Change of the future run relative to the historical run, in percent.
    difference = (future - hist) / hist * 100
    all_difs.append(difference)

# One frame holding the percentage change for all projects.
data = pd.concat(all_difs)
data

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,pr
season_number,project,dataset,ensemble,Unnamed: 4_level_1
0,CMIP5,ACCESS1-0,r1i1p1,1.584215
1,CMIP5,ACCESS1-0,r1i1p1,12.019885
2,CMIP5,ACCESS1-0,r1i1p1,-9.247226
3,CMIP5,ACCESS1-0,r1i1p1,-0.685971
0,CMIP5,ACCESS1-3,r1i1p1,3.111616
...,...,...,...,...
3,UKCP-RCM,land-rcm,13,-12.468933
0,UKCP-RCM,land-rcm,15,5.941370
1,UKCP-RCM,land-rcm,15,-1.947676
2,UKCP-RCM,land-rcm,15,-17.315756


## Plot with hvplot

In [143]:
import hvplot.pandas

In [149]:
# Rows for season_number 0 only (named `djf`, presumably December-February).
djf = data.query("season_number==0")

# Box plot per project, overlaid with the individual values as jittered points.
box = djf.hvplot.box(y="pr", by="project")
points = djf.hvplot.scatter(x="project", y="pr", c="orange").opts(jitter=0.5)
box * points

## Plot with altair

In [150]:
import altair as alt

In [158]:
# Simple box plot: one box per project for season_number 0.
# reset_index() turns the index levels into ordinary columns for altair.
djf = data.query("season_number==0").reset_index()
box = alt.Chart(djf, width=400).mark_boxplot().encode(
    x="project:N",
    y="pr:Q",
    color="project:N",
)

box

In [152]:
# Strip plot / jitter plot: one narrow column of points per project.
djf = data.query("season_number==0").reset_index()

# Hide everything on the jitter axis except a single tick at 0.
jitter_axis = alt.Axis(values=[0], ticks=True, grid=False, labels=False)

strip = (
    alt.Chart(djf, width=75)
    # Generate Gaussian jitter with a Box-Muller transform
    .transform_calculate(jitter="sqrt(-2*log(random()))*cos(2*PI*random())")
    .mark_circle(size=50)
    .encode(
        x=alt.X("jitter:Q", title=None, axis=jitter_axis, scale=alt.Scale()),
        y=alt.Y("pr:Q"),
        color=alt.Color("project:N", legend=None),
        column=alt.Column("project:N"),
        tooltip=["dataset", "pr"],
    )
    .configure_facet(spacing=10)
    .configure_view(stroke=None)
)

strip

In [172]:
# Combined box/jitter
def box_strip(project, season=0):
    """Build a layered box plot plus jittered strip plot for one project.

    Parameters
    ----------
    project : str
        Value of the ``project`` index level of the global ``data`` frame to
        plot.
    season : int
        Value of the ``season_number`` index level to plot (default 0).

    Returns
    -------
    altair.LayerChart
        A box plot of ``pr`` with the individual values drawn on top as
        horizontally jittered circles.
    """
    # Bug fix: the selection was computed and then ignored, with the chart
    # built from the global `djf` instead, so every call plotted the same
    # data regardless of `project` and `season`.
    # reset_index() exposes the index levels (e.g. "dataset", used in the
    # tooltip) as ordinary columns.
    source = data.query("(project==@project)&(season_number==@season)").reset_index()

    chart = alt.Chart(source, width=50)
    box = chart.mark_boxplot().encode(y="pr:Q")
    strip = (
        chart.mark_circle(size=50)
        .encode(
            x=alt.X("jitter:Q"),
            y=alt.Y("pr:Q"),
            tooltip=["dataset", "pr"],
        )
        .transform_calculate(
            # Generate Gaussian jitter with a Box-Muller transform
            jitter="sqrt(-2*log(random()))*cos(2*PI*random())"
        )
    )
    # Pass the two layers separately instead of wrapping an already-layered
    # `box + strip` chart in a second layer.
    return alt.layer(box, strip)

box_strip('CMIP5')
# To draw every project side by side, the charts have to be handed to
# alt.hconcat as separate arguments. Passing a bare generator appears to be
# what raised "ValueError: Only chart objects can be used in HConcatChart."
# alt.hconcat(*(box_strip(project) for project in projects))

ValueError: Only chart objects can be used in HConcatChart.

In [148]:
# Inspect the rows of `data` that belong to the cordex_cpm3 project only.
data.query('project=="cordex_cpm3"')

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,pr
season_number,project,dataset,ensemble,Unnamed: 4_level_1
0,cordex_cpm3,KNMI-HCLIM38h1-AROME,inconsistent,-6.987996
1,cordex_cpm3,KNMI-HCLIM38h1-AROME,inconsistent,-2.270622
2,cordex_cpm3,KNMI-HCLIM38h1-AROME,inconsistent,-11.339583
3,cordex_cpm3,KNMI-HCLIM38h1-AROME,inconsistent,12.656061


In [137]:
# Inspect the values loaded for the "hist" experiment of cordex_cpm3,
# i.e. the input to the difference rather than the difference itself.
load_data("hist", "cordex_cpm3")

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,pr
season_number,project,dataset,ensemble,Unnamed: 4_level_1
0,cordex_cpm3,KNMI-HCLIM38h1-AROME,inconsistent,3.1e-05
1,cordex_cpm3,KNMI-HCLIM38h1-AROME,inconsistent,3.4e-05
2,cordex_cpm3,KNMI-HCLIM38h1-AROME,inconsistent,3.3e-05
3,cordex_cpm3,KNMI-HCLIM38h1-AROME,inconsistent,3e-05
