In [9]:
from pathlib import Path

import pandas as pd
import xarray as xr
from ruamel.yaml import YAML

# Shared ruamel.yaml parser instance; load_data() uses it to read metadata.yml.
yaml = YAML()

## Load data

In [136]:
def load_single_file(folder, meta, custom_project_name):
    """Load one preprocessed file into a DataFrame labelled by project/dataset/ensemble.

    Parameters
    ----------
    folder : pathlib.Path
        Directory that contains the file. Only the base name of
        ``meta["filename"]`` is used, so the file is always looked up
        inside ``folder`` regardless of the directory recorded in the metadata.
    meta : mapping
        Metadata entry providing at least the keys ``"filename"``,
        ``"short_name"``, ``"dataset"`` and ``"ensemble"``.
    custom_project_name : str
        Project label stored alongside the data.

    Returns
    -------
    pandas.DataFrame
        The variable named ``meta["short_name"]`` with ``project``,
        ``dataset`` and ``ensemble`` appended as extra index levels.
    """
    filename = folder / Path(meta["filename"]).name

    # Use the dataset as a context manager so the underlying file is closed
    # once the values have been materialised by ``to_dataframe``.
    with xr.open_dataset(filename) as ds:
        da = ds.drop(["lat", "lon"])[meta["short_name"]]
        da["project"] = custom_project_name
        da["dataset"] = meta["dataset"]
        da["ensemble"] = meta["ensemble"]

        da = da.drop(["ensemble_member", "ensemble_member_id"], errors="ignore")  # UKCP

        # Bug fix: this previously tested the undefined name ``custom_project``,
        # which raises NameError on a fresh kernel (or silently reads a leaked global).
        if custom_project_name == "cordex_cpm3":
            # NOTE(review): presumably the ensemble labels of this project do not
            # match between experiments, so a common placeholder is used to let
            # the frames align on their index -- confirm.
            da["ensemble"] = "inconsistent"

        df = da.to_dataframe().set_index(
            ["project", "dataset", "ensemble"], append=True
        )
    return df


def load_data(
    experiment="hist",
    project="CMIP5",
    base_dir="/home/peter/eucp-project/tom_data/preproc/boxplots",
):
    """Load every file listed in one preprocessor output folder.

    Parameters
    ----------
    experiment : str
        Experiment part of the folder name (``box_{experiment}_{project}``).
    project : str
        Project part of the folder name; also used as the project label of
        the returned rows.
    base_dir : str or pathlib.Path
        Directory containing the ``box_*`` folders. Defaults to the location
        that was previously hard-coded, so existing calls behave the same;
        pass another path to run the notebook on a different machine.

    Returns
    -------
    pandas.DataFrame
        Concatenation of ``load_single_file`` results for every entry in the
        folder's ``metadata.yml``.
    """
    folder = Path(base_dir) / f"box_{experiment}_{project}"

    with open(folder / "metadata.yml", "r") as f:
        metadata = yaml.load(f)

    return pd.concat(
        [load_single_file(folder, meta, project) for meta in metadata.values()]
    )

In [207]:
# Projects for which a hist/future pair of folders is loaded.
projects = [
    "CMIP5", "CMIP6",
    "cordex_cpm", "cordex_cpm2", "cordex_cpm3",
    "CORDEX-EUR11",
    "UKCP-GCM", "UKCP-RCM",
]

# One frame of relative changes (in percent) per project.
all_difs = []
for project in projects:
    # CMIP6 uses a different future-experiment name than the other projects.
    if project == "CMIP6":
        exp = "SSP585"
    else:
        exp = "rcp85"

    hist = load_data("hist", project)
    future = load_data(exp, project)

    # pandas aligns both frames on their index before the arithmetic.
    difference = (future - hist) / hist * 100
    all_difs.append(difference)

# Flatten the index levels into ordinary columns for plotting.
data = pd.concat(all_difs).reset_index()
data

Unnamed: 0,season_number,project,dataset,ensemble,pr
0,0,CMIP5,ACCESS1-0,r1i1p1,1.584215
1,1,CMIP5,ACCESS1-0,r1i1p1,12.019885
2,2,CMIP5,ACCESS1-0,r1i1p1,-9.247226
3,3,CMIP5,ACCESS1-0,r1i1p1,-0.685971
4,0,CMIP5,ACCESS1-3,r1i1p1,3.111616
...,...,...,...,...,...
539,3,UKCP-RCM,land-rcm,13,-12.468933
540,0,UKCP-RCM,land-rcm,15,5.941370
541,1,UKCP-RCM,land-rcm,15,-1.947676
542,2,UKCP-RCM,land-rcm,15,-17.315756


## Plot with hvplot

In [208]:
import hvplot.pandas

In [219]:
# Keep only the rows with season_number 0 (named "djf"; presumably Dec-Feb -- confirm).
djf = data.query("season_number==0")

# Box plot of the relative change per project.
box = djf.hvplot.box(y="pr", by="project")

# Individual values as orange points, spread horizontally with jitter.
jitter = djf.hvplot.scatter(x="project", y="pr", c="orange")
jitter = jitter.opts(jitter=0.5)

# Overlay both plots.
box * jitter

## Plot with altair

In [213]:
import altair as alt

In [218]:
# Simple box plot: one box per project for season_number 0.
djf = data.query("season_number==0")
box = alt.Chart(djf, width=400).mark_boxplot().encode(
    x="project:N",
    y="pr:Q",
    color="project:N",
)

box

In [215]:
# Strip plot / jitter plot: one narrow column per project, points spread along x.
djf = data.query("season_number==0")
strip = (
    alt.Chart(djf, width=75)
    # Generate Gaussian jitter with a Box-Muller transform
    .transform_calculate(jitter="sqrt(-2*log(random()))*cos(2*PI*random())")
    .mark_circle(size=50)
    .encode(
        # The x axis only carries the jitter, so its title, labels and grid are hidden.
        x=alt.X(
            "jitter:Q",
            title=None,
            axis=alt.Axis(values=[0], ticks=True, grid=False, labels=False),
            scale=alt.Scale(),
        ),
        y=alt.Y("pr:Q"),
        color=alt.Color("project:N", legend=None),
        column=alt.Column("project:N"),
        tooltip=["dataset", "pr"],
    )
    .configure_facet(spacing=10)
    .configure_view(stroke=None)
)

strip

In [216]:
# Combined box/jitter (can tweak it further)
def box_strip(project, season=0):
    """Return a layered box plot + jittered strip plot for one project and season.

    Parameters
    ----------
    project : str
        Value of the ``project`` column to select from the global ``data`` frame.
    season : int
        Value of the ``season_number`` column to select (default 0).

    Returns
    -------
    A layered Altair chart: the box plot with the individual points on top.
    """
    # Generate Gaussian jitter with a Box-Muller transform
    jitter_expr = "sqrt(-2*log(random()))*cos(2*PI*random())"

    subset = data.query("(project==@project)&(season_number==@season)")
    base = alt.Chart(subset, width=50)

    box_layer = base.mark_boxplot().encode(y="pr:Q")
    strip_layer = (
        base.transform_calculate(jitter=jitter_expr)
        .mark_circle(size=50)
        .encode(
            x=alt.X("jitter:Q"),
            y=alt.Y("pr:Q"),
            tooltip=["dataset", "pr"],
        )
    )
    return box_layer + strip_layer


# Show the CMIP5 and CMIP6 panels side by side (`|` concatenates the charts horizontally).
box_strip("CMIP5") | box_strip("CMIP6")

In [258]:
# With input bindings: two dropdowns filter the strip plot by season and ensemble.
season_dropdown = alt.selection_single(
    name="A",
    fields=["season_number"],
    bind=alt.binding_select(options=[0, 1, 2, 3]),
)
ensemble_dropdown = alt.selection_single(
    name="B",
    fields=["ensemble"],
    bind=alt.binding_select(options=["r1i1p1", "inconsistent"]),
)

chart = (
    alt.Chart(data, width=75)
    # Gaussian jitter via a Box-Muller transform, used to spread the points along x.
    .transform_calculate(jitter="sqrt(-2*log(random()))*cos(2*PI*random())")
    .mark_circle(size=50)
    .encode(
        x=alt.X(
            "jitter:Q",
            title=None,
            axis=alt.Axis(values=[0], ticks=True, grid=False, labels=False),
            scale=alt.Scale(),
        ),
        y=alt.Y("pr:Q"),
        color=alt.Color("project:N", legend=None),
        column=alt.Column("project:N"),
        tooltip=["dataset", "pr"],
    )
    # Each dropdown is registered on the chart and then used as a row filter.
    .add_selection(season_dropdown)
    .transform_filter(season_dropdown)
    .add_selection(ensemble_dropdown)
    .transform_filter(ensemble_dropdown)
    .configure_facet(spacing=10)
    .configure_view(stroke=None)
)
chart

In [260]:
# Write the chart to disk; the path is relative to the current working directory.
chart.save('../static/data/chart.json')
# Also print the JSON specification; as the output below shows, it embeds the full dataset.
print(chart.to_json())

{
  "$schema": "https://vega.github.io/schema/vega-lite/v4.8.1.json",
  "config": {
    "facet": {
      "spacing": 10
    },
    "view": {
      "continuousHeight": 300,
      "continuousWidth": 400,
      "stroke": null
    }
  },
  "data": {
    "name": "data-3501ce73586bc212dda14c0af4fd87dd"
  },
  "datasets": {
    "data-3501ce73586bc212dda14c0af4fd87dd": [
      {
        "dataset": "ACCESS1-0",
        "ensemble": "r1i1p1",
        "pr": 1.5842154026031494,
        "project": "CMIP5",
        "season_number": 0
      },
      {
        "dataset": "ACCESS1-0",
        "ensemble": "r1i1p1",
        "pr": 12.019885063171387,
        "project": "CMIP5",
        "season_number": 1
      },
      {
        "dataset": "ACCESS1-0",
        "ensemble": "r1i1p1",
        "pr": -9.247225761413574,
        "project": "CMIP5",
        "season_number": 2
      },
      {
        "dataset": "ACCESS1-0",
        "ensemble": "r1i1p1",
        "pr": -0.6859712600708008,
        "project": "CMIP5"