# Query submitted data from GlaMBIE submission system

A demo of how to use the `glambie` code to query and display data from the GlaMBIE submission system.

In [None]:
import matplotlib.pyplot as plt
from glambie.data.data_catalogue import DataCatalogue
from glambie.plot.plot_helpers import get_colours, add_labels_axlines_and_title
from glambie.plot.plot_helpers import (
    plot_non_cumulative_timeseries_on_axis,
    plot_cumulative_timeseries_on_axis,
)
from glambie.data import submission_system_interface

#### Query and filter data

In [None]:
# Read the full data catalogue from the GlaMBIE submission system.
# This might take a while, because the data is downloaded from the submission system.
catalogue_original = DataCatalogue.from_glambie_submission_system()

In [None]:
# Filter the catalogue by region: we only query the datasets over Alaska here.
# The result goes into a new variable; `catalogue_original` is reused further down for another filter.
catalogue_filtered = catalogue_original.get_filtered_catalogue(region_name="alaska")

In [None]:
# List every dataset in the Alaska catalogue as: region, data group, user group
for entry in catalogue_filtered.datasets:
    summary_line = f"{entry.region.name} \t {entry.data_group.name} \t {entry.user_group}"
    print(summary_line)

### Plot all datasets from one group 

Now we will filter by region (Iceland) and data group (DEM differencing) and plot all the datasets in one plot.

In [None]:
# Narrow the full catalogue down to one region and one data group
catalogue_filtered_group = catalogue_original.get_filtered_catalogue(
    region_name="iceland", data_group="demdiff"
)
plot_errors = True  # switch to False to leave out the error bars

_, axes = plt.subplots(2, 1, figsize=(11, 6))
group_datasets = catalogue_filtered_group.datasets
colours = get_colours(len(group_datasets) + 1)

# first axis: non-cumulative timeseries, one colour per dataset
for index, group_dataset in enumerate(group_datasets):
    dataframe = group_dataset.data.as_dataframe()
    plot_non_cumulative_timeseries_on_axis(
        result_dataframe=dataframe,
        ax=axes[0],
        colour=colours[index],
        plot_errors=plot_errors,
    )

# second axis: cumulative timeseries, using the same colour per dataset as above
for index, group_dataset in enumerate(group_datasets):
    plot_cumulative_timeseries_on_axis(
        timeseries=group_dataset,
        ax=axes[1],
        colour=colours[index],
        plot_errors=plot_errors,
        linestyle="-",
        # pass a particular reference timeseries here if the cumulative
        # timeseries should not start at 0 for each dataset
        timeseries_for_vertical_adjustment=None,
        label="Dataset: " + group_dataset.user_group,
    )

# the unit handed to the labelling helper is taken from the first dataset of the group
first_dataset = group_datasets[0]
add_labels_axlines_and_title(
    axes=axes,
    unit=first_dataset.unit,
    legend_fontsize=9,
    title="",
)

### Pick one dataset and look at it

We'll now pick out one dataset, look at its data and metadata, and perform some simple operations.

In [None]:
# Select a single dataset from the Alaska catalogue (index 9) to inspect
dataset = catalogue_filtered.datasets[9]

# Summarise its key metadata on one tab-separated line
print(
    f"region = {dataset.region.name} \t "
    f"data_group/source = {dataset.data_group.name} \t "
    f"author group = {dataset.user_group}  \t "
    f"unit = {dataset.unit}  \t "
    f"RGI version = {dataset.rgi_version}"
)

# Display the first few rows of the dataset's data
dataset_frame = dataset.data.as_dataframe()
dataset_frame.head()

In [None]:
# More metadata: it comes as a dictionary, so start by listing its keys
extra_metadata = dataset.additional_metadata
print(extra_metadata.keys())
# ...then look up a single key, for example the co-authors
print("Co-Authors: ", extra_metadata["co_author_names"])

In [None]:
# Example of how to convert to a different unit: here the dataset is converted
# to meter water equivalent (mwe), passing RGI area version 6 to the conversion.
dataset_in_mwe = dataset.convert_timeseries_to_unit_mwe(rgi_area_version=6)
# show the first few rows of the converted data
dataset_in_mwe.data.as_dataframe().head()

In [None]:
# Show the unit of the original dataset next to the unit of the converted dataset.
# NOTE(review): the two differ unless the original dataset was already in mwe — confirm for the chosen dataset.
dataset.unit, dataset_in_mwe.unit

In [None]:
# Convert to annual trends.
# First the timeseries (in mwe) has to be converted to our evenly spaced monthly grid;
# the annual trends are then derived from that gridded timeseries.
dataset_date_grid = dataset_in_mwe.convert_timeseries_to_monthly_grid()
dataset_annual = dataset_date_grid.convert_timeseries_to_annual_trends()

In [None]:
# Convert the gridded timeseries to a long-term trend
dataset_trend = dataset_date_grid.convert_timeseries_to_longterm_trend()
# display the resulting trend data
dataset_trend.data.as_dataframe()

#### Plot original dataset (in mwe), calculated annual rates and calculated longterm trend

In [None]:
plot_errors = False  # set to True if you want to plot error bars

_, axes = plt.subplots(2, 1, figsize=(11, 6))
colours = get_colours(3)

# Each entry: (timeseries, label prefix, reference timeseries for vertical adjustment).
# The original dataset is plotted without a reference; the calculated annual rates and
# the long-term trend are vertically adjusted to the original dataset.
timeseries_to_plot = [
    (dataset_in_mwe, "Original dataset: ", None),
    (dataset_annual, "Annual rates: ", dataset_in_mwe),
    (dataset_trend, "Trend: ", dataset_in_mwe),
]

# plot non-cumulative timeseries on the first axis
for index, (timeseries, label_prefix, reference) in enumerate(timeseries_to_plot):
    plot_non_cumulative_timeseries_on_axis(
        result_dataframe=timeseries.data.as_dataframe(),
        ax=axes[0],
        colour=colours[index],
        plot_errors=plot_errors,
    )

# plot cumulative timeseries on the second axis, with the same colour per timeseries
for index, (timeseries, label_prefix, reference) in enumerate(timeseries_to_plot):
    plot_cumulative_timeseries_on_axis(
        timeseries=timeseries,
        ax=axes[1],
        colour=colours[index],
        plot_errors=plot_errors,
        linestyle="-",
        timeseries_for_vertical_adjustment=reference,
        label=label_prefix + timeseries.user_group,
    )

# Use the unit of the dataset that is actually plotted here. The previous version
# took the unit from `catalogue_filtered_group` (the group plotted in an earlier cell),
# which belongs to different datasets and made this cell depend on that unrelated cell.
add_labels_axlines_and_title(
    axes=axes,
    unit=dataset_in_mwe.unit,
    legend_fontsize=9,
    title="",
)

#### Download dataset information PDF of dataset

Download the PDF submitted by the participants

In [None]:
# Download the dataset information PDF for the selected dataset.
# target_directory="." stores it in the directory the notebook is run from;
# point `target_directory` elsewhere to save it in a different location.
submission_system_interface.download_dataset_information_file_to_disk(
    user_group=dataset.user_group,
    data_group=dataset.data_group,
    target_directory=".",
)