# Example plots of PEDL experiment data #

Run `pedl e describe --metrics --outdir <outdir> <experiment id>` to generate CSV
data.

In [None]:
# Directory containing the exported CSV files that the cells below read.
# Set this to the <outdir> that was passed to
# `pedl e describe --metrics --outdir <outdir> <experiment id>`.
input_directory = "/Users/yoavz/Desktop/mnist_conservative"

In [None]:
import os

import pandas as pd

## Data loading ##

Load the raw data and apply basic transformations.

In [None]:
# Read the ID columns as strings instead of letting pandas infer a type.
dtype_map = dict.fromkeys(["Experiment ID", "Trial ID", "Step ID"], str)


def _load_table(filename, timestamp_columns):
    """Read one CSV from `input_directory`, parsing the given columns as dates."""
    return pd.read_csv(
        os.path.join(input_directory, filename),
        dtype=dtype_map,
        parse_dates=timestamp_columns,
    )


experiments = _load_table("experiments.csv", ["Start Time", "End Time"])
trials = _load_table("trials.csv", ["Start Time", "End Time"])
# Steps additionally carry checkpoint and validation timestamps.
steps = _load_table(
    "steps.csv",
    [
        "Start Time",
        "End Time",
        "Checkpoint Start Time",
        "Checkpoint End Time",
        "Validation Start Time",
        "Validation End Time",
    ],
)

In [None]:
# Display the column dtypes of the loaded experiments table.
experiments.dtypes

In [None]:
# Stop here unless the export contains exactly one experiment.
assert len(experiments) == 1

In [None]:
# Display the column dtypes of the loaded trials table.
trials.dtypes

In [None]:
# Number of rows in the trials table.
len(trials)

In [None]:
# Display the column dtypes of the loaded steps table.
steps.dtypes

In [None]:
# Number of rows in the steps table.
len(steps)

## Data transformations ##

Join basic experiment info.

In [None]:
# Attach the experiment description to every trial, then the experiment ID and
# description to every step. pd.merge defaults to an inner join, so rows
# without a match on the join key are dropped.
trials = pd.merge(trials, experiments[["Experiment ID", "Description"]], on="Experiment ID")
steps = pd.merge(steps, trials[["Trial ID", "Experiment ID", "Description"]], on="Trial ID")

# Make Step ID numerical. Convert the whole column in a single vectorized call
# rather than invoking pd.to_numeric once per element through Series.apply.
steps["Step ID"] = pd.to_numeric(steps["Step ID"])

Add start time of the first trial for the experiment.

In [None]:
# Earliest trial start per experiment.
# Note time since experiment start is not very useful, as experiments may not
# start their first trial for some time.
first_trial_start = (
    trials[["Experiment ID", "Start Time"]]
    .groupby("Experiment ID", as_index=False)
    .min()
    .rename(columns={"Start Time": "First Trial Start Time"})
)
steps = pd.merge(steps, first_trial_start, on="Experiment ID")

# Start time of the trial each step belongs to, taken as the earliest step
# start time recorded for that trial.
trial_start = (
    steps[["Trial ID", "Start Time"]]
    .groupby("Trial ID", as_index=False)
    .min()
    .rename(columns={"Start Time": "Trial Start Time"})
)
steps = pd.merge(steps, trial_start, on="Trial ID")

# Total number of steps associated with each trial ID (its largest Step ID).
# This varies based on the rung the trial ends up in.
steps_per_trial = (
    steps[["Trial ID", "Step ID"]]
    .groupby("Trial ID", as_index=False)
    .max()
    .rename(columns={"Step ID": "Total Steps In Trial"})
)
trials = pd.merge(trials, steps_per_trial, on="Trial ID")
# Stored as a categorical column.
trials["Total Steps In Trial"] = trials["Total Steps In Trial"].astype("category")

## Model performance graphs ##

In [None]:
from plotnine import (
    coord_cartesian, ggplot, aes, geom_col, geom_boxplot, geom_line, geom_point,
    scale_y_log10, facet_wrap, guides, facet_grid, ylab, ggtitle, theme_minimal, xlab,
    geom_bar)

Number of trials per step size allocation

In [None]:
# Count how many trials share each "Total Steps In Trial" value.
steps_in_trial_counts = (
    trials.groupby("Total Steps In Trial")
    .size()
    .reset_index(name="Number of Trials")
)

# Bar chart of those counts. The title used to be a copy of the training-loss
# plot's title; it now describes what this chart actually shows.
(ggplot(steps_in_trial_counts, aes(x="Total Steps In Trial", y="Number of Trials")) +
  theme_minimal() +
  geom_bar(stat="identity") +
  ggtitle("Number of trials per step size allocation")
)

Trial training performance by step.

In [None]:
# Cast Step ID to int; it is the x aesthetic of the plot below.
steps["Step ID"] = steps["Step ID"].astype(int)

# Training loss per step, one colour per trial, with a log10 y axis.
training_loss_by_step = (
    ggplot(steps, aes(x="Step ID", y="loss"))
    + theme_minimal()
    + geom_point(aes(color="Trial ID"))
    + geom_line(aes(color="Trial ID"))
    + xlab("step")
    + ylab("loss, log-scaled")
    + scale_y_log10()
    + ggtitle("Trial training performance by step")
)
training_loss_by_step

Trial training performance by time since the start of each step's trial.

In [None]:
# Seconds between the start of a step's trial and the end of that step.
elapsed_in_trial = steps["End Time"] - steps["Trial Start Time"]
steps["Since Trial Start"] = elapsed_in_trial.dt.total_seconds()

# Training loss against that elapsed time, one colour per trial, log10 y axis.
training_loss_since_trial_start = (
    ggplot(steps, aes(x="Since Trial Start", y="loss"))
    + theme_minimal()
    + geom_point(aes(color="Trial ID"))
    + geom_line(aes(color="Trial ID"))
    + xlab("Seconds since trial start")
    + ylab("loss, log-scaled")
    + scale_y_log10()
    + ggtitle("Trial training performance by time from trial start")
)
training_loss_since_trial_start

Trial training performance by time since _first_ trial start.

In [None]:
# Seconds between the experiment's first trial start and the end of each step.
elapsed_in_experiment = steps["End Time"] - steps["First Trial Start Time"]
steps["Since First Trial Start"] = elapsed_in_experiment.dt.total_seconds()

# Training loss against that elapsed time, one colour per trial, log10 y axis.
training_loss_since_first_trial = (
    ggplot(steps, aes(x="Since First Trial Start", y="loss"))
    + theme_minimal()
    + geom_line(aes(color="Trial ID"))
    + geom_point(aes(color="Trial ID"))
    + xlab("Seconds since first trial start")
    + ylab("loss, log-scaled")
    + scale_y_log10()
    + ggtitle("Trial training performance by time from first trial start")
)
training_loss_since_first_trial

Trial validation performance by step.

In [None]:
# Only steps that have a validation metric are plotted.
validated_steps = steps[steps["Validation Metric"].notna()]

# Validation metric per step, one colour per trial, with a log10 y axis.
validation_by_step = (
    ggplot(validated_steps, aes(x="Step ID", y="Validation Metric"))
    + theme_minimal()
    + scale_y_log10()
    + ylab("Validation Metric, log-scaled")
    + geom_point(aes(color="Trial ID"))
    + geom_line(aes(color="Trial ID"))
    + ggtitle("Trial validation performance by step")
)
validation_by_step


Experiment validation performance since first trial start.

In [None]:
# Order steps by end time within each experiment and number them from 0, so
# "Experiment Step Num" is an across-trial step counter.
best_validation = (
    steps
    .sort_values(["Experiment ID", "End Time"])
    .reset_index(drop=True)
)
# cumcount() gives the running position inside each group directly, so no
# groupby.apply / double reset_index round trip is needed. Inserted at
# position 0 so the counter stays the leading column.
best_validation.insert(
    0,
    "Experiment Step Num",
    best_validation.groupby("Experiment ID").cumcount(),
)

# "Min Validation Metric": running minimum of the validation metric per
# experiment, computed only over steps that have one. The assignment aligns on
# the index, so steps without a validation metric are left as NaN.
has_validation = best_validation["Validation Metric"].notna()
best_validation["Min Validation Metric"] = (
    best_validation[has_validation]
    .groupby("Experiment ID")["Validation Metric"]
    .cummin()
)

# Keep only the steps that carry a running minimum.
best_validation = best_validation[best_validation["Min Validation Metric"].notna()]
best_validation.dtypes

In [None]:
# Best validation metric so far against seconds since the first trial started,
# one line per experiment.
best_validation_over_time = (
    ggplot(
        best_validation,
        aes(x="Since First Trial Start", y="Min Validation Metric"),
    )
    + theme_minimal()
    + geom_line(aes(color="Experiment ID"))
    + xlab("seconds since first trial start")
    + ggtitle("Best validation performance since first trial start")
)
best_validation_over_time

Experiment validation performance by step.

In [None]:
# Best validation metric so far against the across-trial step counter,
# one line per experiment.
best_validation_by_step = (
    ggplot(
        best_validation,
        aes(x="Experiment Step Num", y="Min Validation Metric"),
    )
    + theme_minimal()
    + geom_line(aes(color="Experiment ID"))
    + ggtitle("Best validation performance by step num")
)
best_validation_by_step