In [None]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# re-imported before each cell executes and edits to them are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

# Experimental Results

Here, we will plot the results of running Q-learning on different maps and specifications, along with different reward strategies.

In [None]:
#hide
from pathlib import Path

import matplotlib
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import scipy.stats as stats

from data_utils import load_data, create_data_table, plot_curve

%matplotlib inline

# Figure styling: stack the "science" and "ieee" style sheets, then force a
# serif Times font on top of whatever those sheets configure.
# NOTE(review): these style sheets are presumably provided by the SciencePlots
# package -- confirm it is installed in the environment.
# Alternative style kept for reference: plt.style.use("ggplot")
plt.style.use(["science", "ieee"])
plt.rcParams["font.family"] = "serif"   # font family
plt.rcParams["font.serif"] = ["Times"]  # concrete serif font

## Loading the data

We will need to set the directory where the logged data is stored. Then, we can use the `load_data` and `create_data_table` functions to extract the data from the stored CSV files into plottable `DataFrame`s.

As we can see, for each reward method, the `DataFrame` returned contains a row entry for each evaluation episode. Each row is indexed by the training iteration number of the evaluated policy (`training_iter`) and contains the total reward obtained from that evaluation run (`total_reward`), the number of accepting visits in the product automaton (`acc_visits`), and whether the run is accepting or not (`accepting`).

In [None]:
# Location of the logged run and the experiment (map / specification) to plot.
LOGDIR = Path("../logs/2022-01-27-080044")
MAP = "map02"
SPEC = "bounded_recurrence1"
# Reward strategies to compare; each one is loaded separately below.
METHODS = ["sparse", "true-pot", "lavaei2020", "tauMDP"]

# Map every reward method name to whatever `load_data` returns for it.
DATA = dict(
    (name, load_data(LOGDIR, MAP, SPEC, name))
    for name in METHODS
)

# Show one of the loaded tables as a sample of the data layout.
DATA["true-pot"]

## Plotting the data

For each method, we want to plot the probability of acceptance --- which is the fraction of accepting runs at a checkpoint --- and the total reward obtained at a checkpoint. This means that we want to plot the average `accepting` and average `total_reward` against the training iteration `training_iter`.

To do this, we will first create a `DataFrame` for each of the quantities we want to aggregate, with one column per method used. This can be done using the `create_data_table` function defined in `data_utils`. We will then need to aggregate the data across training iterations, compute the error bands with a **95% confidence interval**, and plot the smoothed curves.

### Plotting probability of acceptance

Given a set of runs labelled with a `True` or a `False` based on if the run is accepting or not, the probability of acceptance is modelled by the binomial distribution associated with all the runs (each of which is essentially a Bernoulli trial).

Thus, we use the Agresti–Coull interval formula to calculate the 95% confidence interval for this data.

In [None]:
fig, ax1 = plt.subplots(nrows=1, ncols=1, sharex=True)
ax1.set(
    ylabel="Probability of Satisfaction",
    xlabel="No. of Training Iterations",
)

# Table of the "accepting" flag, built from every (method, frame) pair in DATA.
accepting_data = create_data_table("accepting", *DATA.items())
# Draw one curve per column of the table.
for method in accepting_data.columns:
    plot_curve(
        method,
        ax1,
        accepting_data[method],
        span=5,
        confidence=0.95,
        bernoulli=True,
    )

ax1.legend(loc="best")

### Plotting average total rewards

Since each reward strategy produces rewards on a different scale, comparing the total rewards of different methods in a single plot is not really meaningful.

In [None]:
fig, ax1 = plt.subplots(1, 1, sharex=True)
ax1.set_ylabel("Average total rewards")
ax1.set_xlabel("No. of Training Iterations")

# Table of the "total_reward" value, built from every (method, frame) pair in DATA.
total_reward_data = create_data_table("total_reward", *DATA.items())
# Iterate over THIS table's columns. The previous version looped over
# `accepting_data.columns`, a variable owned by another cell that is rebound
# later in the notebook, so the set of plotted methods depended on which
# cells had been executed before this one.
for method in total_reward_data.columns:
    data = total_reward_data[method]
    # bernoulli=False: total reward is not a True/False outcome, unlike the
    # "accepting" flag plotted with bernoulli=True elsewhere in this notebook.
    plot_curve(method, ax1, data, span=5, confidence=0.95, bernoulli=False)


ax1.legend(loc="lower right")


# $\tau$-MDP Results

In [None]:
# NOTE(review): this path is relative to "./" while LOGDIR above is relative
# to "../" -- confirm both resolve correctly from the notebook's working dir.
TAU_DATA_DIR = Path("./logs/tau")
TAU_ENV = 2
# Column names imposed on every CSV; each file's own header row is discarded
# (header=0) and replaced by these names.
COLS = ["training_iter", "total_reward", "robustness", "accepting"]

# Sorted so the concatenated row order is reproducible across runs and
# filesystems (glob order is otherwise arbitrary).
data_files = sorted(TAU_DATA_DIR.glob(f"tauMDP_ENV_{TAU_ENV}_pslip_0.1_*.csv"))

# Read every file first and concatenate once: growing a DataFrame with
# pd.concat inside the loop copies all previous rows on each iteration and
# concatenates with an initially empty frame.
frames = [
    pd.read_csv(f, index_col=False, names=COLS, header=0)
    for f in data_files
]

# pd.concat raises on an empty list, so fall back to an empty frame when no
# log files matched the pattern.
data = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()

# Only index by training iteration when there are rows to index.
if len(data) > 0:
    data = data.set_index("training_iter")
data

In [None]:
fig, ax1 = plt.subplots(nrows=1, ncols=1, sharex=True)
ax1.set(
    ylabel="Probability of Satisfaction",
    xlabel="No. of Training Iterations",
)

# Table of the "accepting" flag for the tau-MDP runs loaded into `data`,
# under the single column name "tauMDP".
accepting_data = create_data_table("accepting", ("tauMDP", data))
plot_curve(
    "tauMDP",
    ax1,
    accepting_data["tauMDP"],
    span=5,
    confidence=0.95,
    bernoulli=True,
)

ax1.legend(loc="upper right")

In [None]:
# NOTE(review): this cell produces the same acceptance plot as the cell
# before it -- confirm whether a different quantity (e.g. another column of
# the tau-MDP data) was meant to be plotted here.
fig, ax1 = plt.subplots(nrows=1, ncols=1, sharex=True)
ax1.set_xlabel("No. of Training Iterations")
ax1.set_ylabel("Probability of Satisfaction")

# Table of the "accepting" flag for the tau-MDP runs, column "tauMDP".
accepting_data = create_data_table("accepting", ("tauMDP", data))
tau_curve = accepting_data["tauMDP"]
plot_curve("tauMDP", ax1, tau_curve, span=5, confidence=0.95, bernoulli=True)

ax1.legend(loc="upper right")