In [8]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [9]:
import numpy as np
import torch
import matplotlib.pyplot as plt

import pandas as pd

from matrix_estimation import get_expanded_T, run_estimation

def plot_summary_df(df, ax, color, label, reps, ci_scale=1.69):
    """Plot a mean curve with a shaded uncertainty band on ``ax``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns ``'N'`` (x values), ``'mean'`` and ``'std'``.
    ax : matplotlib Axes (or any object exposing ``plot`` / ``fill_between``)
        Target axes; it is drawn on in place.
    color : str
        Colour used for both the line and the shaded band.
    label : str
        Legend label attached to the line.
    reps : int
        Number of repetitions behind each row; the band half-width is
        ``ci_scale * std / sqrt(reps)``, i.e. a scaled standard error.
    ci_scale : float, optional
        Multiplier applied to the standard error. Defaults to 1.69, the value
        previously hard-coded here.
        NOTE(review): 1.69 is close to the one-sided 95% Student-t quantile
        for 29 degrees of freedom (reps = 30) - confirm that was the intent.

    Returns
    -------
    None
    """
    mean = df['mean']
    xs = df['N']
    # Compute the half-width once instead of repeating the expression for
    # the lower and upper edges of the band.
    half_width = ci_scale * df['std'] / np.sqrt(reps)

    ax.plot(xs, mean, 'o-', color=color, label=label)
    ax.fill_between(xs, mean - half_width, mean + half_width, color=color, alpha=0.1)

In [10]:
# Accumulator for every experiment's `summary`, keyed by experiment name;
# later cells fill it in and the final cell pickles it.
results = dict()

In [11]:
# --- Problem dimensions -----------------------------------------------------
num_deltas = 10          # how many distinct delta values exist
states = range(3)        # state ids 0, 1, 2
actions = range(2)       # action ids 0, 1
terminal_state = 2       # state id handed to run_estimation as the terminal one

# Delta labels run 1..num_deltas; `delays` enumerates their positions 0..num_deltas-1.
delta_names = [*range(1, num_deltas + 1)]
delays = range(num_deltas)

# Learning rate forwarded to run_estimation as T_lr.
# NOTE(review): the original carried a trailing "#03" note here - possibly an
# alternative value that was tried (e.g. 0.003 / 0.03); confirm before reuse.
T_lr = 0.01

In [12]:

# Ground-truth transition probabilities, shape (2, 3, 3): one 3x3 matrix per
# entry of the first axis, every row summing to 1.
# Presumably indexed [action, state, next_state] - confirm against
# matrix_estimation. Row 2 of each matrix is a pure self-loop, matching
# terminal_state = 2.
true_T = np.stack([
    np.array([[0.89, 0.10, 0.01],
              [0.15, 0.80, 0.05],
              [0.00, 0.00, 1.00]]),
    np.array([[0.10, 0.89, 0.01],
              [0.80, 0.15, 0.05],
              [0.00, 0.00, 1.00]]),
])

# Show the [0, :, :, 0] slice of the expanded tensor as a quick visual check
# against the first matrix of true_T.
get_expanded_T(
    torch.tensor(true_T),
    len(delta_names),
)[0, :, :, 0]

tensor([[0.8900, 0.1000, 0.0100],
        [0.1500, 0.8000, 0.0500],
        [0.0000, 0.0000, 1.0000]])

In [13]:
# Echo the raw (un-expanded) transition array so it can be compared by eye
# with the expanded slice displayed by the previous cell.
true_T

array([[[0.89, 0.1 , 0.01],
        [0.15, 0.8 , 0.05],
        [0.  , 0.  , 1.  ]],

       [[0.1 , 0.89, 0.01],
        [0.8 , 0.15, 0.05],
        [0.  , 0.  , 1.  ]]])

In [14]:
# Sample sizes swept by run_estimation; they end up on the x axis of every
# summary plot (column 'N').
Ns = [1, 2, 5, 10, 20, 50, 100]

# Independent repetitions per configuration; also used to scale the error
# band in plot_summary_df.
reps = 30

# Stopping settings forwarded verbatim to run_estimation.
# NOTE(review): presumably a loss-change tolerance and an early-stopping
# patience count - confirm in matrix_estimation.
convergence, patience = 1e-5, 3

# Everything runs on the CPU.
device = torch.device("cpu")

In [None]:
# Experiment: uniform deltas, exhaustive mode (exhaustive=True).
# explore_deltas covers 0..num_deltas-1 (presumably positions into
# delta_names - confirm in matrix_estimation).
explore_deltas = range(num_deltas)

summary = run_estimation(
    true_T, delta_names, explore_deltas, Ns, reps, states, actions, device, terminal_state,
    T_lr, convergence, patience, delta_schedule=None, exhaustive=True)

# 'expanded_est_T' is never plotted, so drop it *before* sizing the grid;
# otherwise it still consumes a slot and leaves a blank panel in the figure.
plot_metrics = [metric for metric in summary.keys() if metric != 'expanded_est_T']
n_rows = max(1, int(np.ceil(len(plot_metrics) / 2)))
# squeeze=False keeps `axes` two-dimensional even when there is a single row.
fig, axes = plt.subplots(n_rows, 2, figsize=(8 * 1.5, 4 * n_rows), squeeze=False)
panels = axes.ravel()

# Estimators are drawn in this fixed order with a fixed colour each.
estimator_colors = (('smart', 'tab:blue'), ('dumb', 'tab:orange'), ('empirical', 'tab:green'))
for ax, metric in zip(panels, plot_metrics):
    summary_df = pd.DataFrame(summary[metric])
    for estimator, color in estimator_colors:
        estimator_df = summary_df[summary_df['estimator'] == estimator]
        plot_summary_df(estimator_df, ax, color=color, label=estimator, reps=reps)
    ax.legend()
    ax.set_ylabel(metric)
    ax.set_xlabel('Number of repetitions of each (s,a,k) combo')
    ax.set_title(metric)
    ax.set_xscale('log')

# Hide the leftover panel when the number of plotted metrics is odd.
for unused_ax in panels[len(plot_metrics):]:
    unused_ax.set_visible(False)

# Set the figure title before tight_layout so the layout pass can make room
# for it. The title names the mode so it is distinguishable from the
# sampling experiment's figure.
fig.suptitle('Uniform Delays (Exhaustive)')
fig.tight_layout()

results['uniform_exhaustive_delays'] = summary

In [None]:
# Experiment: uniform deltas, sampling mode (exhaustive=False).
# explore_deltas covers 0..num_deltas-1 (presumably positions into
# delta_names - confirm in matrix_estimation).
explore_deltas = range(num_deltas)

summary = run_estimation(
    true_T, delta_names, explore_deltas, Ns, reps, states, actions, device, terminal_state,
    T_lr, convergence, patience, delta_schedule=None, exhaustive=False)

# 'expanded_est_T' is never plotted, so drop it *before* sizing the grid;
# otherwise it still consumes a slot and leaves a blank panel in the figure.
plot_metrics = [metric for metric in summary.keys() if metric != 'expanded_est_T']
n_rows = max(1, int(np.ceil(len(plot_metrics) / 2)))
# squeeze=False keeps `axes` two-dimensional even when there is a single row.
fig, axes = plt.subplots(n_rows, 2, figsize=(8 * 1.5, 4 * n_rows), squeeze=False)
panels = axes.ravel()

# Estimators are drawn in this fixed order with a fixed colour each.
estimator_colors = (('smart', 'tab:blue'), ('dumb', 'tab:orange'), ('empirical', 'tab:green'))
for ax, metric in zip(panels, plot_metrics):
    summary_df = pd.DataFrame(summary[metric])
    for estimator, color in estimator_colors:
        estimator_df = summary_df[summary_df['estimator'] == estimator]
        plot_summary_df(estimator_df, ax, color=color, label=estimator, reps=reps)
    ax.legend()
    ax.set_ylabel(metric)
    ax.set_xlabel('Number of repetitions of each (s,a,k) combo')
    ax.set_title(metric)
    ax.set_xscale('log')

# Hide the leftover panel when the number of plotted metrics is odd.
for unused_ax in panels[len(plot_metrics):]:
    unused_ax.set_visible(False)

# Set the figure title before tight_layout so the layout pass can make room
# for it. The title names the mode so it is distinguishable from the
# exhaustive experiment's figure.
fig.suptitle('Uniform Delays (Sampled)')
fig.tight_layout()

results['uniform_sampling_delays'] = summary

In [None]:
# Experiment: explore only the largest delta (last position, num_deltas - 1).
# The `exhaustive` argument is left at run_estimation's default here.
explore_deltas = [num_deltas - 1]

summary = run_estimation(
    true_T, delta_names, explore_deltas, Ns, reps, states, actions, device, terminal_state,
    T_lr, convergence, patience, delta_schedule=None)

# 'expanded_est_T' is never plotted, so drop it *before* sizing the grid;
# otherwise it still consumes a slot and leaves a blank panel in the figure.
plot_metrics = [metric for metric in summary.keys() if metric != 'expanded_est_T']
n_rows = max(1, int(np.ceil(len(plot_metrics) / 2)))
# squeeze=False keeps `axes` two-dimensional even when there is a single row.
fig, axes = plt.subplots(n_rows, 2, figsize=(8 * 1.5, 4 * n_rows), squeeze=False)
panels = axes.ravel()

# Estimators are drawn in this fixed order with a fixed colour each.
estimator_colors = (('smart', 'tab:blue'), ('dumb', 'tab:orange'), ('empirical', 'tab:green'))
for ax, metric in zip(panels, plot_metrics):
    summary_df = pd.DataFrame(summary[metric])
    for estimator, color in estimator_colors:
        estimator_df = summary_df[summary_df['estimator'] == estimator]
        plot_summary_df(estimator_df, ax, color=color, label=estimator, reps=reps)
    ax.legend()
    ax.set_ylabel(metric)
    ax.set_xlabel('Number of repetitions of each (s,a,k) combo')
    ax.set_title(metric)

# Hide the leftover panel when the number of plotted metrics is odd.
for unused_ax in panels[len(plot_metrics):]:
    unused_ax.set_visible(False)

# Set the figure title before tight_layout so the layout pass can make room
# for it instead of letting it overlap the top row of panel titles.
fig.suptitle('Max Delay Only')
fig.tight_layout()

results['max_delta_delay'] = summary

In [None]:
# Experiment: explore only the smallest delta (position 0).
# The `exhaustive` argument is left at run_estimation's default here.
explore_deltas = [0]

summary = run_estimation(
    true_T, delta_names, explore_deltas, Ns, reps, states, actions, device, terminal_state,
    T_lr, convergence, patience, delta_schedule=None)

# 'expanded_est_T' is never plotted, so drop it *before* sizing the grid;
# otherwise it still consumes a slot and leaves a blank panel in the figure.
plot_metrics = [metric for metric in summary.keys() if metric != 'expanded_est_T']
n_rows = max(1, int(np.ceil(len(plot_metrics) / 2)))
# squeeze=False keeps `axes` two-dimensional even when there is a single row.
fig, axes = plt.subplots(n_rows, 2, figsize=(8 * 1.5, 4 * n_rows), squeeze=False)
panels = axes.ravel()

# Estimators are drawn in this fixed order with a fixed colour each.
estimator_colors = (('smart', 'tab:blue'), ('dumb', 'tab:orange'), ('empirical', 'tab:green'))
for ax, metric in zip(panels, plot_metrics):
    summary_df = pd.DataFrame(summary[metric])
    for estimator, color in estimator_colors:
        estimator_df = summary_df[summary_df['estimator'] == estimator]
        plot_summary_df(estimator_df, ax, color=color, label=estimator, reps=reps)
    ax.legend()
    ax.set_ylabel(metric)
    ax.set_xlabel('Number of repetitions of each (s,a,k) combo')
    ax.set_title(metric)

# Hide the leftover panel when the number of plotted metrics is odd.
for unused_ax in panels[len(plot_metrics):]:
    unused_ax.set_visible(False)

# Set the figure title before tight_layout so the layout pass can make room
# for it instead of letting it overlap the top row of panel titles.
fig.suptitle('Min Delay Only')
fig.tight_layout()

results['min_delta_delay'] = summary

In [20]:
import pickle
from pathlib import Path

# Persist every experiment summary collected in `results`.
# NOTE(review): in the saved notebook this cell carries an execution count
# while the experiment cells do not - rerun the notebook top to bottom before
# trusting the contents of the pickle.
out_path = Path('results/k10_done.pkl')
# Create the output directory on first use; open(..., 'wb') alone raises
# FileNotFoundError when 'results/' does not exist yet.
out_path.parent.mkdir(parents=True, exist_ok=True)
with out_path.open('wb') as fout:
    pickle.dump(results, fout)