In [1]:
# Enable the autoreload extension; mode 2 reloads imported modules before each
# cell runs, so edits to local .py files are picked up without a kernel restart.
%load_ext autoreload
%autoreload 2
# Render matplotlib figures inline in the cell output.
%matplotlib inline

In [2]:
# Authenticate with Weights & Biases without embedding the API key in the
# notebook. wandb.login() picks the key up from the WANDB_API_KEY environment
# variable or from credentials already stored in ~/.netrc, and prompts for it
# otherwise.
# NOTE(review): the key that used to be hardcoded in this cell has been exposed
# to anyone with access to the notebook and should be revoked and regenerated.
import wandb

wandb.login()

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /home/gd1279/.netrc
[32mSuccessfully logged in to Weights & Biases![0m


In [11]:
import numpy as np
import pandas as pd
import scipy
from scipy import stats
from scipy.special import factorial

from mpl_toolkits.mplot3d import Axes3D
import matplotlib
import matplotlib.pyplot as plt
from matplotlib import patches
from matplotlib import path as mpath
import matplotlib.gridspec as gridspec

import pickle
import tabulate
import wandb

In [12]:
# Client object for the W&B API; every run query below goes through it.
api = wandb.Api()

In [6]:
# Seeds of the runs to analyse: 200 through 209 inclusive.
run_seeds = np.arange(200, 210)

# Keep only the project's runs whose configured seed is one of `run_seeds`.
# config.get() is used instead of config['seed'] so that a run whose config has
# no 'seed' entry is skipped rather than aborting the whole cell with KeyError.
initial_runs = [run for run in api.runs('augmented-frostbite/initial-experiments')
                if run.config.get('seed') in run_seeds]

In [None]:
# Per-run metric curves: each list receives one 1-D float array per run in
# `initial_runs`, in the same order as `initial_runs`.
q_value_means = []
q_value_stds = []

reward_means = []
reward_stds = []

# History column -> list that collects that column for every run.
metric_columns = {
    'Q_value_mean': q_value_means,
    'Q_value_std': q_value_stds,
    'reward_mean': reward_means,
    'reward_std': reward_stds,
}

for run in initial_runs:
    history = run.history(pandas=True)
    print(run.name)
    for column, curves in metric_columns.items():
        # Builtin float rather than np.float: the np.float alias was deprecated
        # in NumPy 1.20 and removed in 1.24, where it raises AttributeError.
        curves.append(np.array(history[column], dtype=float))

# Runs can have histories of different lengths; right-pad the shorter curves
# with NaN so every list holds equal-length arrays (NaN-aware reductions such
# as np.nanmean then ignore the padding).
for curves in metric_columns.values():
    max_len = max(arr.shape[0] for arr in curves)
    # Slice assignment updates the existing list object in place.
    curves[:] = [
        np.pad(arr, (0, max_len - arr.shape[0]), 'constant', constant_values=np.nan)
        for arr in curves
    ]

# Basic plots

In [None]:
NROWS = 1
NCOLS = 2
COL_WIDTH = 6
ROW_HEIGHT = 5
WIDTH_SPACING = 2
HEIGHT_SPACING = 0
COLORMAP = 'cool'
# Multiplier that turns a history-row index into a step count for the x axis.
# NOTE(review): presumably one history row is logged every 10,000 steps —
# confirm against the training loop.
STEPS_PER_ROW = 10000
X_LABEL = 'Steps (1 step = 4 frames, 200k frames ~ 1 hr @ 60 fps)'


@matplotlib.ticker.FuncFormatter
def million_formatter(x, pos):
    """Format a tick value in millions, e.g. 1500000 -> '1.5M'.

    Args:
        x: Tick value in data coordinates.
        pos: Tick position index supplied by matplotlib (unused).

    Returns:
        The tick label as a string; '0' for a tick at exactly zero.
    """
    if x == 0:
        # Return a string (not the int 0): formatters are expected to yield labels.
        return '0'

    return f'{x / 10 ** 6:.1f}M'


def plot_run_curves(ax, x, means, stds, cmap, title, ylabel):
    """Draw one curve per run with a +/- one-std band, plus the across-run mean.

    Args:
        ax: Matplotlib axes to draw on.
        x: 1-D array of x positions shared by every curve.
        means: Sequence of equal-length 1-D arrays, one mean curve per run.
        stds: Sequence of arrays matching `means`, one std curve per run.
        cmap: Colormap callable mapping a value in [0, 1] to a colour.
        title: Axes title.
        ylabel: Y-axis label.

    Returns:
        The NaN-ignoring mean of `means` across runs (the black curve).
    """
    # Spread the runs over the full colormap whatever their number; max(..., 1)
    # avoids dividing by zero when there is a single run.
    color_span = max(len(means) - 1, 1)

    for i, (mean, std) in enumerate(zip(means, stds)):
        color = cmap(i / color_span)
        ax.plot(x, mean, lw=1, color=color)
        ax.fill_between(x, mean - std, mean + std, color=color, alpha=0.10)

    overall_mean = np.nanmean(means, axis=0)
    ax.plot(x, overall_mean, lw=2, color='black')

    ax.set_title(title)
    ax.set_xlabel(X_LABEL)
    ax.set_ylabel(ylabel)
    ax.xaxis.set_major_formatter(million_formatter)
    return overall_mean


figure = plt.figure(figsize=(NCOLS * COL_WIDTH + WIDTH_SPACING, NROWS * ROW_HEIGHT + HEIGHT_SPACING))
plt.subplots_adjust(hspace=0.4, wspace=0.2)

# All curves were padded to a common length, so the first one defines the x axis.
x = np.arange(1, len(reward_means[0]) + 1) * STEPS_PER_ROW
cmap = plt.get_cmap(COLORMAP)

reward_ax = plt.subplot(NROWS, NCOLS, 1)
overall_reward_mean = plot_run_curves(
    reward_ax, x, reward_means, reward_stds, cmap, 'Rewards', 'Reward')

q_ax = plt.subplot(NROWS, NCOLS, 2)
overall_q_mean = plot_run_curves(
    q_ax, x, q_value_means, q_value_stds, cmap, 'Q-values', 'Average Q-value')

plt.show()

In [None]:
# Display the seed stored in the first selected run's config.
initial_runs[0].config['seed']

In [8]:
# Display the (name, id) pair of the first selected run.
initial_runs[0].name, initial_runs[0].id 

('data-efficient-5M-201', 'yslgd3ls')

In [None]:
# Fetch the first selected run's history with default arguments.
# Note: `h` is rebound to a different run's history in a later cell.
h = initial_runs[0].history()

In [None]:
# Display the last entry of the history's 'steps' column.
h['steps'].iat[-1]

In [None]:
# Print the 'steps' history column of every run in the project whose seed is 123.
for existing_run in api.runs('augmented-frostbite/initial-experiments'):
    # .get() so a run whose config has no 'seed' entry is skipped instead of
    # raising KeyError and aborting the scan.
    if existing_run.config.get('seed') == 123:
        print(existing_run.history()['steps'])

In [None]:
# Collection of the files attached to the first selected run.
files = initial_runs[0].files()

In [None]:
# Print the name of each file returned when asking the first selected run for
# 'config2.yaml'.
for f in initial_runs[0].files('config2.yaml'):
    print(f.name)

In [None]:
# Display the file handle for 'config2.yaml' on the first selected run.
initial_runs[0].file('config2.yaml')

In [None]:
# Short alias for the first selected run.
# Note: other cells in this notebook also assign `r`, so its value depends on
# which cell ran last.
r = initial_runs[0]

In [None]:
# Print the name and storage id of every run in the project.
# The loop uses its own variable name so that it does not silently overwrite
# the `r` alias bound in another cell.
for project_run in api.runs('augmented-frostbite/initial-experiments'):
    print(project_run.name, project_run.storage_id)

In [None]:
# Exploratory: list the attribute names available on a run object.
dir(initial_runs[0])

In [None]:
# NOTE(review): leftover interactive help lookup. `tqdm` is not imported in any
# cell visible here; drop this cell before sharing the notebook.
tqdm.trange?

In [None]:
# Scratch check, unrelated to the run analysis: is the string a palindrome?
s = 'cabac'

# True when the string equals its own reversal.
s == ''.join(reversed(s))

In [13]:
# Query the project with a filter requiring both config.id ==
# 'data-efficient-resume-test' and config.seed == 200.
runs = api.runs('augmented-frostbite/initial-experiments', 
                {"$and": [{"config.id": "data-efficient-resume-test"}, {"config.seed": 200}]})
# Take the first match. This rebinds `r`, which other cells also assign, and
# indexing raises if the query matched no runs.
r = runs[0]

In [14]:
# Fetch the selected run's history, requesting 1000 samples.
# This rebinds `h`, which an earlier cell set to a different run's history.
h = r.history(samples=1000)

In [15]:
# Display the fetched history.
h

In [17]:
# Number of rows in the fetched history. The saved output is 0, i.e. nothing
# came back for this run. NOTE(review): the cause is not visible here.
len(h)

0