In [None]:
import os

In [None]:
# Make the parent of the current working directory the new working directory,
# so the relative paths used below resolve from there.
# NOTE: this is not idempotent - re-running the cell moves up one more level.
os.chdir(os.path.dirname(os.getcwd()))

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import gaussian_kde
%matplotlib inline

In [None]:
import matplotlib.style as style

In [None]:
import ipywidgets as widgets
from ipywidgets import interact, interact_manual

In [None]:
# Matplotlib 3.6 renamed its bundled seaborn styles to 'seaborn-v0_8-*' and
# deprecated the old names, which later releases no longer accept. Prefer the
# new name when the installed version provides it, otherwise keep the legacy one.
style.use('seaborn-v0_8-poster' if 'seaborn-v0_8-poster' in style.available else 'seaborn-poster')

### Parse raw logs and create DataFrame

In [None]:
def parse_raw_log(log):
    """Extract the values of every ``LOG`` record from raw log text.

    A record line is expected to look like
    ``<field>, LOG: key:value, key:value, ...``: the part before the first
    ``': '`` is split on ``', '`` and its second field must equal ``LOG``.
    Lines that do not have this shape (blank lines, other levels, free-form
    continuation lines) are skipped instead of raising.

    Every line of the input is considered, including a final line that is not
    terminated by a newline.

    Parameters
    ----------
    log : str
        Full, newline-separated text of the log.

    Returns
    -------
    list of list
        One inner list per LOG line, holding that line's values in the order
        they appear. A value that parses with ``float`` is stored as a float;
        anything else is kept as the raw string.

    Raises
    ------
    IndexError
        If an entry of a LOG line contains no ``':'`` separator.
    """
    data = []
    for line in log.split('\n'):
        raw = line.split(': ')
        if len(raw) < 2:
            # No "<prefix>: <payload>" structure (e.g. blank line): not a record.
            continue

        prefix_fields = raw[0].split(', ')
        if len(prefix_fields) < 2 or prefix_fields[1] != 'LOG':
            continue

        datum = []
        for entry in raw[1].split(', '):
            # Each entry is "key:value"; only the value is kept.
            value = entry.split(':')[1]
            try:
                datum.append(float(value))
            except ValueError:
                # Non-numeric values stay as strings.
                datum.append(value)

        data.append(datum)

    return data

In [None]:
# Name of the run directory under 'experiments/' whose training.log is loaded.
# Must be set by hand before running this cell.
experiment_folder = None
if experiment_folder is None:
    # Without this guard os.path.join fails on None with an opaque TypeError.
    raise ValueError("Set `experiment_folder` to a directory name under 'experiments/' before running this cell.")

log_path = os.path.join('experiments', experiment_folder, 'training.log')
with open(log_path, 'r') as f:
    log = f.read()

# One list of values per LOG line, in the order the fields were written.
data = parse_raw_log(log)

# Column names are assigned positionally to the values of each LOG line.
columns = ['episode', 'step', 'action', 'kind', 'reward', 'best_mean_reward', 'loss', 'epsilon', 'S', 'c', 'delta', 'n', 'dn', 'cost', 'pnl', 'K', 'T']
train_df = pd.DataFrame(data, columns = columns)
# parse_raw_log stores every numeric value as float; restore integer dtypes for these columns.
train_df = train_df.astype({'episode': int, 'step': int, 'action': int, 'dn': int})

In [None]:
# Preview the first rows recorded for episode 1.
train_df.loc[train_df['episode'].eq(1)].head()

### Visualize arbitrary episode

In [None]:
@interact
def plot_episode(episode = (1, 30000, 1)):
    """Plot the per-step trace of one episode from ``train_df``.

    Draws ``n`` against ``step`` as a line, ``delta * -100`` against ``step``
    as a second line, and overlays scatter markers at ``(step, n)`` coloured
    by the row's ``kind``: red for ``'random'``, green for ``'policy'``.

    Parameters
    ----------
    episode : int
        Episode number to display. The ``(1, 30000, 1)`` default is the
        (min, max, step) tuple handed to ``interact``.
    """
    selected = train_df.loc[train_df['episode'] == episode]
    steps = selected['step']

    fig, ax = plt.subplots(figsize = (12, 8))
    ax.plot(steps, selected['n'], color = 'blue', lw = 1.5, label = 'n')
    ax.plot(steps, selected['delta'] * -100, color = 'orange', lw = 1.5, label = 'delta')

    # One scatter layer per action kind, drawn in the same order as before.
    for kind, marker_color in (('random', 'red'), ('policy', 'green')):
        of_kind = selected.loc[selected['kind'] == kind]
        ax.scatter(of_kind['step'], of_kind['n'], color = marker_color, label = kind)

    ax.legend()
    plt.show()

In [None]:
# Reward per episode (sum over the episode's steps) together with the
# per-episode mean of the logged best_mean_reward.
fig, ax = plt.subplots(figsize = (12, 8))

# numeric_only=True: the 'kind' column holds strings, and pandas >= 2.0 raises
# on mean() over non-numeric columns instead of silently dropping them.
groupby_episode = train_df.groupby('episode').mean(numeric_only = True)
# Select 'reward' before summing so the string column is never aggregated.
# [:-1] leaves out the last episode present in the log.
rewards = train_df.groupby('episode')['reward'].sum().values[:-1]
best_mean_reward = groupby_episode['best_mean_reward'].values[:-1]
mean_loss = groupby_episode['loss'].values[:-1]  # not plotted in this cell

# The clip bounds (-inf, None) leave the data unchanged.
# The x axis is a 1-based position, not the episode index itself.
ax.plot(np.arange(1, len(rewards) + 1), np.clip(rewards, -np.inf, None), label = 'reward', color = 'blue', lw = 0.1)
ax.plot(np.arange(1, len(best_mean_reward) + 1), np.clip(best_mean_reward, -np.inf, None), label = 'best_mean_reward', color = 'red', lw = 1.5)
ax.set_ylabel('reward')
ax.set_xlabel('episode')
# The curves carry labels, so actually draw the legend that shows them.
ax.legend()
#fig.savefig('reward_vs_episode.png', dpi = 300)
plt.show()

In [None]:
# Two stacked panels sharing the x axis: reward curves on top, mean loss below.
fig, ax = plt.subplots(figsize = (14, 10), nrows = 2, ncols = 1, sharex = True)

# numeric_only=True: the 'kind' column holds strings, and pandas >= 2.0 raises
# on mean() over non-numeric columns instead of silently dropping them.
groupby_episode = train_df.groupby('episode').mean(numeric_only = True)
# Select 'reward' before summing so the string column is never aggregated.
# [:-1] leaves out the last episode present in the log.
rewards = train_df.groupby('episode')['reward'].sum().values[:-1]
best_mean_reward = groupby_episode['best_mean_reward'].values[:-1]
mean_loss = groupby_episode['loss'].values[:-1]

# Top panel. The clip bounds (-inf, None) leave the data unchanged.
ax[0].plot(np.arange(1, len(rewards) + 1), np.clip(rewards, -np.inf, None), label = 'reward', color = 'blue', lw = 0.2)
ax[0].plot(np.arange(1, len(best_mean_reward) + 1), np.clip(best_mean_reward, -np.inf, None), label = 'best_mean_reward', color = 'red', lw = 1.5)
ax[0].set_ylabel('reward')
# Two labelled curves share this panel, so draw the legend that tells them apart.
ax[0].legend()

# Bottom panel: per-episode mean of the logged loss.
ax[1].plot(np.arange(1, len(mean_loss) + 1), mean_loss, label = 'loss', color = 'blue', lw = 1)
ax[1].set_xlabel('episode')
ax[1].set_ylabel('loss')
plt.show()