In [None]:
import numpy as np
import lzma as xz
import sys

In [None]:
# Enumerate the compressed result files. The path template has three slots:
# the number after `dataset_`, the method name, and the number after `trial_`.
# The method varies fastest, then the trial number, then the dataset number,
# so every run of four consecutive entries shares one (dataset, trial) pair.
files = []
for dataset in [1, 2]:
    for trial in [1, 2]:
        for method in ['acktr', 'ddpg', 'ppo2', 'trpo']:
            files.append(
                'data/experiments_out/out_dataset_{}_{}_trial_{}.csv.xz'.format(dataset, method, trial)
            )

# Echo every path next to its position in `files`.
for position, path in enumerate(files):
    print(position, '\t', path)

In [None]:
# Number of rows between two consecutive summary points.
N = 100000
# One dict per input file, in the same order as `files`.
data = []

for path in files:
    print('Reading {}...'.format(path))
    chunk_means = []
    chunk_vars = []
    # Stream the xz-compressed CSV as text so the file is never fully in memory.
    with xz.open(path, 'rt') as stream:
        pending = []
        row = 0
        for record in stream:
            # Only the fourth comma-separated field (index 3) is used.
            fields = record.split(',')
            pending.append(float(fields[3]))
            # Rows are counted from 0, so this fires on the very first row:
            # the first summary point therefore covers a single value, and
            # every later one covers the N rows read since the previous point.
            # Rows after the last multiple of N are never summarised.
            if row % N == 0:
                chunk_means.append(np.mean(pending))
                chunk_vars.append(np.var(pending))
                pending = []
                # Progress is reported against a fixed total of 1e7 rows
                # (presumably the expected length of each file -- confirm).
                sys.stdout.write('\r{:6.2f}% done...'.format(100 * row / 1e7))
            row += 1
    # Terminate the carriage-return progress line before the next file.
    sys.stdout.write('\n')
    data.append({
        'mean': chunk_means,
        'vars': chunk_vars
    })

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# Legend labels, in the same order in which the methods cycle through `data`.
methods = ['ACKTR', 'DDPG', 'PPO2', 'TRPO']
fig, axs = plt.subplots(1, 4, figsize=(9, 1.75), sharey=True)

# One panel per group of four consecutive entries in `data`.
for panel, ax in enumerate(axs):
    for offset in range(4):
        run_means = data[panel * 4 + offset]['mean']
        # Skip the first entry of each series: the loading cell records it
        # after reading only a single row. The x axis is the row count of the
        # remaining points, expressed in millions.
        x_millions = np.arange(len(run_means) - 1) * N / 1e6
        ax.plot(x_millions, run_means[1:], label=methods[offset])
    ax.set_ylim(-4, -2)
    ax.set_xlabel('Iterations $\\times 10^6$')

# The y axis is shared, so only the left-most panel carries the label and
# the (single) legend.
left = axs[0]
left.set_ylabel('Average reward')
left.legend(loc='best', ncol=4)

fig.savefig('../doc/media/results_.pdf', bbox_inches='tight')