# Make plots of reward and average depth for the optimistic planning procedure in dRGP-TS

In [None]:
# import modules
%matplotlib inline

import matplotlib.pyplot as plt
import matplotlib.colors as col
from matplotlib import gridspec
import numpy as np
from pylab import figure, show, ylim, xlim
import os
import itertools
from cycler import cycler


In [None]:
# Switch to the directory holding the saved result archives. The path is
# relative, so it is resolved against the current working directory at the
# time this cell runs.
saves_dir = '../saves/'
os.chdir(saves_dir)

In [None]:
# Load the saved optimistic-planning result archives, one .npz file per index i.
# The two d4 settings (K10 and K30) have 12 archives each (i = 0..11); the
# d8 K10 setting has 13 (i = 0..12).
#
# allow_pickle=True is required: the 'quantCumRew' entry of each archive is
# later unpacked with .item() into a dict, i.e. it is stored as a pickled
# object array, and NumPy >= 1.16.3 refuses to load such arrays by default.
# NOTE: unpickling can execute arbitrary code -- only load archives you trust.
rgpts_d4_K10 = {}
rgpts_d8_K10 = {}
rgpts_d4_K30 = {}
for i in range(12):
    rgpts_d4_K10[i] = np.load('rgpts_opplan_l4_d4_K10_%d.npz' % i, allow_pickle=True)
    rgpts_d4_K30[i] = np.load('rgpts_opplan_l4_d4_K30_%d.npz' % i, allow_pickle=True)
for i in range(13):
    rgpts_d8_K10[i] = np.load('rgpts_opplan_l4_d8_K10_%d.npz' % i, allow_pickle=True)

# Process data

In [None]:
# Pull the 'meancumrewRGPUCB' array out of every loaded archive, keyed by the
# archive index (0..11 for the two d4 settings, 0..12 for d8 K10).
cumrewRGPTS_d4_K10 = {idx: rgpts_d4_K10[idx]['meancumrewRGPUCB'] for idx in range(12)}
cumrewRGPTS_d8_K10 = {idx: rgpts_d8_K10[idx]['meancumrewRGPUCB'] for idx in range(13)}
cumrewRGPTS_d4_K30 = {idx: rgpts_d4_K30[idx]['meancumrewRGPUCB'] for idx in range(12)}


In [None]:
# Pull the 'quantCumRew' entry out of every loaded archive. Each entry is a
# 0-d array wrapping a single Python object, which .item() unwraps; the
# object is indexed by the string keys '5' and '95' further down.
quantRewRGPTS_d4_K10 = {idx: rgpts_d4_K10[idx]['quantCumRew'].item() for idx in range(12)}
quantRewRGPTS_d8_K10 = {idx: rgpts_d8_K10[idx]['quantCumRew'].item() for idx in range(13)}
quantRewRGPTS_d4_K30 = {idx: rgpts_d4_K30[idx]['quantCumRew'].item() for idx in range(12)}


In [None]:
# Vectors to plot: the last entry of each mean cumulative-reward series,
# one value per archive, in archive order.
cumrewRGPTS_d4_K10_mean = np.array([series[-1] for series in cumrewRGPTS_d4_K10.values()])
cumrewRGPTS_d8_K10_mean = np.array([series[-1] for series in cumrewRGPTS_d8_K10.values()])
cumrewRGPTS_d4_K30_mean = np.array([series[-1] for series in cumrewRGPTS_d4_K30.values()])

In [None]:
# Confidence-band vectors to plot: the last entry of the '5' and '95'
# quantile series of each archive, in archive order.

# lower edge of the band (5%)
cumrewRGPTS_d4_K10_05 = np.array([quants['5'][-1] for quants in quantRewRGPTS_d4_K10.values()])
cumrewRGPTS_d8_K10_05 = np.array([quants['5'][-1] for quants in quantRewRGPTS_d8_K10.values()])
cumrewRGPTS_d4_K30_05 = np.array([quants['5'][-1] for quants in quantRewRGPTS_d4_K30.values()])

# upper edge of the band (95%)
cumrewRGPTS_d4_K10_95 = np.array([quants['95'][-1] for quants in quantRewRGPTS_d4_K10.values()])
cumrewRGPTS_d8_K10_95 = np.array([quants['95'][-1] for quants in quantRewRGPTS_d8_K10.values()])
cumrewRGPTS_d4_K30_95 = np.array([quants['95'][-1] for quants in quantRewRGPTS_d4_K30.values()])

In [None]:
# Pull the 'MYDS' (depth) array out of every loaded archive, keyed by the
# archive index (0..11 for the two d4 settings, 0..12 for d8 K10).
myds_d4_K10 = {idx: rgpts_d4_K10[idx]['MYDS'] for idx in range(12)}
myds_d8_K10 = {idx: rgpts_d8_K10[idx]['MYDS'] for idx in range(13)}
myds_d4_K30 = {idx: rgpts_d4_K30[idx]['MYDS'] for idx in range(12)}


In [None]:
# Average depth per archive: the mean over all entries of its depth array,
# collected in archive order.
avgd_d4_K10 = np.array([np.array(depths).mean() for depths in myds_d4_K10.values()])
avgd_d8_K10 = np.array([np.array(depths).mean() for depths in myds_d8_K10.values()])
avgd_d4_K30 = np.array([np.array(depths).mean() for depths in myds_d4_K30.values()])

# Make plots of reward and average depth
## d=4 K=10

In [None]:
# Figure for d=4, K=10: total reward with its 5%-95% band (top panel) and
# average depth (bottom panel), both plotted against N.
xvals = np.array([100, 500, 1000, 2000, 3000, 4000, 5000, 6000, 7000, 8000, 9000, 10000])

reward_colour = '#1f77b4'
depth_colour = '#ff7f0e'

fig = plt.figure(figsize=(8, 6))
# two stacked panels, the top one twice as tall as the bottom one
gs = gridspec.GridSpec(2, 1, height_ratios=[2, 1])

# top panel: thin lines for the band edges, thick line for the mean,
# and a translucent fill between the edges
ax0 = plt.subplot(gs[0])
ax0.plot(xvals, cumrewRGPTS_d4_K10_05, color=reward_colour, linewidth=1)
ax0.plot(xvals, cumrewRGPTS_d4_K10_mean, color=reward_colour, linewidth=4, label="2RGP-TS")
ax0.plot(xvals, cumrewRGPTS_d4_K10_95, color=reward_colour, linewidth=1)
ax0.fill_between(xvals, cumrewRGPTS_d4_K10_05, cumrewRGPTS_d4_K10_95,
                 alpha=0.4, facecolor=reward_colour)

# bottom panel: average depth, sharing its x axis with the top panel
ax1 = plt.subplot(gs[1], sharex=ax0)
ax1.plot(xvals, avgd_d4_K10, color=depth_colour, linewidth=4)
# x tick labels are shown on the bottom panel only
plt.setp(ax0.get_xticklabels(), visible=False)
# hide the label of the last current major y tick, then fix the tick positions
ax1.yaxis.get_major_ticks()[-1].label1.set_visible(False)
ax1.set_yticks(np.array([2.0, 3.0, 4.0]))

# axis labels
ax0.set_ylabel('Total Reward \n at $T=1000$', fontsize=22)
ax1.set_ylabel('Average $d_N$', fontsize=22)
ax1.set_xlabel('Policies per Lookahead ($N$)', fontsize=22)

# tick styling; the top panel's x axis gets no label size because its
# tick labels are hidden
ax0.xaxis.set_tick_params(width=3, length=6)
for axis in (ax1.xaxis, ax1.yaxis, ax0.yaxis):
    axis.set_tick_params(labelsize=18, width=3, length=6)

# set the vertical spacing between the two panels
plt.subplots_adjust(hspace=.15)
plt.show()

In [None]:
# Write the d=4, K=10 figure to a PDF with a tight bounding box.
# NOTE(review): the target path is absolute (rooted at '/') -- confirm that
# this is the intended output location.
out_path = "/figures/opplan_l4_d4_K10.pdf"
fig.savefig(out_path, bbox_inches='tight')

## d=8 K=10

In [None]:
# Figure for d=8, K=10: total reward with its 5%-95% band (top panel) and
# average depth (bottom panel), both plotted against N.
# (N = 100000 is not included in the x values.)
xvals = np.array([100, 500, 1000, 5000, 10000, 20000, 30000, 40000,
                  50000, 60000, 70000, 80000, 90000])

reward_colour = '#1f77b4'
depth_colour = '#ff7f0e'

fig = plt.figure(figsize=(8, 6))
# two stacked panels, the top one twice as tall as the bottom one
gs = gridspec.GridSpec(2, 1, height_ratios=[2, 1])

# top panel: thin lines for the band edges, thick line for the mean,
# and a translucent fill between the edges
ax0 = plt.subplot(gs[0])
ax0.plot(xvals, cumrewRGPTS_d8_K10_05, color=reward_colour, linewidth=1)
ax0.plot(xvals, cumrewRGPTS_d8_K10_mean, color=reward_colour, linewidth=4, label="2RGP-TS")
ax0.plot(xvals, cumrewRGPTS_d8_K10_95, color=reward_colour, linewidth=1)
ax0.fill_between(xvals, cumrewRGPTS_d8_K10_05, cumrewRGPTS_d8_K10_95,
                 alpha=0.4, facecolor=reward_colour)

# bottom panel: average depth, sharing its x axis with the top panel
ax1 = plt.subplot(gs[1], sharex=ax0)
ax1.plot(xvals, avgd_d8_K10, color=depth_colour, linewidth=4)
# x tick labels are shown on the bottom panel only
plt.setp(ax0.get_xticklabels(), visible=False)
# hide the label of the last current major y tick, then fix the tick
# positions: major ticks at the even depths, minor ticks at the odd ones
ax1.yaxis.get_major_ticks()[-1].label1.set_visible(False)
ax1.set_yticks(np.array([2.0, 4.0, 6.0, 8.0]))
ax1.set_yticks(np.array([3.0, 5.0, 7.0]), minor=True)

# axis labels
ax0.set_ylabel('Total Reward \n at $T=1000$', fontsize=22)
ax1.set_ylabel('Average $d_N$', fontsize=22)
ax1.set_xlabel('Policies per Lookahead ($N$)', fontsize=22)

# tick styling; the depth axis styles its minor ticks as well, and the top
# panel's x axis gets no label size because its tick labels are hidden
ax0.xaxis.set_tick_params(width=3, length=6)
ax1.yaxis.set_tick_params(which='both', labelsize=18, width=3, length=6)
for axis in (ax1.xaxis, ax0.yaxis):
    axis.set_tick_params(labelsize=18, width=3, length=6)

# set the vertical spacing between the two panels
plt.subplots_adjust(hspace=.15)
plt.show()

In [None]:
# Write the d=8, K=10 figure to a PDF with a tight bounding box.
# NOTE(review): the target path is absolute (rooted at '/') -- confirm that
# this is the intended output location.
out_path = "/figures/opplan_l4_d8_K10.pdf"
fig.savefig(out_path, bbox_inches='tight')

In [None]:
# Figure for d=4, K=30: total reward with its 5%-95% band (top panel) and
# average depth (bottom panel), both plotted against N.
xvals = np.array([100, 500, 1000, 5000, 10000, 20000, 25000, 30000,
                  35000, 40000, 45000, 50000])

reward_colour = '#1f77b4'
depth_colour = '#ff7f0e'

fig = plt.figure(figsize=(8, 6))
# two stacked panels, the top one twice as tall as the bottom one
gs = gridspec.GridSpec(2, 1, height_ratios=[2, 1])

# top panel: thin lines for the band edges, thick line for the mean,
# and a translucent fill between the edges
ax0 = plt.subplot(gs[0])
ax0.plot(xvals, cumrewRGPTS_d4_K30_05, color=reward_colour, linewidth=1)
ax0.plot(xvals, cumrewRGPTS_d4_K30_mean, color=reward_colour, linewidth=4, label="2RGP-TS")
ax0.plot(xvals, cumrewRGPTS_d4_K30_95, color=reward_colour, linewidth=1)
ax0.fill_between(xvals, cumrewRGPTS_d4_K30_05, cumrewRGPTS_d4_K30_95,
                 alpha=0.4, facecolor=reward_colour)

# bottom panel: average depth, sharing its x axis with the top panel
ax1 = plt.subplot(gs[1], sharex=ax0)
ax1.plot(xvals, avgd_d4_K30, color=depth_colour, linewidth=4)
# x tick labels are shown on the bottom panel only
plt.setp(ax0.get_xticklabels(), visible=False)
# hide the label of the last current major y tick, then fix the tick positions
ax1.yaxis.get_major_ticks()[-1].label1.set_visible(False)
ax1.set_yticks(np.array([1.0, 2.0, 3.0, 4.0]))

# axis labels
ax0.set_ylabel('Total Reward \n at $T=1000$', fontsize=22)
ax1.set_ylabel('Average $d_N$', fontsize=22)
ax1.set_xlabel('Policies per Lookahead ($N$)', fontsize=22)

# tick styling; the top panel's x axis gets no label size because its
# tick labels are hidden
ax0.xaxis.set_tick_params(width=3, length=6)
for axis in (ax1.xaxis, ax1.yaxis, ax0.yaxis):
    axis.set_tick_params(labelsize=18, width=3, length=6)

# set the vertical spacing between the two panels
plt.subplots_adjust(hspace=.15)
plt.show()

In [None]:
# Write the d=4, K=30 figure to a PDF with a tight bounding box.
# NOTE(review): the target path is absolute (rooted at '/') -- confirm that
# this is the intended output location.
out_path = "/figures/opplan_l4_d4_K30.pdf"
fig.savefig(out_path, bbox_inches='tight')