In [1]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pylab as plot
import scipy.stats
import seaborn as sns

# Plot appearance: seaborn "ticks" style with an 8-colour HLS palette.
sns.set_style("ticks")
sns.set_palette("hls", n_colors=8)

# Legend font size (applied through the pylab alias of rcParams).
params = {'legend.fontsize': 15}
plot.rcParams.update(params)

# LaTeX text rendering, serif font family, and tick-label sizes.
# These are set after sns.set_style so the style call cannot override them.
plt.rcParams.update({
    'text.usetex': True,
    'font.family': 'serif',
    'xtick.labelsize': 17,
    'ytick.labelsize': 17,
})

In [5]:
# Plotting helpers for per-run training-return CSV files (presumably exported from TensorBoard -- confirm)

def _confidence_halfwidth(samples, confidence=0.95):
    """Return the half-width of a Student-t confidence interval for ``samples``.

    The standard error is computed over the values of ``samples`` itself, so
    the matching degrees of freedom are ``len(samples) - 1``.
    """
    samples = np.asarray(samples, dtype=float)
    std_err = scipy.stats.sem(samples, axis=0)
    quantile = scipy.stats.t.ppf((1.0 + confidence) / 2.0, samples.shape[0] - 1)
    return std_err * quantile


def _plot_run(x, y, label, band_alpha):
    """Draw one run's curve and its confidence band on the current figure."""
    y = np.asarray(y, dtype=float)
    h = _confidence_halfwidth(y)
    plt.plot(x, y, label=label)
    plt.fill_between(x, y + h, y - h, alpha=band_alpha)


def plot_data_cpu_gpu(data_sac, band_alpha=0):
    """Plot the first two runs of ``data_sac`` as "SAC+MER" and "SAC".

    Parameters
    ----------
    data_sac : tuple
        ``(xseries, yseries)``; index 0 of each is drawn with the label
        "SAC+MER" and index 1 with the label "SAC".
    band_alpha : float, optional
        Opacity of the confidence band around each curve. The default of 0
        keeps the band fully transparent, i.e. invisible; pass a value such
        as 0.2 to make it visible.

    Notes
    -----
    The band half-width is a single number per run: the standard error of
    that run's values times the 97.5% Student-t quantile with
    ``len(run) - 1`` degrees of freedom. Both runs use the same rule.
    """
    xseries_sac, yseries_sac = data_sac

    plt.figure()
    plt.title("SAC+MER vs. SAC")
    plt.xlabel("Time steps")
    plt.ylabel("Episode Return")

    _plot_run(xseries_sac[0], yseries_sac[0], "SAC+MER", band_alpha)
    _plot_run(xseries_sac[1], yseries_sac[1], "SAC", band_alpha)

    # Red vertical markers at fixed time steps.
    # NOTE(review): presumably phase/task boundaries of the experiment -- confirm.
    for step in (0, 10000, 20000, 30000):
        plt.axvline(x=step, color='r')

    plt.legend(loc='lower right')

    plt.show()

def moving_average(data_set, periods=20):
    """Smooth ``data_set`` with an unweighted sliding mean of ``periods`` samples.

    The convolution runs in ``'valid'`` mode, so only fully overlapping
    windows are returned: for an input of at least ``periods`` samples the
    result has ``len(data_set) - periods + 1`` entries.
    """
    window = np.ones(periods)
    window /= periods  # every tap weighs 1/periods
    return np.convolve(data_set, window, mode='valid')

def get_rewards_time(path):
    """Read one run CSV and return ``(times, values)`` on an elapsed-time axis.

    ``values`` is the third CSV column smoothed with ``moving_average``.
    ``times`` is an evenly spaced axis with one point per smoothed value,
    running from 0 to (last - first entry of the first column) divided by 60.
    NOTE(review): presumably column 0 is wall-clock seconds, making the axis
    minutes -- confirm against the CSV export.
    """
    frame = pd.read_csv(os.path.join(path))
    smoothed = moving_average(frame.iloc[:, 2].values)

    stamps = frame.iloc[:, 0].values
    elapsed = stamps[-1] - stamps[0]

    # Smoothing shortens the series, so the axis is rebuilt to match its length.
    axis = np.linspace(0, elapsed, len(smoothed)) / 60
    return axis, smoothed

def get_rewards(path):
    """Read one run CSV and return ``(steps, values)``.

    ``values`` is the third CSV column smoothed with ``moving_average``.
    ``steps`` is an evenly spaced axis with one point per smoothed value,
    running from 0 to the last entry of the second column.
    NOTE(review): presumably column 1 is the training step -- confirm.
    """
    frame = pd.read_csv(os.path.join(path))
    smoothed = moving_average(frame.iloc[:, 2].values)

    final_step = frame.iloc[:, 1].values[-1]

    # Smoothing shortens the series, so the axis is rebuilt to match its length.
    axis = np.linspace(0, final_step, len(smoothed))
    return axis, smoothed


def _stack_runs(series):
    """Pack per-run arrays into one array.

    Runs of equal length become an ordinary 2-D array. Runs of different
    lengths become a 1-D object array with one entry per run, because
    ``np.array`` on ragged input raises on recent NumPy versions.
    """
    if len({len(s) for s in series}) <= 1:
        return np.array(series)
    ragged = np.empty(len(series), dtype=object)
    for i, run in enumerate(series):
        ragged[i] = run
    return ragged


def load_data(data_dir):
    """Load every entry of ``data_dir`` whose name starts with ``"run"``.

    Each matching entry is passed to ``get_rewards``; the resulting x and y
    series are collected in name-sorted order.

    Parameters
    ----------
    data_dir : str
        Directory holding the per-run files.

    Returns
    -------
    (xseries, yseries) : tuple of numpy.ndarray
        One row/entry per run. Index 0 is the lexicographically first
        ``run*`` entry, index 1 the next, and so on.
    """
    # os.listdir returns entries in arbitrary order, but plot_data_cpu_gpu
    # labels runs by position, so the order must be deterministic.
    run_names = sorted(d for d in os.listdir(data_dir) if d.startswith("run"))

    xseries = []
    yseries = []
    for name in run_names:
        x, y = get_rewards(os.path.join(data_dir, name))
        xseries.append(x)
        yseries.append(y)

    return _stack_runs(xseries), _stack_runs(yseries)

In [3]:
# Load every "run*" entry under cartpole_training_returns/ as an (xseries, yseries) pair.
data_sac = load_data('cartpole_training_returns/')