In [2]:
import math
import random
import os

import numpy as np

from comet_ml import API
from matplotlib import pyplot as plt
import pandas as pd
from scipy import stats

# Comet credentials are read from the environment so that no secret is stored
# in (or leaked through) the notebook file. Export COMET_API_KEY and
# COMET_REST_API_KEY before starting the kernel.
# NOTE(review): the keys that were previously committed here should be treated
# as compromised and revoked.
# When a variable is unset the value is None -- presumably comet_ml then falls
# back to its own configuration lookup; confirm against the installed version.
COMET_API_KEY = os.environ.get("COMET_API_KEY")
COMET_REST_API_KEY = os.environ.get("COMET_REST_API_KEY")


In [320]:
client = API(api_key=COMET_API_KEY, rest_api_key=COMET_REST_API_KEY)

# results[series][gravity] -> current value of the 'trailing_avg_5' metric.
# 'baselines' is filled from the ddpg-baselines project; for the transfer
# project the bucket is chosen by the first tag of each experiment, so every
# tag used there must already be a key of this dict.
results = {
    'baselines': {},
    'transfer': {},
    'fl_10_8_9_11_12': {},
    'fl_10_5_8_12_16': {},
    'fl_10_8.5_9.5_10.5_11': {},
}


def _named_value(entries, name, field):
    """Return ``entry[field]`` for the first entry whose 'name' equals `name`.

    Args:
        entries: iterable of dicts, each carrying a 'name' key.
        name: the parameter / metric name to look for.
        field: the key to read from the matching entry.

    Raises:
        LookupError: if no entry is called `name` (replaces the bare
            StopIteration that ``next(...)`` without a default would raise).
    """
    for entry in entries:
        if entry['name'] == name:
            return entry[field]
    raise LookupError(f"no entry named {name!r} in the Comet response")


def _gravity_and_reward(exp):
    """Fetch one experiment and return ``(gravity, trailing_avg)`` as floats.

    Gravity is the 'valueMax' of parameter 'g'; the reward is the
    'valueCurrent' of metric 'trailing_avg_5'.
    """
    params = client.get_experiment_parameters(exp)
    metrics = client.get_experiment_metrics(exp)
    gravity = _named_value(params, 'g', 'valueMax')
    trailing_avg = _named_value(metrics, 'trailing_avg_5', 'valueCurrent')
    return float(gravity), float(trailing_avg)


for exp in client.get("jh-jl-rlfl/ddpg-baselines"):
    gravity, trailing_avg = _gravity_and_reward(exp)
    results['baselines'][gravity] = trailing_avg

for exp in client.get("jh-jl-rlfl/transfer"):
    gravity, trailing_avg = _gravity_and_reward(exp)
    # The first tag of the experiment names the series it belongs to.
    tag = client.get_experiment_tags(exp)[0]
    results[tag][gravity] = trailing_avg

In [321]:
# Gravity values of the baseline runs in ascending order; this is the shared
# x-axis for every series in the transfer plot.
gravities = sorted(results['baselines'])

In [324]:
# Legend text and line colour for each series, listed in the same order as the
# keys of `results` (the i-th series gets the i-th label and colour).
labels = ['no-transfer', 'g: 10,10,10,10,10', 'g: 8,9,10,11,12', 'g: 5,8,10,12,16', 'g: 8.5,9.5,10,10.5,11']
colors = ['orange', 'gray', 'green', 'red', 'darkblue']

fig, ax = plt.subplots(figsize=(10,5))
for idx, series in enumerate(results.values()):
    # Every series is sampled at the baseline gravities, in ascending order.
    rewards_by_gravity = [series[g] for g in gravities]
    ax.plot(gravities, rewards_by_gravity, label=labels[idx], color=colors[idx])

ax.set_title("Reward from Policy and Value Network Transfer")
ax.set_xlabel("Gravity (Inverse Pendulum)", fontsize=13)
ax.set_ylabel("Trailing 5-Episode Average Reward (5K frames)", fontsize=13)
ax.grid()
ax.legend()
ax.set_xlim(7.,15)
plt.show()


In [318]:
client = API(api_key=COMET_API_KEY, rest_api_key=COMET_REST_API_KEY)

# alpha_results[<'gravities' parameter string>] -> list of one record per
# experiment with the round reward average / std and the alpha, beta settings.
alpha_results = {}
# Every distinct raw beta value seen, stored as key and value.
betas = {}
for exp in client.get("jh-jl-rlfl/ddpg-fl-kl"):
    params = client.get_experiment_parameters(exp)
    metrics = client.get_experiment_metrics(exp)
    # Deliberately NOT named `gravities`: that name is the sorted list of
    # baseline gravities used by the transfer plot, and rebinding it here would
    # break that cell when it is re-run after this one.
    gravity_set = next(p for p in params if p['name'] == 'gravities')['valueMax']
    alpha = float(next(p for p in params if p['name'] == 'alpha')['valueMax'])
    beta = next(p for p in params if p['name'] == 'beta')['valueMax']
    betas[beta] = beta
    reward = next(p for p in metrics if p['name'] == 'round_reward_avg')['valueCurrent']
    std = next(p for p in metrics if p['name'] == 'round_reward_std')['valueCurrent']
    # setdefault creates the per-gravity-set list on first sight.
    alpha_results.setdefault(gravity_set, []).append({
        'reward': float(reward),
        'std': float(std),
        'alpha': alpha,
        'beta': float(beta),
    })

In [293]:
lines = list(alpha_results.keys())
colors = ['red', 'green', 'darkblue', 'gray']
labels = ['g: 5,8,10,12,16', 'g: 8,9,10,11,12', 'g: 8.5,9.5,10,10.5,11', 'g: 10,10,10,10,10'] 
lines = ['[10,5,8,12,16]', '[10,8,9,11,12]', '[10,8.5,9.5,10.5,11]', '[10,10,10,10,10]']
points = {}
xs = [0.,0.0001, 0.0003, 0.003, 0.01, 0.03, 0.1, 0.2, 0.3]
a = np.arange(len(xs))

%matplotlib osx

for idx, line in enumerate(lines[:5]):
    rewards = [(p['beta'], p['reward'], p['std']) for p in alpha_results[line]]
    rewards.sort()
    points[line] = rewards
    
fig, ax = plt.subplots(figsize=(10,5))
for idx, (key, point_set) in enumerate(points.items()):
    my_xs = [beta for beta, reward, _ in point_set]
    if len(my_xs) != len(xs):
        continue
    plt.plot(a, [reward for beta, reward, _ in point_set], label=labels[idx], color=colors[idx])
    
    
    variance = [(reward - std, reward + std) for _, reward, std in point_set]
    upper = [upper for _, upper in variance]
    lower = [lower for lower, _ in variance]
#    plt.fill_between(a, upper, lower, where=upper>=lower,  facecolor=colors[idx],  interpolate=True, alpha=0.1)


ax.set_title("Reward KL and Entropy Regularization")
ax.set_xlabel("Beta value", fontsize=13)
ax.set_ylabel("Trailing Average Reward (9.6K frames over 12 rounds)", fontsize=13)
ax.legend()
ax.grid(alpha=0.5)
ax.set_xlim(0., 0.3)
ax.yaxis.set_ticks(np.arange(-440,-320, 10))
ax.xaxis.set_ticks(a) #set the ticks to be a
ax.xaxis.set_ticklabels(xs) # change the ticks' names to x

plt.show()


[0.0, 0.0001, 0.0003, 0.003, 0.01, 0.03, 0.1, 0.2, 0.3]
[0.0, 0.0001, 0.0003, 0.003, 0.01, 0.03, 0.1, 0.2, 0.3]
[0.0, 0.0001, 0.0003, 0.003, 0.01, 0.03, 0.1, 0.2, 0.3]
[0.0, 0.0001, 0.0003, 0.003, 0.01, 0.03, 0.1, 0.2, 0.3]


In [8]:
%matplotlib osx
client = API(api_key=COMET_API_KEY, rest_api_key=COMET_REST_API_KEY)
FL_EXP = '893c4fd5310e4566830e4e2210f73c87'
metrics = client.get_experiment_metrics_raw(FL_EXP)
rewards = {
    'round_reward_std': [],
    'round_reward_avg': [],
    'reward.n0': [],
    'reward.n1': [],
    'reward.n2': [],
    'reward.n3': [],
    'reward.n4': [],   
}
for metric in metrics:
    name_parts = []
    if metric['metricName'] in rewards.keys():
        rewards[metric['metricName']].append(float(metric['metricValue']))

labels = ['g:8','g:9','g:10','g:11','g:12']
colors = ['darkblue','blue','lightblue','aqua','teal']
fig, ax = plt.subplots(figsize=(10,5))
xticks = np.arange(10)
for idx,label in enumerate(labels):
    plt.plot(xticks, rewards[f'reward.n{idx}'], label=label, color=colors[idx])

plt.plot(xticks, rewards['round_reward_avg'], label='Round Average Reward', color='red', linewidth=2)
upper = [rewards['round_reward_avg'][idx] + std for idx,std in enumerate(rewards['round_reward_std'])]
lower = [rewards['round_reward_avg'][idx] - std for idx,std in enumerate(rewards['round_reward_std'])]
plt.fill_between(xticks, upper, lower, where=upper>=lower,  facecolor='red',  interpolate=True, alpha=0.1)

    
ax.set_title("Federated Averaging for 5 Environments (GravityPendulum)")
ax.set_xlabel("Frames", fontsize=13)
ax.set_ylabel("Episode reward", fontsize=13)
ax.legend()
ax.grid(alpha=0.5)
ax.set_xlim(0,9)
xs = [str(lbl * 1000) for lbl in xticks]
ax.xaxis.set_ticks(xticks) #set the ticks to be a
ax.xaxis.set_ticklabels(xs) # change the ticks' names to x

plt.show()


{'round_reward_std': [188.54484528029167,
  116.00425067309058,
  296.32600093028975,
  250.37593646942122,
  257.7993869030532,
  187.40545951027843,
  191.3462351168584,
  146.49601106684005,
  172.3598445716023,
  132.46384129761012],
 'round_reward_avg': [-1299.8482641866103,
  -1291.7773499850239,
  -1092.274736505746,
  -1159.8150736588905,
  -891.0051831930396,
  -582.5967021409591,
  -561.2614399897186,
  -468.7066565069457,
  -441.79281790322176,
  -399.1895736278323],
 'reward.n0': [-1463.722061831519,
  -1148.3090423743436,
  -714.5335481663673,
  -944.8987421744405,
  -258.70797946866844,
  -397.3474652152409,
  -391.9054460244825,
  -262.1677448762461,
  -257.4528144858408,
  -259.863554466089],
 'reward.n1': [-1515.7953968096967,
  -1320.3198580807407,
  -944.26424866525,
  -1210.1920053279107,
  -810.1497014094105,
  -262.17883559414645,
  -388.44464549578964,
  -383.61497773621545,
  -255.25251362122805,
  -257.21562616291226],
 'reward.n2': [-1488.7703833304563,
  -142