In [1]:
from evaluate_model import evaluate_agent
import d3rlpy
from d3rlpy import dataset
from d3rlpy.algos.bcq import BCQ
import numpy as np
import csv

from wget import download
from env.abr import ABRSimEnv
import matplotlib.pyplot as plt

import ray
import ray.rllib.agents.ppo as ppo
from train_baseline import myEnv

from policies import BBAAgent
import pandas as pd

In [2]:
# Weight snapshot to restore into the CQL agent.
CQL_MODEL_WEIGHTS = "d3rlpy_logs/DiscreteCQL_20211115220329/model_8950.pt"

# Rebuild the DiscreteCQL object from the params.json saved in the same
# run directory, then load the weights into it.
cql_model = d3rlpy.algos.DiscreteCQL.from_json(
    "d3rlpy_logs/DiscreteCQL_20211115220329/params.json"
)
cql_model.load_model(CQL_MODEL_WEIGHTS)



In [3]:
# Weight snapshot to restore into the BCQ agent.
BCQ_MODEL_WEIGHTS = "d3rlpy_logs/DiscreteBCQ_20211115222009/model_8950.pt"

# Rebuild the DiscreteBCQ object from the params.json saved in the same
# run directory, then load the weights into it.
bcq_model = d3rlpy.algos.DiscreteBCQ.from_json(
    "d3rlpy_logs/DiscreteBCQ_20211115222009/params.json"
)
bcq_model.load_model(BCQ_MODEL_WEIGHTS)



In [4]:
# Start the local Ray runtime. ignore_reinit_error=True keeps this cell
# re-runnable in a live kernel instead of raising because Ray is already up.
ray.init(ignore_reinit_error=True)

# Start from RLlib's PPO defaults and override the settings below.
# NOTE(review): these presumably have to match the configuration the
# checkpoint was trained with -- confirm against train_baseline.
config = ppo.DEFAULT_CONFIG.copy()
config["num_gpus"] = 0
config["num_workers"] = 1
config["lr"] = 1e-2
config["lambda"] = 0.96
config["gamma"] = 0.96
# (timestep, coefficient) breakpoints: 0.2 at step 0, 0.0 at step 2500*490.
config["entropy_coeff_schedule"] = [(0, 0.2), (2500*490, 0.0)]
# DEFAULT_CONFIG.copy() is shallow, so config["model"] is still the very dict
# held by ppo.DEFAULT_CONFIG. Build a fresh dict rather than assigning into it,
# otherwise the overrides leak into every later user of the defaults.
config["model"] = {
    **config["model"],
    "fcnet_hiddens": [64, 32],
    "fcnet_activation": "relu",
}
config["rollout_fragment_length"] = 490

# Build the trainer on the project's environment and load the saved state.
ppo_model = ppo.PPOTrainer(config=config, env=myEnv)
ppo_model.load_checkpoint("models/ppo/checkpoint_005201/checkpoint-5201")

2021-12-04 22:53:25,634	INFO trainer.py:722 -- Your framework setting is 'tf', meaning you are using static-graph mode. Set framework='tf2' to enable eager execution with tf2.x. You may also want to then set `eager_tracing=True` in order to reach similar execution speed as with static-graph mode.
2021-12-04 22:53:25,639	INFO ppo.py:166 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
2021-12-04 22:53:25,640	INFO trainer.py:743 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.


[2m[36m(RolloutWorker pid=89557)[0m Setting up environment..


2021-12-04 22:54:10,655	INFO trainable.py:124 -- Trainable.setup took 45.027 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.


In [5]:
# Baseline agent: a BBAAgent (from policies) constructed on a fresh ABRSimEnv.
bba_model = BBAAgent(env = ABRSimEnv())

In [6]:
# All four agents in a fixed order: CQL, BCQ, PPO, BBA.
models = [cql_model, bcq_model, ppo_model, bba_model]

In [7]:
# Evaluate the CQL agent. The result is later consumed by eval_to_df, which
# indexes it as a sequence of per-trajectory dicts.
cql_eval = evaluate_agent(cql_model)

Setting up environment..


In [8]:
# Evaluate the BCQ agent with the same evaluate_agent call used for CQL.
bcq_eval = evaluate_agent(bcq_model)

Setting up environment..


In [9]:
# Evaluate the RLlib PPO trainer.
# NOTE(review): ppo=True presumably switches evaluate_agent to the RLlib
# action-selection path -- confirm in evaluate_model.
ppo_eval = evaluate_agent(ppo_model, ppo=True)

Setting up environment..




In [10]:
# Evaluate the BBA baseline agent.
# NOTE(review): bba=True presumably switches evaluate_agent to the BBAAgent
# interface -- confirm in evaluate_model.
bba_eval = evaluate_agent(bba_model, bba=True)

Setting up environment..


In [11]:
# Evaluation results in the same order as `models`: CQL, BCQ, PPO, BBA.
evals = [cql_eval, bcq_eval, ppo_eval, bba_eval]

In [12]:
def eval_to_df(eval):
    """Flatten per-trajectory evaluation records into one long DataFrame.

    Parameters
    ----------
    eval : sequence of dict
        One dict per trajectory. Each value is an indexable per-step sequence
        (the notebook uses the keys 'action', 'buffer_length',
        'action_bitrate', 'download_time', 'throughput' and 'reward').
        The parameter name shadows the builtin ``eval``; it is kept so that
        existing callers are unaffected.

    Returns
    -------
    pandas.DataFrame
        One row per (trajectory, step). The first column, ``"traj"``, is the
        trajectory's position in ``eval``; the remaining columns are the keys
        of the first trajectory's dict, in that dict's order.

    Raises
    ------
    KeyError
        If a later trajectory lacks one of the first trajectory's keys.
    """
    # An empty evaluation has no first trajectory to take the schema from.
    if len(eval) == 0:
        return pd.DataFrame(columns=["traj"])

    # Take the schema from the data itself so that every value lands under the
    # column named after its own key, whatever order the dict happens to use.
    metric_names = list(eval[0].keys())

    rows = []
    for traj_index, traj in enumerate(eval):
        # Walk the steps of *this* trajectory (not the first one's length), so
        # trajectories of differing lengths are neither truncated nor overrun.
        per_step = zip(*(traj[name] for name in metric_names))
        for step_values in per_step:
            rows.append([traj_index, *step_values])

    return pd.DataFrame(rows, columns=["traj"] + metric_names)

In [13]:
# Convert each agent's evaluation to a long-format DataFrame, in the order
# CQL, BCQ, PPO, BBA, then bind the individual frames to their own names.
dfs = list(map(eval_to_df, (cql_eval, bcq_eval, ppo_eval, bba_eval)))
cql_df, bcq_df, ppo_df, bba_df = dfs

In [14]:
import plotly.express as px
import plotly.graph_objects as go

In [15]:
# Per-model summaries keyed by short model name. Each value is a two-item
# list: [per-trajectory means, per-trajectory sums] of every step-level column.
summary_dict = {
    label: [frame.groupby(by="traj").mean(), frame.groupby(by="traj").sum()]
    for label, frame in (
        ("cql", cql_df),
        ("bcq", bcq_df),
        ("ppo", ppo_df),
        ("bba", bba_df),
    )
}

In [16]:
# One row per (model, trajectory): mean throughput and summed reward.
cols = ["model", "traj", "average throughput", "total reward"]
rows = []
for model_name, (mean_by_traj, sum_by_traj) in summary_dict.items():
    throughput_means = list(mean_by_traj["throughput"])
    reward_totals = list(sum_by_traj["reward"])
    # The trajectory number is the position within the grouped result.
    rows.extend(
        [model_name, traj_pos, mean_thpt, total_rew]
        for traj_pos, (mean_thpt, total_rew) in enumerate(
            zip(throughput_means, reward_totals)
        )
    )

summary_df = pd.DataFrame(rows, columns=cols)

In [17]:
# Scatter of total reward against average throughput, one point per
# (model, trajectory), coloured by model with a per-model OLS trendline.
# Left as the cell's final bare expression so the notebook renders the figure.
# NOTE(review): the "200" in the title is hard-coded -- confirm it matches the
# number of trajectories evaluate_agent actually produced.
# NOTE(review): trendline="ols" presumably needs statsmodels installed -- confirm.
px.scatter(summary_df, x="average throughput", y="total reward", color="model", trendline="ols",
        title="Model evaluations over 200 trajectories")

In [18]:
import plotly.figure_factory as ff

In [19]:
# Distribution plot of per-trajectory total reward, one curve per model.
# Index 1 of each summary_dict entry is the per-trajectory sum frame.
group_labels = list(summary_dict)
hist_data = [summary_dict[label][1]["reward"] for label in group_labels]

fig = ff.create_distplot(hist_data, group_labels, bin_size=100, show_hist=False)
fig.show()

In [20]:
# Empirical CDF of per-trajectory total reward, one curve per model.
fig = px.ecdf(summary_df, x="total reward", color="model")
fig.show()