In [49]:
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick
import matplotlib
matplotlib.rcParams['pdf.fonttype'] = 42
matplotlib.rcParams['ps.fonttype'] = 42
import seaborn as sns
sns.set_color_codes()
import pandas as pd
import numpy as np
import os
import math
from matplotlib.ticker import FuncFormatter

In [44]:
# base_dir = '/Users/tchu/Documents/rl_test/deeprl_dist/atsc_large_grid/'
# base_dir = '/Users/tchu/Documents/rl_test/deeprl_dist/atsc_real_net/'
# base_dir = '/Users/tchu/Documents/rl_test/deeprl_dist/cacc_catchup/'
base_dir = '/Users/tchu/Documents/rl_test/deeprl_dist/cacc_slowdown/'
plot_dir = base_dir + 'plots/'
if not os.path.exists(plot_dir):
    os.mkdir(plot_dir)
algo_names = ['IA2C', 'ConseNet', 'FPrint', 'DIAL', 'CommNet', 'NeurComm']
algo_labels = ['ia2c', 'ia2c_cu', 'ia2c_fp', 'ma2c_dial', 'ma2c_cnet', 'ma2c_nc']
color_cycle = sns.color_palette()
TRAIN_STEP = 1e6
# TRAIN_STEP = 1e5
train_dir = base_dir + 'training/'
# moving average window to smoothen curves
window = 100
test_dir = base_dir + 'execution/'
exp_name = base_dir.split('/')[-2]
print(exp_name)

def millions(x, pos):
    return '%1.1fM' % (x*1e-6)

def millions1(x, pos):
    return '%1.2fM' % (x*1e-6)

def thousands(x, pos):
    return '%dK' % (x*1e-3)

cacc_slowdown


### Detailed training curves over alpha levels per algorithm

In [10]:



def plot_single_train_curve(algo_i):
    data = []
    alphas = [0.9, 1.0]
    for alpha in alphas:
        data_dir = train_dir + ('%s_%.1f.csv' % (algo_labels[algo_i], alpha))
        data.append(pd.read_csv(data_dir))
    plt.figure()

    colors = 'br'
    for i, alpha in enumerate(alphas):
        df = data[i]
        print('%s, alpha=%.1f, avg R_bar of last 50 episodes: %.2f' % (algo_names[algo_i], alpha, np.mean(df.avg_reward.values[-50:])))
        # use moving avg to make plot smoother
        x_mean = df.avg_reward.rolling(window).mean().values
        x_std = df.std_reward.rolling(window).mean().values
        plt.plot(df.step.values, x_mean, color=colors[i], linewidth=3, label=r'$\alpha$=%.1f' % (alpha))
        plt.fill_between(df.step.values, x_mean - x_std, x_mean + x_std, facecolor=colors[i], edgecolor='none', alpha=0.1)

    plt.xlim([0, TRAIN_STEP])
    plt.ylim([-375, -125])

    formatter = FuncFormatter(millions)
    plt.gca().xaxis.set_major_formatter(formatter)
    plt.xticks(fontsize=18)
    plt.yticks(fontsize=18)
    plt.grid(True, which='both')
#     plt.xlabel('Training step', fontsize=20)
#     plt.ylabel('Average episode reward', fontsize=20)
    plt.legend(loc='lower right', fontsize=18)
    plt.tight_layout()
    plt.savefig(plot_dir + ('/%s_train.pdf' % algo_labels[algo_i]))
    plt.close()
    
for i in range(6):
    plot_single_train_curve(i)

IA2C, alpha=0.9, avg R_bar of last 50 episodes: -163.47
IA2C, alpha=1.0, avg R_bar of last 50 episodes: -202.42
ConseNet, alpha=0.9, avg R_bar of last 50 episodes: -188.64
ConseNet, alpha=1.0, avg R_bar of last 50 episodes: -199.54
FPrint, alpha=0.9, avg R_bar of last 50 episodes: -169.98
FPrint, alpha=1.0, avg R_bar of last 50 episodes: -178.53
DIAL, alpha=0.9, avg R_bar of last 50 episodes: -251.94
DIAL, alpha=1.0, avg R_bar of last 50 episodes: -214.48
CommNet, alpha=0.9, avg R_bar of last 50 episodes: -247.62
CommNet, alpha=1.0, avg R_bar of last 50 episodes: -169.78
NeurComm, alpha=0.9, avg R_bar of last 50 episodes: -228.49
NeurComm, alpha=1.0, avg R_bar of last 50 episodes: -145.93


### Ablation Study: alpha impact

In [45]:
comp_data = []
algos = ['ia2c', 'ma2c_cnet']
# algos = ['ia2c', 'ma2c_nc', 'ma2c_cnet']
alphas = [0.8, 0.9, 1.0]
highlights = [0.9, 1.0]
highlights = [0.9, 0.9]
highlights = [1.0, 1.0]
highlights = [0.8, 1.0]
alpha_labels = {0.8:'0.8', 0.6:'0.6', 0.9:'0.9', 0.95:'0.95', 1.0:'1.0'}
for algo in algos:
    data = []
    for alpha in alphas:
#         if algo == 'ma2c_cnet' and alpha == 0.8:
#             data.append(None)
#             continue
#         if exp_name.startswith('atsc'):
        data_dir = train_dir + ('%s_%s.csv' % (algo, alpha_labels[alpha]))
        data.append(pd.read_csv(data_dir))
#         else:
#             cur_dfs = []
#             for trial in range(1, 4):
#                 if not trial:
#                     data_dir = train_dir + ('%s_%s.csv' % (algo, alpha_labels[alpha]))
#                 else:
#                     data_dir = train_dir + ('%s_%s_%d.csv' % (algo, alpha_labels[alpha], trial))
#                 cur_df = pd.read_csv(data_dir)
#                 cur_df['trial_id'] = trial
#                 cur_dfs.append(cur_df)
#             data.append(pd.concat(cur_dfs))
    comp_data.append(data)
plt.figure(figsize=(8,6))


for j, alpha in enumerate(alphas):
    for i, algo in enumerate(algos):
#         if algo == 'ma2c_cnet' and alpha == 0.8:
#             continue
        df = comp_data[i][j]
#         if exp_name.startswith('atsc'):
        x_mean = df.avg_reward.rolling(window).mean().values
#             x_std = df.std_reward.rolling(window).mean().values
        x_final = np.mean(df.avg_reward.values[-50:])
        t = df.step.values
#         else:
#             xs, final_xs = [], []
#             for trial in range(1, 4):
#                 x = df[df.trial_id == trial].avg_reward
#                 final_xs.append(x.values[-1])
#                 xs.append(x)
#                 xs.append(x.rolling(window).mean().values)
            
#             xs = np.array(xs)
#             x_final = np.mean(np.array(final_xs))
#             x_mean = np.mean(xs, axis=0)
# #             x_std = np.std(xs, axis=0)
#             t = df[df.trial_id == 1].step.values
        print('%s, alpha=%.2f, avg R_bar of last k episodes: %.2f' % (algo, alpha, x_final))
        
#         print('{},{}: {:.2f}, {:.2f}'.format(alpha, algo, np.nanmean(x_mean), np.nanmean(x_std)))
        if algo == 'ia2c':
            color = color_cycle[0]
        else:
            color = color_cycle[1]
        if alpha >= 0.95:
            sty = '-'    
#         elif alpha == 0.9:
#             sty = '--'
        elif alpha >= 0.9:
            sty = '-.'
        else:
            sty = ':'
        if alpha == highlights[i]:
            lw = 4
            ap = 1
        else:
            lw = 2
            ap = 0.75
        plt.plot(t, x_mean, sty, color=color, linewidth=lw, label='%s,'% algo_names[algo_labels.index(algo)] + \
                 alpha_labels[alpha], alpha=ap)
#         plt.fill_between(df.step.values, x_mean - x_std, x_mean + x_std, facecolor=color, edgecolor='none', alpha=0.1)
plt.xlim([0, TRAIN_STEP])
# Grid
# plt.ylim([-400, -150])

formatter = FuncFormatter(millions)
plt.gca().xaxis.set_major_formatter(formatter)
plt.xticks(fontsize=18)
plt.yticks(fontsize=18)
plt.grid(True, which='both')
plt.xlabel('Training step', fontsize=18)
plt.ylabel('Average episode reward', fontsize=18)
# plt.legend(loc='lower right', ncol=3, fontsize=13)
# plt.legend(loc='lower left', bbox_to_anchor=(0, 1, 1, 0.2), mode='expand', ncol=3, fontsize=14)
plt.tight_layout()
plt.savefig(plot_dir + ('/alpha_comp.pdf'))
# plt.savefig(plot_dir + ('/alpha_comp2.pdf'))
plt.close()



ia2c, alpha=0.80, avg R_bar of last k episodes: -529.23
ma2c_cnet, alpha=0.80, avg R_bar of last k episodes: -881.84
ia2c, alpha=0.90, avg R_bar of last k episodes: -884.92
ma2c_cnet, alpha=0.90, avg R_bar of last k episodes: -1124.34
ia2c, alpha=1.00, avg R_bar of last k episodes: -2951.67
ma2c_cnet, alpha=1.00, avg R_bar of last k episodes: -795.22


### Ablation Study: NeurComm impact

In [46]:
comp_data = []
algos = ['ma2c_baseline', 'ma2c_concat', 'ma2c_fprint', 'ma2c_nc_1.0']
labels = ['Baseline', 'Concat Only', 'FPrint Only', 'NeurComm']
for algo in algos:
    if algo in ['ma2c_baseline', 'ma2c_nc_1.0', 'ma2c_fprint', 'ma2c_concat']:
        data_dir = train_dir + ('%s.csv' % (algo))
        comp_data.append(pd.read_csv(data_dir))
    else:
        comp_data.append(None)

plt.figure(figsize=(8,6))
colors = [1, 0, 2, 3]
stys = [':', '-.', '-.', '-']
for i, algo in enumerate(algos):
    if i not in [0, 2, 3, 1]:
        continue
    df = comp_data[i]
    x_mean = df.avg_reward.rolling(window).mean().values
    x_std = df.std_reward.rolling(window).mean().values
    color = color_cycle[colors[i]]
    sty = stys[i]
    if algo == 'ma2c_nc_1.0':
        lw = 4
        ap = 1
    else:
        lw = 3
        ap = 0.75
    plt.plot(df.step.values, x_mean, sty, color=color, linewidth=lw, alpha=ap, label=labels[i])
#         plt.fill_between(df.step.values, x_mean - x_std, x_mean + x_std, facecolor=color, edgecolor='none', alpha=0.1)
plt.xlim([0, TRAIN_STEP])
# plt.ylim([-365, -135])
formatter = FuncFormatter(millions)
plt.gca().xaxis.set_major_formatter(formatter)
plt.xticks(fontsize=18)
plt.yticks(fontsize=18)
plt.grid(True, which='both')
plt.xlabel('Training step', fontsize=18)
plt.ylabel('Average episode reward', fontsize=18)
# plt.legend(loc='lower right', ncol=2, fontsize=16)
# plt.legend(loc='lower left', bbox_to_anchor=(0, 1, 1, 0.2), mode='expand', ncol=2, fontsize=16)
plt.tight_layout()
plt.savefig(plot_dir + ('/comm_comp.pdf'))
plt.close()

### Training result: plot

In [48]:
#'IA2C', 'ConseNet', 'FPrint', 'DIAL', 'CommNet', 'NeurComm'
colors = [0, 5, 2, 6, 1, 3]
# # ATSC Grid
# alphas = [0.9, 0.9, 0.95] + [1.0] * 3
# ylim = [-375, -125]
# # ATSC Monaco
# alphas = [0.9, 0.9, 0.9] + [0.9, 0.9, 1.0]
# ylim = [-650, -200]
# # CACC Catchup
# alphas = [1.0] * 6
# ylim = [-2000, 100]
# CACC Slowdown
alphas = [0.8, 0.8, 0.9] + [1.0, 1.0, 1.0]
ylim = [-3000, -300]
alpha_labels = {0.8:'0.8', 0.6:'0.6', 0.9:'0.9', 0.95:'0.95', 1.0:'1.0'}
comp_data = []
for alpha, algo in zip(alphas, algo_labels):
#     if algo not in ['ma2c_cnet', 'ma2c_nc', 'ma2c_dial']:
#         comp_data.append(None)
#         continue
    data_dir = train_dir + ('%s_%s.csv' % (algo, alpha_labels[alpha]))
    comp_data.append(pd.read_csv(data_dir))
    
plt.figure(figsize=(8,6))
for i in [0, 2, 1, 3, 4, 5]:
#     if i not in [4,5, 3]:
#         continue
    df = comp_data[i]
    print('%s, avg R_bar of last 50 episodes: %.2f' % (algo_names[i], np.mean(df.avg_reward.values[-50:])))
    algo = algo_names[i]
    x_mean = df.avg_reward.rolling(window).mean().values
    x_std = df.avg_reward.rolling(window).std().values
    if i < 3:
        sty = ':'
    else:
        sty = '-'
    if i == 5:
        lw = 4
        ap = 1
    else:
        lw = 3
        ap = 0.75
    plt.plot(df.step.values, x_mean, sty, color=color_cycle[colors[i]], linewidth=lw, alpha=ap, label=algo)
    plt.fill_between(df.step.values, x_mean - x_std, x_mean + x_std, facecolor=color_cycle[colors[i]], edgecolor='none', alpha=0.1)
plt.xlim([0, TRAIN_STEP])
plt.ylim(ylim)
formatter = FuncFormatter(millions)
plt.gca().xaxis.set_major_formatter(formatter)
plt.xticks(fontsize=18)
plt.yticks(fontsize=18)
plt.grid(True, which='both')
plt.xlabel('Training step', fontsize=18)
plt.ylabel('Average episode reward', fontsize=18)
# plt.legend(loc='lower right',  ncol=2, fontsize=16)
# plt.legend(loc='lower left', bbox_to_anchor=(0, 1, 1, 0.2), mode='expand', ncol=3, fontsize=18)
plt.tight_layout()
plt.savefig(plot_dir + ('/train_comp.pdf'))
plt.close()

IA2C, avg R_bar of last 50 episodes: -529.23
FPrint, avg R_bar of last 50 episodes: -682.26
ConseNet, avg R_bar of last 50 episodes: -822.98
DIAL, avg R_bar of last 50 episodes: -1055.15
CommNet, avg R_bar of last 50 episodes: -795.22
NeurComm, avg R_bar of last 50 episodes: -894.51


In [253]:
colors = [0, 5, 2, 6, 1, 3]
# # CACC Catchup
# ylim = [-6000, 0]
# # CACC Slowdown
# ylim = [-2000, -400]
alpha_labels = {0.8:'0.8', 0.6:'0.6', 0.9:'0.9', 0.95:'0.95', 1.0:'1.0'}
comp_data = []
best_finals = [[-float('inf')] * 5 for _ in range(len(algo_labels))]
best_alphas = [[0] * 5 for _ in range(len(algo_labels))]

for algo in np.array(algo_labels):
    cur_dfs = []
    for trial in range(1, 6):
        cur_df_alphas = []
        for alpha in [0.95, 1]:
            if not trial:
                data_dir = train_dir + ('%s_%s.csv' % (algo, alpha_labels[alpha]))
            else:
                data_dir = train_dir + ('%s_%s_%d.csv' % (algo, alpha_labels[alpha], trial))
            cur_df_alphas.append(pd.read_csv(data_dir))
        x_final_1 = cur_df_alphas[0].avg_reward.values[-1]
        x_final_2 = cur_df_alphas[1].avg_reward.values[-1]
        if x_final_1 > x_final_2:
            best_finals[algo_labels.index(algo)][trial-1] = x_final_1
            best_alphas[algo_labels.index(algo)][trial-1] = 0.95
            cur_df = cur_df_alphas[0]
        else:
            best_finals[algo_labels.index(algo)][trial-1] = x_final_2
            best_alphas[algo_labels.index(algo)][trial-1] = 1
            cur_df = cur_df_alphas[1]
        cur_df['trial_id'] = trial
        cur_dfs.append(cur_df)
    df = pd.concat(cur_dfs)
    comp_data.append(df)
    
plt.figure(figsize=(8,6))
for i in [0, 3, 1, 4, 2, 5]:
# for i1 in range(3):
#     i = [0,4,5][i1]
    df = comp_data[i]
    xs, final_xs = [], []
    for trial in range(1, 6):
        x = df[df.trial_id == trial].avg_reward
        final_xs.append(x.values[-1])
        xs.append(x.rolling(window).mean().values)
    xs = np.array(xs)
    final_xs = np.array(final_xs)
    print('%s, avg R_bar of last episode: %.2f' % (algo_names[i], np.mean(final_xs)))
    algo = algo_names[i]
    x_mean = np.mean(xs, axis=0)
    x_std = np.std(xs, axis=0)
    if i < 3:
        sty = ':'
    else:
        sty = '-'
    ts = df[df.trial_id == 1].step.values
    plt.plot(ts, x_mean, sty, color=color_cycle[colors[i]], linewidth=3, label=algo)
    plt.fill_between(ts, x_mean - x_std, x_mean + x_std, facecolor=color_cycle[i], edgecolor='none', alpha=0.1)
plt.xlim([0, TRAIN_STEP])
plt.ylim(ylim)
formatter = FuncFormatter(thousands)
plt.gca().xaxis.set_major_formatter(formatter)
plt.xticks(fontsize=18)
plt.yticks(fontsize=18)
plt.grid(True, which='both')
plt.xlabel('Training step', fontsize=18)
plt.ylabel('Average episode reward', fontsize=18)
# plt.legend(loc='lower left', bbox_to_anchor=(0, 1, 1, 0.2), mode='expand', ncol=3, fontsize=18)
plt.tight_layout()
plt.savefig(plot_dir + ('/train_comp.pdf'))
plt.close()

print('='*10 + 'FINAL' + '='*10)
for i in range(6):
    print(algo_names[i])
    print('best alphas: {}'.format(best_alphas[i]))
    Rs, violations = [], 0
    print('best finals: {}'.format(best_finals[i]))
    for R in best_finals[i]:
        if R > -1000:
            Rs.append(R)
        else:
            violations += 1
    print('avg R_bar: {:.2f}, collision number: {:d}'.format(np.mean(Rs), violations))

IA2C, avg R_bar of last episode: -684.44
DIAL, avg R_bar of last episode: -619.39
ConseNet, avg R_bar of last episode: -665.96
CommNet, avg R_bar of last episode: -636.75
FPrint, avg R_bar of last episode: -689.25
NeurComm, avg R_bar of last episode: -657.48
IA2C
best alphas: [1, 1, 1, 1, 1]
best finals: [-530.6652572756196, -748.3210763595446, -600.867879931384, -1179.4013597725545, -362.93944210134117]
avg R_bar: -560.70, collision number: 1
ConseNet
best alphas: [1, 0.95, 0.95, 1, 1]
best finals: [-443.44698548647233, -653.1516921207358, -664.7826260344099, -1215.2992371469254, -353.1333441909887]
avg R_bar: -528.63, collision number: 1
FPrint
best alphas: [1, 0.95, 1, 0.95, 0.95]
best finals: [-425.2583270354055, -697.7867176713187, -652.7752375520131, -1302.6096361735474, -367.8137209531051]
avg R_bar: -535.91, collision number: 1
DIAL
best alphas: [1, 0.95, 1, 1, 0.95]
best finals: [-517.4721102882304, -584.5785640125223, -557.1715036659043, -1083.3631248606907, -354.351869482337

### ATSC execution result: summary

In [36]:
# # ATSC Grid
# prefix = 'large_grid'
# # ATSC Monaco
prefix = 'atsc_real_net'
table_names = ['traffic', 'trip', 'control']


def load_summarize_data():
    data = {}
    for algo in ['ia2c', 'ia2c_fp', 'ia2c_cu', 'ma2c_nc', 'ma2c_cnet', 'ma2c_dial']:
#     for algo in ['ma2c_nc', 'ma2c_cnet', 'ma2c_dial']:
        print(algo)
        data[algo] = {}
        for tab in table_names:
            data_dir = test_dir + ('%s_%s_%s.csv' % (prefix, algo, tab))
            df = pd.read_csv(data_dir)
            if tab == 'control':
                col_names = ['reward']
            elif tab == 'traffic':
                col_names = ['avg_queue', 'avg_speed_mps', 'avg_wait_sec', 'number_arrived_car']
            else:
                col_names = ['wait_sec']
            print_metrics(df, col_names, tab)
            data[algo][tab] = df
    return data  
            
def print_metrics(df, col_names, tab_name):
    if tab_name == 'control':
        rewards = df.groupby(['episode']).reward.mean()
        mean_reward = np.mean(rewards.values)
        std_reward = np.std(rewards.values)
        print('R_bar: mean %.2f, std %.2f' % (mean_reward, std_reward))
    episodes = list(df.episode.unique())
    num_episode = len(episodes)
    for name in col_names:
        if tab_name != 'trip':
            res = df.loc[df.episode == episodes[0], name].values
            for episode in episodes[1:]:
                res += df.loc[df.episode == episode, name].values
            res = res / num_episode
            print('%s: mean %.2f, min %.2f, max: %.2f' % (name, np.mean(res), np.min(res), np.max(res)))
        else:
            res = []
            for episode in episodes:
                res += list(df.loc[df.episode == episode, name].values)
            print('%s: mean %d, max %d' % (name, np.mean(res), np.max(res)))
            
data = load_summarize_data()

ia2c
avg_queue: mean 1.93, min 0.00, max: 2.97
avg_speed_mps: mean 2.36, min 0.00, max: 13.58
avg_wait_sec: mean 147.45, min 0.00, max: 290.02
number_arrived_car: mean 0.22, min 0.00, max: 0.72
wait_sec: mean 295, max 3147
R_bar: mean -369.66, std 45.53
reward: mean -369.66, min -554.66, max: 0.00
ia2c_fp
avg_queue: mean 1.87, min 0.00, max: 2.78
avg_speed_mps: mean 1.26, min 0.00, max: 13.77
avg_wait_sec: mean 174.84, min 0.00, max: 286.77
number_arrived_car: mean 0.11, min 0.00, max: 0.42
wait_sec: mean 428, max 3356
R_bar: mean -359.37, std 42.32
reward: mean -359.37, min -529.66, max: 0.00
ia2c_cu
avg_queue: mean 2.74, min 0.00, max: 4.51
avg_speed_mps: mean 1.03, min 0.00, max: 13.54
avg_wait_sec: mean 187.27, min 0.00, max: 303.03
number_arrived_car: mean 0.07, min 0.00, max: 0.38
wait_sec: mean 540, max 3387
R_bar: mean -528.94, std 37.23
reward: mean -528.94, min -860.84, max: 0.00
ma2c_nc
avg_queue: mean 1.27, min 0.00, max: 1.88
avg_speed_mps: mean 0.55, min 0.00, max: 13.35


### ATSC execution result: plot

In [37]:
EPISODE_SEC = 3600
colors = [0, 5, 2, 6, 1, 3]
def plot_comp_series(algos, col_name, tab_name, ylabel, window=60):
    plt.figure(figsize=(8,6))
    for algo in algos:
        df = data[algo][tab_name]
        plot_series(df, algo, col_name, tab_name, window)
    plt.xticks(fontsize=18)
    plt.yticks(fontsize=18)
    plt.xlim([0, 3600])
    if col_name == 'avg_queue':
        plt.ylim([0, 3.5])
    else:
        plt.ylim([0, 700])
    plt.grid(True, which='both')
    plt.xlabel('Simulation time (sec)', fontsize=18)
    plt.ylabel(ylabel, fontsize=18)
#     plt.legend(loc='upper left', fontsize=18)
    plt.tight_layout()
    plt.savefig(plot_dir + ('/%s_comp.pdf' % col_name))
    plt.close()
    
def plot_series(df, algo, col_name, tab_name, window):
    episodes = list(df.episode.unique())
    num_episode = len(episodes)
    x = np.zeros((num_episode, EPISODE_SEC))
    for i, episode in enumerate(episodes):
        t_col = 'arrival_sec' if  tab_name == 'trip' else 'time_sec' 
        cur_df = df[df.episode == episode].sort_values(t_col)
        if window > 0:
            cur_x = cur_df[col_name].rolling(window, min_periods=1).mean().values
        else:
            cur_x = cur_df[col_name].values    
        x[i] = cur_x
    x_mean = np.mean(x, axis=0)
    x_std = np.std(x, axis=0)
    t = np.arange(1, EPISODE_SEC + 1)
    algo_i = algo_labels.index(algo)
#     print(algo_i)
    color = color_cycle[colors[algo_i]]
    if algo == 'ma2c_nc':
        lw = 4
        ap = 1
    else:
        lw = 3
        ap = 0.75
    plt.plot(t, x_mean, color=color, linewidth=lw, alpha=ap, label=algo_names[algo_i])
    x_lo = x_mean - x_std
    x_lo = np.maximum(x_lo, 0)
    x_hi = x_mean + x_std
    plt.fill_between(t, x_lo, x_hi, facecolor=color, edgecolor='none', alpha=0.1)

cur_algos = ['ma2c_nc', 'ma2c_cnet', 'ia2c', 'ia2c_fp']
plot_comp_series(cur_algos, 'avg_queue', 'traffic', 'Average queue length (veh)')
plot_comp_series(cur_algos, 'avg_wait_sec', 'traffic', 'Average intersection delay (s/veh)')

### CACC execution result: summary

In [337]:

prefix = exp_name.split('_')[1]
table_names = ['traffic', 'control']

def load_summarize_data():
    data = {}
    for algo in ['ia2c', 'ia2c_fp', 'ia2c_cu', 'ma2c_nc', 'ma2c_cnet', 'ma2c_dial']:
#     for algo in ['ma2c_nc', 'ma2c_cnet']:
        print(algo)
        data[algo] = {}
        for tab in table_names:
            data_dir = test_dir + ('%s_%s_%s.csv' % (prefix, algo, tab))
            df = pd.read_csv(data_dir)
            if tab == 'control':
                col_names = ['reward']
            elif tab == 'traffic':
                df['avg_headway_m'] = 0
                df['avg_speed_mps'] = 0
                df['avg_accel_mps2'] = 0
                for i in range(1, 9):
                    df['avg_headway_m'] += df['headway_{:d}_m'.format(i)]
                    df['avg_speed_mps'] += df['velocity_{:d}_mps'.format(i)]
                    df['avg_accel_mps2'] += abs(df['accel_{:d}_mps2'.format(i)])
                df['avg_headway_m'] /= 8
                df['avg_speed_mps'] /= 8
                df['avg_accel_mps2'] /= 8
                col_names = ['avg_headway_m', 'avg_speed_mps', 'avg_accel_mps2']
            print_metrics(df, col_names, tab)
            data[algo][tab] = df
    return data  
            
def print_metrics(df, col_names, tab_name):
    if tab_name == 'control':
        rewards = df.groupby(['episode']).reward.mean()
        mean_reward = np.mean(rewards.values)
        print('R_bar: mean %.2f' % (mean_reward))
        return
    episodes = list(df.episode.unique())
    num_episode = len(episodes)
    num_violation = 0
    for k, name in enumerate(col_names):
        res = np.zeros(601)
        for episode in episodes:
            cur_res = df.loc[df.episode == episode, name].values
            if len(cur_res) == 601:
                res += cur_res
            elif not k:
                num_violation += 1
        res = res / num_episode
        print('%s: mean %.2f, std: %.2f' % (name, np.mean(res), np.std(res)))
    print('VIOLIATION: %d' % num_violation)
            
data = load_summarize_data()

ia2c
avg_headway_m: mean 0.00, std: 0.00
avg_speed_mps: mean 0.00, std: 0.00
avg_accel_mps2: mean 0.00, std: 0.00
VIOLIATION: 50
R_bar: mean -2209.37
ia2c_fp
avg_headway_m: mean 18.21, std: 2.40
avg_speed_mps: mean 15.47, std: 3.37
avg_accel_mps2: mean 0.56, std: 0.48
VIOLIATION: 8
R_bar: mean -697.85
ia2c_cu
avg_headway_m: mean 11.60, std: 0.49
avg_speed_mps: mean 8.59, std: 1.19
avg_accel_mps2: mean 0.23, std: 0.28
VIOLIATION: 23
R_bar: mean -1038.09
ma2c_nc
avg_headway_m: mean 15.84, std: 2.10
avg_speed_mps: mean 13.43, std: 2.77
avg_accel_mps2: mean 0.50, std: 0.27
VIOLIATION: 13
R_bar: mean -934.73
ma2c_cnet
avg_headway_m: mean 16.24, std: 2.16
avg_speed_mps: mean 13.82, std: 2.88
avg_accel_mps2: mean 0.50, std: 0.29
VIOLIATION: 12
R_bar: mean -950.84
ma2c_dial
avg_headway_m: mean 14.42, std: 1.70
avg_speed_mps: mean 12.28, std: 2.49
avg_accel_mps2: mean 0.45, std: 0.21
VIOLIATION: 16
R_bar: mean -1111.68


### CACC execution result: plot

In [338]:
def plot_comp_series(algos, col_name, tab_name, ylabel, window=-1):
    plt.figure(figsize=(8,6))
#     i_episode = -1
    for algo in algos:
        df = data[algo][tab_name]
#         if i_episode == -1:
#             i_episode = np.argmax(data[algo]['control'].groupby('episode').reward.mean().values)+1
        df = df[df.episode == 1]
        plot_series(df, algo, col_name, tab_name, window)
    plt.xticks(fontsize=18)
    plt.yticks(fontsize=18)
    plt.xlim([0, 60])
    plt.grid(True, which='both')
    plt.xlabel('Simulation time (sec)', fontsize=18)
    plt.ylabel(ylabel, fontsize=18)
    plt.legend(loc='upper right', fontsize=18)
    plt.tight_layout()
    plt.savefig(plot_dir + ('/%s_comp.pdf' % col_name))
    plt.close()
    
def plot_series(df, algo, col_name, tab_name, window):
    algo_i = algo_labels.index(algo)
    color = color_cycle[colors[algo_i]]
    t = df.time_sec.values
    for veh_i in [1, 8]:
        if col_name == 'headway':
            cur_col_name = 'headway_{:d}_m'.format(veh_i)
        else:
            cur_col_name = 'velocity_{:d}_mps'.format(veh_i)
        if window > 0:
            cur_x = df[cur_col_name].rolling(window, min_periods=1).mean().values
        else:
            cur_x = df[cur_col_name].values    
        style = '-' if veh_i == 8 else ':'
        plt.plot(t, cur_x, color=color, linewidth=3, linestyle=style,
                 label='{}, veh# {:d}'.format(algo_names[algo_i], veh_i))
# Catchup
cur_algos = ['ma2c_nc', 'ia2c_fp']
# # Slowdown
# cur_algos = ['ma2c_nc', 'ia2c']
plot_comp_series(cur_algos, 'headway', 'traffic', 'Vehicle headway (m)')
plot_comp_series(cur_algos, 'velocity', 'traffic', 'Vehicle velocity (m/s)')

In [45]:
a = np.arange(30, 15-0.1, -2.5*0.02)
print(len(a))


302


array([30.  , 29.95, 29.9 , 29.85, 29.8 , 29.75, 29.7 , 29.65, 29.6 ,
       29.55, 29.5 , 29.45, 29.4 , 29.35, 29.3 , 29.25, 29.2 , 29.15,
       29.1 , 29.05, 29.  , 28.95, 28.9 , 28.85, 28.8 , 28.75, 28.7 ,
       28.65, 28.6 , 28.55, 28.5 , 28.45, 28.4 , 28.35, 28.3 , 28.25,
       28.2 , 28.15, 28.1 , 28.05, 28.  , 27.95, 27.9 , 27.85, 27.8 ,
       27.75, 27.7 , 27.65, 27.6 , 27.55, 27.5 , 27.45, 27.4 , 27.35,
       27.3 , 27.25, 27.2 , 27.15, 27.1 , 27.05, 27.  , 26.95, 26.9 ,
       26.85, 26.8 , 26.75, 26.7 , 26.65, 26.6 , 26.55, 26.5 , 26.45,
       26.4 , 26.35, 26.3 , 26.25, 26.2 , 26.15, 26.1 , 26.05, 26.  ,
       25.95, 25.9 , 25.85, 25.8 , 25.75, 25.7 , 25.65, 25.6 , 25.55,
       25.5 , 25.45, 25.4 , 25.35, 25.3 , 25.25, 25.2 , 25.15, 25.1 ,
       25.05, 25.  , 24.95, 24.9 , 24.85, 24.8 , 24.75, 24.7 , 24.65,
       24.6 , 24.55, 24.5 , 24.45, 24.4 , 24.35, 24.3 , 24.25, 24.2 ,
       24.15, 24.1 , 24.05, 24.  , 23.95, 23.9 , 23.85, 23.8 , 23.75,
       23.7 , 23.65,