In [2]:
%matplotlib inline 
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick
import seaborn as sns
sns.set_color_codes()
import pandas as pd
import numpy as np
import os
import xml.etree.cElementTree as ET

## Plot training curves in the small grid environment

In [4]:
# Root directory holding the experiment results. It can be overridden with the
# SIGNAL_CONTROL_RESULTS environment variable so the notebook does not have to
# be edited on another machine; the original location stays the default.
base_dir = os.environ.get('SIGNAL_CONTROL_RESULTS',
                          '/Users/tchu/Documents/rl_test/signal_control_results')
# Every figure produced below is saved into this directory.
plot_dir = base_dir + '/plots'
if not os.path.isdir(plot_dir):
    # makedirs (unlike mkdir) also creates missing parent directories.
    os.makedirs(plot_dir)
# Line colours; a curve uses the entry at the index of its agent in the plot.
# 'lightpurple' is not a matplotlib colour name, the xkcd palette provides it.
COLORS = ['blue', 'green', 'red', 'cyan', 'magenta', 'yellow', 'black', 'purple', 'pink',
          'brown', 'orange', 'teal', 'coral', 'lightblue', 'lime', 'lavender', 'turquoise',
          'darkgreen', 'tan', 'salmon', 'gold', 'xkcd:light purple', 'darkred', 'darkblue']
# Upper x-axis limit of the training-curve plots.
TRAIN_STEP = 2e6
# NOTE(review): presumably the number of parallel training environments; it is
# not referenced by the cells visible here -- confirm before removing.
NUM_ENV = 16

In [None]:
def plot_train_curve(scenario='small_grid'):
    """Plot smoothed training curves of the three A2C agents and print a summary.

    Reads the CSV files named ``<scenario>_<agent>_...csv`` from
    ``base_dir + '/train/'`` for the agents ``global``, ``local`` and
    ``neighbor``. Each file must provide the columns ``Step`` and ``Value``.
    The rolling mean of ``Value`` (window of 50 rows) is drawn with a
    +/- one rolling standard deviation band and the figure is saved as
    ``<plot_dir>/<scenario>_train.pdf``. Afterwards the final level and the
    gain (mean of the last 50 rows minus mean of the first 50 rows) of every
    agent are printed, also as a percentage of the ``global`` agent.

    Args:
        scenario: file-name prefix selecting the training runs to plot.

    Raises:
        ValueError: if the CSV of at least one agent cannot be found.
    """
    cur_dir = base_dir + '/train/'
    names = ['global', 'local', 'neighbor']
    labels = ['Centralized A2C', 'Independent A2C', 'Multi-agent A2C']
    # Parse the agent name relative to the scenario prefix instead of using a
    # fixed token index, so scenario names may contain any number of '_' and
    # short file names cannot raise an IndexError.
    prefix = scenario + '_'
    dfs = {}
    for file in os.listdir(cur_dir):
        if not (file.endswith('.csv') and file.startswith(prefix)):
            continue
        name = file[len(prefix):].split('_')[0]
        if name in names:
            dfs[name] = pd.read_csv(os.path.join(cur_dir, file))

    missing = [name for name in names if name not in dfs]
    if missing:
        raise ValueError('no training csv found in %s for scenario %r and agent(s): %s'
                         % (cur_dir, scenario, ', '.join(missing)))

    plt.figure(figsize=(10,8))
    ymin = min(df.Value.min() for df in dfs.values())
    ymax = max(df.Value.max() for df in dfs.values())
    xmin = min(df.Step.min() for df in dfs.values())
    window = 50
    # A lower bound of 0 is only meaningful when the data itself is never
    # negative; for negative rewards clamping would lift the band above the curve.
    clamp_at_zero = ymin >= 0
    for i, name in enumerate(names):
        df = dfs[name]
        rolling = df.Value.rolling(window)
        x_mean = rolling.mean().values
        x_std = rolling.std().values
        plt.plot(df.Step.values, x_mean, color=COLORS[i], linewidth=3, label=labels[i])
        x_lo = x_mean - x_std
        if clamp_at_zero:
            x_lo = np.maximum(x_lo, 0)
        x_hi = x_mean + x_std
        plt.fill_between(df.Step.values, x_lo, x_hi, facecolor=COLORS[i], edgecolor='none', alpha=0.3)

    plt.xlim([xmin,TRAIN_STEP])
    # Pad the range by 5% of the magnitude on both sides. For negative values
    # this equals [ymin * 1.05, ymax * 0.95]; for positive values it still
    # widens the range instead of cutting into the data.
    plt.ylim([ymin - 0.05 * abs(ymin), ymax + 0.05 * abs(ymax)])
    plt.xticks(fontsize=15)
    plt.yticks(fontsize=15)
    plt.xlabel('Training step', fontsize=20)
    plt.ylabel('Moving averaged episode reward', fontsize=20)
    plt.legend(loc='best', fontsize=20)
    plt.tight_layout()
    plt.savefig(plot_dir + ('/%s_train.pdf' % scenario))
    plt.close()

    # calculate performance gains: (final level, final level - initial level)
    ys = {}
    for name in names:
        y = dfs[name].Value.values
        final = np.mean(y[-window:])
        init = np.mean(y[:window])
        ys[name] = (final, final - init)

    # Index 0 of each entry is the final level, index 1 the gain.
    for title, idx in (('final performance wrt centralized agent:', 0),
                       ('performance gain wrt centralized agent:', 1)):
        print(title)
        v_global = ys['global'][idx]
        v_local = ys['local'][idx]
        v_neighbor = ys['neighbor'][idx]
        print('global: %.2f, local: %.2f(%.2f%%), neighbor: %.2f(%.2f%%)' %
              (v_global, v_local, (v_local / v_global) * 100,
               v_neighbor, (v_neighbor / v_global) * 100))
    
# Uses the default scenario, 'small_grid'.
plot_train_curve()

## Plot training curves in the large grid environment

In [None]:
# Same training-curve plot and summary, restricted to files of the 'large_grid' scenario.
plot_train_curve(scenario='large_grid')

## Plot evaluation curves in the small grid environment

In [9]:
# Extent of the evaluation time axis in seconds: it is the upper x-limit of the
# evaluation plots and determines the number of time bins per episode.
episode_sec = 7200
def fixed_agg(xs, window, agg):
    """Aggregate consecutive, non-overlapping groups of ``window`` samples.

    Args:
        xs: 1-D sequence whose length is a multiple of ``window``.
        window: number of consecutive samples per group.
        agg: ``'sum'``, ``'mean'`` or ``'median'``.

    Returns:
        1-D array with one aggregated value per group.

    Raises:
        ValueError: if ``agg`` is not supported (previously ``None`` was
            returned silently), or -- raised by numpy -- if ``xs`` cannot be
            reshaped into rows of ``window`` samples.
    """
    reducers = {'sum': np.sum, 'mean': np.mean, 'median': np.median}
    if agg not in reducers:
        raise ValueError('unsupported agg %r; expected one of %s' % (agg, sorted(reducers)))
    # One row per group, so reducing along axis 1 aggregates each group.
    groups = np.reshape(xs, (-1, window))
    return reducers[agg](groups, axis=1)

def varied_agg(xs, ts, window, agg, end_sec=None):
    """Aggregate irregularly timed samples into consecutive time bins.

    Bin ``k`` (starting at 0) collects the samples with
    ``k * window < t <= (k + 1) * window``. Bins that receive no sample are
    reported as 0. Every bin between two samples is emitted, so the position
    of a value in the result always corresponds to its time bin, even when
    the timestamps skip several windows at once.

    Args:
        xs: sample values.
        ts: timestamp of each sample, in ascending order, same length as ``xs``.
        window: width of a bin, in the unit of ``ts``; must be positive.
        agg: ``'sum'``, ``'mean'`` or ``'median'``.
        end_sec: last time covered by the bins; defaults to the module-level
            ``episode_sec``.

    Returns:
        1-D array with one aggregated value per bin.

    Raises:
        ValueError: if ``agg`` is not supported or ``window`` is not positive.
    """
    reducers = {'sum': np.sum, 'mean': np.mean, 'median': np.median}
    if agg not in reducers:
        raise ValueError('unsupported agg %r; expected one of %s' % (agg, sorted(reducers)))
    if window <= 0:
        raise ValueError('window must be positive, got %r' % (window,))
    reduce_bin = reducers[agg]
    if end_sec is None:
        end_sec = episode_sec

    t_bin = window  # inclusive upper edge of the bin currently being filled
    x_bins = []
    cur_x = []
    # A sentinel sample just past the horizon closes the trailing bins; its
    # value lands in a bin that is never emitted.
    for x, t in zip(list(xs) + [0], list(ts) + [end_sec + 1]):
        # Close every bin that ends before this sample, including empty ones.
        while t > t_bin:
            x_bins.append(reduce_bin(np.array(cur_x)) if cur_x else 0)
            cur_x = []
            t_bin += window
        cur_x.append(x)
    return np.array(x_bins)
    
def plot_series(df, name, tab, label, color, window=None, agg='sum', reward=False):
    """Draw the episode-averaged time series of one column on the current figure.

    The rows of every episode are ordered by time (``arrival_sec`` for the
    ``'trip'`` table, ``time_sec`` otherwise) and optionally processed:

    * ``agg == 'mv'``: rolling mean over ``window`` rows, length unchanged;
    * any other ``agg`` with a ``window``: binning with ``varied_agg``
      (``'trip'`` table, ``window`` in seconds) or ``fixed_agg`` (other
      tables, ``window`` in rows).

    The mean over episodes is plotted and, with more than one episode, a
    +/- one standard deviation band is added.

    Args:
        df: table with an ``episode`` column, the time column and ``name``.
        name: column to plot.
        tab: table kind; ``'trip'`` selects irregular, arrival-time based data.
        label: legend label, also used in the printed reward summary.
        color: matplotlib colour of line and band.
        window: rolling / binning window, or ``None`` for the raw series.
        agg: ``'mv'``, ``'sum'``, ``'mean'`` or ``'median'``.
        reward: if true the series holds one row per 10 seconds instead of
            one per second, and the mean of the plotted curve is printed.

    Returns:
        ``(low, high)`` extent of the drawn data. The first 30 points are
        ignored when the series is longer than that.

    Raises:
        ValueError: if ``df`` holds no episode.
    """
    episodes = list(df.episode.unique())
    num_episode = len(episodes)
    if num_episode == 0:
        raise ValueError('no episode found for %s' % label)

    # Seconds covered by one row: reward rows come every 10 s (720 per 7200 s).
    sample_sec = 10 if reward else 1
    binned = bool(window) and (agg != 'mv')
    num_time = episode_sec // sample_sec
    if binned:
        num_time = num_time // window
    t_col = 'arrival_sec' if tab == 'trip' else 'time_sec'

    x = np.zeros((num_episode, num_time))
    for i, episode in enumerate(episodes):
        cur_df = df[df.episode == episode].sort_values(t_col)
        if binned and tab == 'trip':
            # Values and timestamps must come from the same sorted frame,
            # otherwise they are paired up incorrectly for unsorted input.
            cur_x = varied_agg(cur_df[name].values, cur_df[t_col].values, window, agg)
        elif binned:
            cur_x = fixed_agg(cur_df[name].values, window, agg)
        elif window:
            cur_x = cur_df[name].rolling(window, min_periods=1).mean().values
        else:
            cur_x = cur_df[name].values
        x[i] = cur_x

    if num_episode > 1:
        x_mean = np.mean(x, axis=0)
        x_std = np.std(x, axis=0)
    else:
        x_mean = x[0]

    # Each plotted point sits at the end of the interval it summarises; derived
    # from num_time so the axis always matches the data length.
    point_sec = sample_sec * window if binned else sample_sec
    t = np.arange(1, num_time + 1) * point_sec

    if reward:
        print('%s: %.2f' % (label, np.mean(x_mean)))
    plt.plot(t, x_mean, color=color, linewidth=3, label=label)

    # NOTE(review): the first 30 points are excluded from the returned extent,
    # presumably to ignore the start-up transient -- confirm. Short series are
    # used in full so the slice can never be empty.
    skip = 30 if num_time > 30 else 0
    if num_episode > 1:
        x_lo = x_mean - x_std
        # Clamp the band at 0 only for data that is never negative; clamping a
        # negative series (e.g. rewards) would move the band above the curve.
        if not np.any(x < 0):
            x_lo = np.maximum(x_lo, 0)
        x_hi = x_mean + x_std
        plt.fill_between(t, x_lo, x_hi, facecolor=color, edgecolor='none', alpha=0.3)
        return np.min(x_lo[skip:]), np.max(x_hi[skip:])
    return np.min(x_mean[skip:]), np.max(x_mean[skip:])
    
def plot_combined_series(dfs, agent_names, col_name, tab_name, agent_labels, y_label, fig_name,
                         window=None, agg='sum', reward=False):
    """Overlay the series of several agents in one figure and save it as a PDF.

    For every agent in ``agent_names`` the table ``dfs[agent][tab_name]`` is
    handed to ``plot_series``; colour and legend label are chosen by the
    position of the agent in the list. The y-axis spans the smallest lower
    and largest upper bound reported by ``plot_series``, the x-axis spans
    ``[0, episode_sec]``. The figure is written to
    ``<plot_dir>/<fig_name>.pdf`` and closed.
    """
    plt.figure(figsize=(10,8))

    # Seed the bounds with +/- infinity so the first agent always wins.
    lower_bounds = [np.inf]
    upper_bounds = [-np.inf]
    for idx, agent in enumerate(agent_names):
        low, high = plot_series(dfs[agent][tab_name], col_name, tab_name,
                                agent_labels[idx], COLORS[idx],
                                window=window, agg=agg, reward=reward)
        lower_bounds.append(low)
        upper_bounds.append(high)

    # Axis ranges.
    plt.xlim([0, episode_sec])
    plt.ylim([min(lower_bounds), max(upper_bounds)])
    # Tick and axis labelling.
    plt.xticks(fontsize=15)
    plt.yticks(fontsize=15)
    plt.xlabel('Simulation time (sec)', fontsize=20)
    plt.ylabel(y_label, fontsize=20)
    plt.legend(loc='best', fontsize=20)
    plt.tight_layout()

    out_path = plot_dir + ('/%s.pdf' % fig_name)
    plt.savefig(out_path)
    plt.close()
    
def sum_reward(x):
    """Return the sum of a comma-separated list of numbers.

    Args:
        x: text such as ``'-1.5,-2,0'``. A plain number is accepted as well,
            because pandas parses a column holding a single value per row as
            numeric, on which ``split`` would raise an AttributeError.

    Returns:
        The sum of all entries as a float.
    """
    parts = x.split(',') if hasattr(x, 'split') else [x]
    return np.sum([float(part) for part in parts])

def plot_eval_curve(scenario='small_grid', date='may22'):
    """Load the evaluation logs of a scenario and save all comparison figures.

    Reads the files ``<scenario>_<agent>_<measure>*.csv`` from
    ``base_dir + '/eval/<scenario>_<date>/eva_data'`` for the agents listed in
    ``names``. Two derived columns are added: ``ratio_stopped_car`` (percent
    of stopped cars) on ``traffic`` tables and ``global_reward`` (sum of the
    comma-separated ``reward`` entries) on ``control`` tables. One figure per
    entry of ``plot_specs`` is then written through ``plot_combined_series``.

    Args:
        scenario: file-name prefix and first part of the evaluation folder name.
        date: second part of the evaluation folder name.
    """
    cur_dir = base_dir + ('/eval/%s_%s/eva_data' % (scenario, date))
    # Alternative comparison of the three A2C variants:
    #     names = ['global', 'local', 'neighbor']
    #     labels = ['Centralized A2C', 'Independent A2C', 'Multi-agent A2C']
    names = ['naive', 'neighbor']
    labels = ['Greedy policy', 'Multi-agent A2C']

    # Parse agent and measure relative to the scenario prefix instead of using
    # fixed token indices: scenario names may then contain any number of '_'
    # and unrelated or short file names are skipped rather than raising an
    # IndexError.
    prefix = scenario + '_'
    dfs = {}
    for file in os.listdir(cur_dir):
        if not (file.endswith('.csv') and file.startswith(prefix)):
            continue
        tokens = file[len(prefix):].split('_')
        if len(tokens) < 2:
            continue
        name = tokens[0]
        measure = tokens[1].split('.')[0]
        if name not in names:
            continue
        df = pd.read_csv(os.path.join(cur_dir, file))
        if measure == 'traffic':
            df['ratio_stopped_car'] = df.number_stopped_car / df.number_total_car * 100
        if measure == 'control':
            df['global_reward'] = df.reward.apply(sum_reward)
        dfs.setdefault(name, {})[measure] = df

    # (column, table, y-axis label, figure-name suffix, plot_combined_series options)
    plot_specs = [
        # stopped car ratio
        ('ratio_stopped_car', 'traffic', 'Stopped car ratio (%)', '_stopcar',
         dict(window=60, agg='mv')),
        # avg speed
        ('average_speed', 'traffic', 'Average car speed (m/s)', '_speed',
         dict(window=60, agg='mv')),
        # avg waiting time
        ('average_waiting_time', 'traffic', 'Average waiting time (sec)', '_wait',
         dict(window=60, agg='mv')),
        # trip completion
        ('number_arrived_car', 'traffic', 'Trip completion number', '_tripcomp',
         dict(window=60, agg='sum')),
        # trip time
        ('duration_sec', 'trip', 'Median trip time (sec)', '_triptime',
         dict(window=60, agg='median')),
        # trip waiting time
        ('wait_sec', 'trip', 'Median trip waiting time (sec)', '_tripwait',
         dict(window=60, agg='median')),
        # step reward
        ('global_reward', 'control', 'Step reward', '_reward',
         dict(reward=True, window=6, agg='mv')),
    ]
    for col_name, tab_name, y_label, suffix, options in plot_specs:
        plot_combined_series(dfs, names, col_name, tab_name, labels,
                             y_label, scenario + suffix, **options)
# Uses the defaults scenario='small_grid' and date='may22'; the reward plot
# prints one "<label>: <mean reward>" line per agent.
plot_eval_curve()

Greedy policy: -30.81
Multi-agent A2C: -8.65
