In [36]:
import pandas as pd
import glob

files = glob.glob("../train_logs/*.csv")

dfs = []
for file in files:
    info = file.split("_")
    num_nodes = info[4]
    model = info[3]
    seed = info[5].split(".")[0]

    b = pd.read_csv(file)
    b['num_nodes'] = num_nodes
    b['model'] = model
    b['seed'] = seed

    dfs.append(b)

data = pd.concat(dfs)
data["num_nodes"] = pd.to_numeric(data.num_nodes)
data["seed"] = pd.to_numeric(data.seed)

In [2]:
def get_traces(fig):
    traces = []
    
    for trace in range(len(fig["data"])):
        traces.append(fig["data"][trace])
    
    return traces

def del_duplicate_legend(fig):
    names = set()
    fig.for_each_trace( 
    lambda trace:
        trace.update(showlegend=False)
        if (trace.name in names) else names.add(trace.name))

In [43]:
average_after_seed = data.groupby(['model', "Epoch", "num_nodes"]).mean().reset_index()


Unnamed: 0,model,Epoch,num_nodes,Loss,Cost,Advantage,Time,seed
5106,vrp,0,20,-55.034569,12.772479,1.222014,2.628204,96.0
5107,vrp,0,30,-151.919281,19.214766,2.054870,5.225970,96.0
5108,vrp,0,40,-340.470474,25.525448,3.292745,8.263191,96.0
5109,vrp,1,20,-12.029572,11.905854,0.232942,5.205371,96.0
5110,vrp,1,30,-107.295521,18.403673,1.556964,10.242987,96.0
...,...,...,...,...,...,...,...,...
7654,vrp,849,30,-1.132119,5.897308,0.003150,3131.280885,96.0
7655,vrp,849,40,0.409485,7.115989,-0.068389,4980.066335,96.0
7656,vrp,850,20,-0.650090,4.520659,0.000516,1637.008518,96.0
7657,vrp,850,30,-0.875455,5.933133,-0.013314,3135.135931,96.0


In [46]:
import plotly.express as px
from plotly.subplots import make_subplots

color_map = {
    "vrp": "red",
    "tsp": "blue",
    "irp": "green"
}

# these are averaged over the seed already
twenty_nodes = average_after_seed[(average_after_seed.num_nodes == 20)]
twenty_plot = px.line(twenty_nodes, x="Epoch", y="Cost", title="Hey", color="model", color_discrete_map = color_map, width=1000)
thirdy_nodes = average_after_seed[(average_after_seed.num_nodes == 30)]
thirdy_plot = px.line(thirdy_nodes, x="Epoch", y="Cost", title="Hey", color="model", color_discrete_map = color_map,width=1000)
fourty_nodes = average_after_seed[(average_after_seed.num_nodes == 40)]
fourty_plot = px.line(fourty_nodes, x="Epoch", y="Cost", title="Hey", color="model", color_discrete_map = color_map, width=1000)

twenty_traces = get_traces(twenty_plot)
thirdy_traces = get_traces(thirdy_plot)
fourty_traces = get_traces(fourty_plot)

plots = make_subplots(rows=2, cols=2, subplot_titles=["20 Nodes","30 Nodes","40 Nodes"])
for i in range(1,4): 
    plots['layout']['xaxis{}'.format(i)]['title']={'text': "Epoch", 'font_size': 14}
    plots['layout']['yaxis{}'.format(i)]['title']={'text': "Cost", 'font_size': 14}

for i in range(len(twenty_traces)):
    plots.append_trace(twenty_traces[i], row=1, col=1)
    plots.append_trace(thirdy_traces[i], row=1, col=2)
    plots.append_trace(fourty_traces[i], row=2, col=1)

plots.update_layout(
    height=800,
    width=1400,
    title_font_size=1,
    title="Cost to solve each environment during training over 850 training epochs (batch size 256, averaged over seeds)", 
    title_x = 0.5,
    font=dict(
        size=16,
    ),
    template="plotly_dark",
)

del_duplicate_legend(plots)
plots.show()

In [4]:
files = glob.glob("../reproduction_log/*.csv")


validation_results = []
for file in files:
    info = file.split("_")
    
    # skip generalization:
    if "in" in info:
        continue

    num_nodes = info[3]

    df = pd.read_csv(file)
    df['num_nodes'] = num_nodes

    validation_results.append(df)

combined_results = pd.concat(validation_results)
combined_results["num_nodes"] = pd.to_numeric(combined_results.num_nodes)
combined_results["Seed"] = pd.to_numeric(combined_results.Seed)

In [5]:
gen_result = []

for file in files:
    info = file.split("_")
    
    # skip generalization:
    if "in" not in info:
        continue

    num_nodes = info[4]
    env = info[7].split(".")[0]

    df = pd.read_csv(file)
    df['num_nodes'] = num_nodes
    df['Model'] = df['Model'] + "-20"
    df['env'] = env
    gen_result.append(df)

gen_result = pd.concat(gen_result)
gen_result["num_nodes"] = pd.to_numeric(gen_result.num_nodes)
gen_result["Seed"] = pd.to_numeric(gen_result.Seed)

In [6]:
import scipy.stats
import numpy as np

def mean_confidence_interval(data, confidence=0.95):
    a = 1.0 * np.array(data)
    n = len(a)
    m, se = np.mean(a), scipy.stats.sem(a)
    h = se * scipy.stats.t.ppf((1 + confidence) / 2., n-1)
    return m, m-h, m+h

vrp_res = combined_results.query("Model == 'IRP-Agent' or Model == 'IRP-Random-Agent'")


def generate_bar_plot(df, colors, x, y , color):
    agg_data = []
    unique_vals = df[["Model", "num_nodes"]].drop_duplicates()
    for _, row in unique_vals.iterrows():

        if x == "env":
            env = df[(df.Model == row[0]) & (df.num_nodes == row[1])]["env"].values[0]
        else:
            env = "None"

        data = df[(df.Model == row[0]) & (df.num_nodes == row[1])]["Mean Distance"]
        mean, lower_ci, upper_ci = mean_confidence_interval(data)

        agg_data.append(
            [row[0], row[1], mean, lower_ci, upper_ci, env]
        )
        
    agg_df = pd.DataFrame(agg_data, columns=["model", "num_nodes", "cost", "lower_ci", "upper_ci", "env"])
    agg_df.sort_values(by=["model"], inplace=True)
    fig = px.bar(agg_df, x=x, y=y,
                color=color, barmode='group',
                height=400, template="plotly_dark", color_discrete_sequence=colors)
    fig.update_traces(
        error_y={
            "type": "data",
            "symmetric": False,
            "array": agg_df["upper_ci"] - agg_df["cost"],
            "arrayminus": agg_df["cost"] - agg_df["lower_ci"],
        }
    )

    return fig



In [7]:
nodes_20_models = gen_result.query("Model == 'IRP-Agent-20' or Model == 'TSP-Agent-20' or Model == 'VRP-Agent-20'")
nodes_40_models = combined_results.query("Model == 'IRP-Agent' or Model == 'TSP-Agent' or Model == 'VRP-Agent'")
nodes_40_models.loc[nodes_40_models["Model"] == "IRP-Agent", "env"] = "IRP"
nodes_40_models.loc[nodes_40_models.Model == "TSP-Agent", "env"] = "TSP"
nodes_40_models.loc[nodes_40_models.Model == "VRP-Agent", "env"] = "VRP"

nodes_40_models = nodes_40_models[nodes_40_models.num_nodes == 40]

general_plot_df = pd.concat([nodes_20_models, nodes_40_models])



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [19]:
# hacky way to force plotly to group bars even with nan values 
# and force legend order
# and use express plots for subplots

# data of models evaled in their trained env (env and num nodes)
tsp_res = combined_results.query("Model == 'TSP-Agent' or Model == 'TSP-Random-Agent'")
vrp_res = combined_results.query("Model == 'VRP-Agent' or Model == 'VRP-Random-Agent'")
irp_res = combined_results.query("Model == 'IRP-Agent' or Model == 'IRP-Random-Agent'")

# fig of the data above
tsp_fig = generate_bar_plot(tsp_res, ["#636efa", "#EF553B"], "num_nodes", "cost", "model")
vrp_fig = generate_bar_plot(vrp_res, ["#1F77B4", "#E45756"], "num_nodes", "cost", "model")
irp_fig = generate_bar_plot(irp_res, ["#3366CC", "#DC3912"], "num_nodes", "cost", "model")
general_fig = generate_bar_plot(general_plot_df, ["maroon", "teal"], "env", "cost", "model" )

for i in range(0, 6, 2):
    general_fig._data_objs[i]["offsetgroup"] = "IRP-Agent-20"
    general_fig._data_objs[i+1]["offsetgroup"] = "IRP-Agent"
    general_fig._data_objs[i]["legendgroup"] = "20"
    general_fig._data_objs[i+1]["legendgroup"] = "40"
    general_fig._data_objs[i]["name"] = "Trained on 20 nodes"
    general_fig._data_objs[i+1]["name"] = "Trained on 40 nodes"



tsp_res_traces = get_traces(tsp_fig)
vrp_res_traces = get_traces(vrp_fig)
irp_res_traces = get_traces(irp_fig)
gen_res_traces = get_traces(general_fig)
plots = make_subplots(rows=2, cols=2, subplot_titles=["TSP-Environment","VRP-Environment","IRP-Environment", "Generalizability"])
for i in range(1,4):
    plots['layout']['xaxis{}'.format(i)]['title']={'text': "Number of nodes", 'font_size': 14}
    plots['layout']['yaxis{}'.format(i)]['title']={'text': "Cost", 'font_size': 14}

plots['layout']['xaxis4'.format(i)]['title']={'text': "Environment", 'font_size': 14}
plots['layout']['yaxis4'.format(i)]['title']={'text': "Cost", 'font_size': 14}

for i in range(len(tsp_res_traces)):
    plots.add_trace(tsp_res_traces[i], row=1, col=1)

for i in range(len(tsp_res_traces)):
    plots.add_trace(vrp_res_traces[i], row=1, col=2)

for i in range(len(tsp_res_traces)):
    plots.add_trace(irp_res_traces[i], row=2, col=1)

for i in range(len(gen_res_traces)):
    plots.add_trace(gen_res_traces[i], row=2, col=2)


plots.update_layout(
    height=800,
    width=1400,
    title="Comparison of trained agents vs random agent", 
    title_x = 0.5,
    font=dict(
        size=16,
    ),
    template="plotly_dark",
)

del_duplicate_legend(plots)
plots.show()