In [None]:
import argparse
import csv
import matplotlib.pyplot as plt
import glob
import os
import json
import seaborn as sns
import pandas as pd
import mpld3
from IPython import display

In [None]:
from process_log import Tags, Log, Epochs

In [None]:
# Directory holding the logs of one experiment. The cells below expect it to
# contain exactly one JSON file plus one sub-directory per run/repetition.
leonhard_directory = "../logs/island_scaling_Nov_15_003228"

In [None]:
# Tags object constructed from "tags.hpp"; it is handed to every Log and Epochs
# instance created further down.
tags = Tags("tags.hpp")

In [None]:
all_names = os.listdir(leonhard_directory)

# Validate JSON: the directory must contain exactly one JSON file.
# Errors are raised instead of calling exit(1): inside a notebook `exit` is
# IPython's exit helper rather than sys.exit, so the status code is not
# honoured and the failure would not show up as a proper error in the cell.
json_candidates = [name for name in all_names if ".json" in name]
if len(json_candidates) == 0:
    raise FileNotFoundError("Could not find JSON file in directory {}".format(leonhard_directory))
if len(json_candidates) > 1:
    raise RuntimeError("Found multiple JSON files ({}) in the directory {}".format(json_candidates, leonhard_directory))

# Load the experiment description; only its "repetitions" entry is used here.
with open(os.path.join(leonhard_directory, json_candidates[0])) as file:
    json_file = json.load(file)
repetitions = json_file["repetitions"]

# Keep only sub-directories, then strip the last "_"-separated token from each
# name (the loading cell re-appends "_<repetition>" to build the folder name).
# Sorting makes the order, and therefore the row order of the frame built
# from it, reproducible between kernel runs.
all_names = [name for name in all_names if os.path.isdir(os.path.join(leonhard_directory, name))]
unique_names = sorted({"_".join(name.split("_")[:-1]) for name in all_names})
unique_names

In [None]:
# Build one long-format frame with a row per (fitness, wall clock time, epoch)
# sample, annotated with the rank, repetition, `n` and data set it came from.
#
# Per-file frames are collected in a list and concatenated once at the end:
# DataFrame.append was removed in pandas 2.0, and appending inside the loop
# re-copies the whole accumulated frame on every iteration.
fitness_columns = ["fitness", "wall clock time", "epoch"]
frames = []
for run_name in unique_names:
    # The first two "_"-separated tokens of the run name are used as `n` and
    # the data set name; `n` stays a string here.
    n = run_name.split("_")[0]
    data = run_name.split("_")[1]
    for repetition in range(repetitions):
        folder_name = run_name + "_" + str(repetition)
        folder_path = os.path.join(leonhard_directory, folder_name)
        folder_contents = [name for name in os.listdir(folder_path) if ".bin" in name]
        for filename in folder_contents:
            # Each log file is parsed exactly once.
            log = Log(os.path.join(folder_path, filename), tags)
            # The rank is the second-to-last "_"-separated token of the file name.
            rank = int(filename.split("_")[-2])
            epochs = Epochs(log, tags)
            frame = pd.DataFrame(epochs.get_fitness_vs_time_dataframe(), columns=fitness_columns)
            frame["rank"] = rank
            frame["rep"] = repetition
            frame["n"] = n
            frame["data"] = data
            frames.append(frame)

# Keep the original "no logs found" result (df is None) instead of letting
# pd.concat raise on an empty list.
df = pd.concat(frames, ignore_index=True) if frames else None

In [None]:
# Cache the combined frame as a gzip-compressed CSV. The index is written as
# well, which is why the loading cell drops the "Unnamed: 0" column.
df.to_csv("island_scaling_fitness_time.gz", compression="gzip")

In [None]:
# Reload the cached results. The first CSV column holds the index that was
# written on save; pandas reads it back as "Unnamed: 0", so discard it.
cached_csv = "island_scaling_fitness_time.gz"
df = pd.read_csv(cached_csv).drop(columns="Unnamed: 0")
df

In [None]:
# Take out rank variation: collapse all ranks of one (epoch, rep, n, data)
# combination into a single row holding the minimum fitness. The maximum
# wall clock time is aggregated too, but that column is dropped right away.
run_keys = ["epoch", "rep", "n", "data"]
rank_reducers = {"fitness": "min", "wall clock time": "max"}
new_df = (
    df.groupby(run_keys, as_index=False)
    .agg(rank_reducers)
    .drop(columns="wall clock time")
)
new_df

In [None]:
# Group on every column that new_df has at this point, so max() has no
# remaining column to aggregate and the result is one row per distinct
# key combination.
dedup_keys = ["epoch", "rep", "n", "data", "fitness"]
fake_df = new_df.groupby(by=dedup_keys, as_index=False).max()
fake_df

In [None]:
# Spot-check the rows for n == 1, epoch 0 on the a280 data set.
# The three conditions are combined into one mask: chaining `[...][...][...]`
# applies masks computed on the full frame to already-filtered frames, which
# only works through implicit reindexing and emits a pandas UserWarning.
fake_df.loc[(fake_df.n == 1) & (fake_df.epoch == 0) & (fake_df.data == "a280csv")]

In [None]:
import matplotlib.ticker as ticker

# Fitness over epochs for the a280 data set, one hue per value of `n`,
# restricted to epochs 500-2500 and fitness 4000-14000.
fig, ax = plt.subplots()
sns.lineplot(ax=ax, x="epoch", y="fitness", hue="n", legend='full', data=new_df[new_df.data == "a280csv"])
# Title added so this figure is labelled like the other figures in the notebook.
ax.set_title("Island Model - TSP Graph a280")
ax.set_xlim(500, 2500)
ax.set_ylim(4000, 14000)

In [None]:
# Write the figure created in the previous cell to an SVG file.
fig.savefig("island_scaling_a280_part.svg")

In [None]:
# Fitness over epochs for the berlin52 data set, one hue per value of `n`,
# over epochs 0-2500 and fitness 7500-12500.
fig, ax = plt.subplots()
sns.lineplot(ax=ax, x="epoch", y="fitness", hue="n", legend='full', data=new_df[new_df.data == "berlin52csv"])
# The title previously said "a280" although the plotted data (and the file the
# figure is saved to) is berlin52.
ax.set_title("Island Model - TSP Graph berlin52")
ax.set_xlim(0, 2500)
ax.set_ylim(7500, 12500)

In [None]:
# Write the figure created in the previous cell to an SVG file.
fig.savefig("island_scaling_berlin52_full.svg")

In [None]:
# Fitness over epochs for the berlin52 data set, using only every 100th
# epoch, over epochs 0-1000 and fitness 7500-11000.
# Both conditions are combined into one mask: the chained form
# `new_df[cond_a][cond_b]` applied a mask built on the full frame to the
# already-filtered frame, which relies on implicit reindexing and warns.
berlin52_every_100 = new_df[(new_df.data == "berlin52csv") & (new_df.epoch % 100 == 0)]

fig, ax = plt.subplots()
sns.lineplot(ax=ax, x="epoch", y="fitness", hue="n", legend='full', data=berlin52_every_100)
ax.set_title("Island Model - TSP Graph berlin52")
ax.set_xlim(0, 1000)
ax.set_ylim(7500, 11000)

In [None]:
# Write the figure created in the previous cell to an SVG file.
fig.savefig("island_scaling_berlin52_fast.svg")