In [1]:
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd

# Relative paths to the result directories.
# NOTE(review): presumably each is passed to get_result_dicts to load its CSV files — confirm.
ENCODING = "./results/encoding"
POLICY_DIR = "./results/policy"
SEARCH_DIR = "./results/search"
# Line colours used by the plotting code (one colour per plotted results key).
COLOURS = ["blue", "orange", "green", "red"]
# Nested results: outer key is a string (the plotting code looks for "<dimension>d"
# inside it), inner key is a sequence length, value is the list of result dicts
# for that length.
# NOTE(review): this starts empty and no visible cell fills it — confirm where it is populated.
all_results: dict[str, dict[int, list[dict]]] = {}

In [10]:
# Get a list of the results from a directory
def get_result_dicts(dir_path: str) -> list[dict[str, float | str]]:
    results = []
    for file_name in filter(lambda x: x.endswith(".csv"), os.listdir(dir_path)):
        # Open the csv file
        file_dict = pd.read_csv(os.path.join(dir_path, file_name)).to_dict()
        # For each header in the csv where (key1=name,val1=dict(key=row_num,val=val_at_row))
        for key1, val1 in file_dict.items():
            # If the value is a string, convert from a dict of strings to 1 string
            if type(val1[0]) == str:
                file_dict[key1] = val1[0]
            # Else find the average of the values
            else:
                values = [v for v in val1.values()]
                file_dict[key1] = sum(values) / len(values)
        results.append(file_dict)
    return results

In [None]:
def plot_dimension_data(dimension: int = 2) -> None:
    """Plot mean solve time, variable count and clause count against sequence length.

    Reads the module-level ``all_results`` mapping. For every key containing
    ``f"{dimension}d"`` three curves are drawn: the mean ``"time"`` on the
    left axis, and the mean ``"variables"`` and ``"clauses"`` on the right
    axis. Both y axes are log-scaled. Means are rounded to 4 decimal places.

    Args:
        dimension: Selects which keys of ``all_results`` are plotted.
    """
    _fig, ax_left = plt.subplots(figsize=(16, 10))
    ax_right = ax_left.twinx()

    plotted = 0
    for key, results_by_length in all_results.items():
        if f"{dimension}d" not in key:
            continue

        # One (length, mean time, mean variables, mean clauses) row per length
        data = []
        for length, models in results_by_length.items():
            if not models:
                # np.mean([]) would give nan and a RuntimeWarning
                continue
            data.append((
                int(length),
                round(np.mean([m["time"] for m in models]), 4),
                round(np.mean([m["variables"] for m in models]), 4),
                round(np.mean([m["clauses"] for m in models]), 4),
            ))
        if not data:
            # zip(*[]) cannot be unpacked into the four sequences below
            continue

        # Sort the data of each encoding type by the length of the sequences
        data.sort(key=lambda row: row[0])

        # Colour the line based off the trailing version digit of the key; fall
        # back to the plotting order when the key does not end in a digit, and
        # wrap around so an index past the palette never raises IndexError.
        suffix = key[-1]
        colour_index = int(suffix) if suffix.isdecimal() else plotted
        colour = COLOURS[colour_index % len(COLOURS)]
        plotted += 1

        lengths, times, variables, clauses = zip(*data)
        ax_left.plot(lengths, times, color=colour, marker="o", label=f"{key} time")
        ax_right.plot(lengths, variables, ":", color=colour, label=f"{key} variables")
        ax_right.plot(lengths, clauses, "--", color=colour, label=f"{key} clauses")

    ax_left.set_yscale("log")
    ax_left.set_ylabel("Time (s)")

    ax_right.set_yscale("log")
    ax_right.set_ylabel("Count")
    ax_left.set_xlabel("Sequence Length")

    if plotted:
        # A legend with no labelled curves only produces a warning
        ax_left.legend(loc="upper left")
        ax_right.legend(loc="lower right")
    plt.show()


def plot_data(rseult_dicts: list[dict[str, float | str]], dimension: int = 2):
    """Backward-compatible wrapper around ``plot_dimension_data``.

    Args:
        rseult_dicts: Accepted only to keep the existing signature; it is not
            used, because the plotted data is read from ``all_results``.
        dimension: Forwarded to ``plot_dimension_data``.
    """
    plot_dimension_data(dimension)

In [None]:
# Plot the results for dimension 2 (the argument is presumably the dimension — confirm)
plot_dimension_data(2)

In [None]:
# Plot the results for dimension 3 (the argument is presumably the dimension — confirm)
plot_dimension_data(3)

In [None]:
# Cactus plot: one curve per encoding. Instances are sorted by solve time and
# the running total of time is plotted against the number of instances solved,
# e.g. times [3, 10, 2, 1] give the points (1, 1), (2, 3), (3, 6), (4, 16).
fig, ax = plt.subplots()
for encoding_type, encoding_results in all_results.items():
    # Pool the solve times of every instance, regardless of sequence length
    times = sorted(
        result["time"]
        for length_results in encoding_results.values()
        for result in length_results
    )
    if not times:
        continue
    # x is 1-based: the k-th point is the total time for the k fastest instances
    ax.plot(range(1, len(times) + 1), np.cumsum(times), label=encoding_type)

ax.set_xlabel("Instances solved")
ax.set_ylabel("Cumulative time (s)")
ax.set_title("Cactus plot")
if ax.lines:
    # A legend with no labelled curves only produces a warning
    ax.legend()
plt.show()


# New graph

Same as the graph above, but with 6 curves in total, showing:
- Average runtime
- Average number of variables
- Average number of clauses

One graph for 2D, one graph for 3D

Cactus plot:
1. Fix an encoding.
2. Solve all the instances, recording how long each instance took.
3. Sort the times, then plot the cumulative time as we go through the sorted list.

Example: if we have instances [a, b, c, d] whose solving times are [3, 10, 2, 1] respectively, the sorted times are [1, 2, 3, 10], so the data points are [(1, 1), (2, 3), (3, 6), (4, 16)].

Do this for both the new and the old encoding on the same figure; the instances can be of different lengths.

x axis: 