In [None]:
import matplotlib.pyplot as plt
import numpy as np
import os

# Directory that is scanned for result CSV files.
RESULTS_DIR = "./results"

# Results bucketed first by "<encoding> <dimension>" and then by sequence
# length; every bucket starts out empty and is filled by the loading cell.
all_results: dict[str, dict[int, list[dict]]] = {
    f"{encoding} {dimension}": {}
    for dimension in ("2d", "3d")
    for encoding in ("old", "new")
}

In [None]:
# Load every result CSV, compare the old and new encoding of each model, and
# collect per-length statistics into `all_results`.
#   invalid: models whose old and new encodings reported different contact counts
#   valid:   models whose old and new encodings agreed
invalid = []
valid = []
seen_contacts = {}

# Empty the buckets first so that re-running this cell does not append every
# result a second time.
for bucket in all_results.values():
    bucket.clear()

# Loop over each file in the results directory (sorted for a reproducible order)
for filepath in sorted(os.listdir(RESULTS_DIR)):
    if not filepath.endswith(".csv"):
        continue

    # Read the csv file, dropping blank lines (e.g. a trailing newline at the
    # end of the file) so they are not parsed as data rows
    with open(os.path.join(RESULTS_DIR, filepath)) as f:
        lines = [line for line in f if line.strip()]

    # Ignore it if it has no data rows (completely empty or header only)
    if len(lines) <= 1:
        continue

    # Ignore the "_(new|old).csv" suffix (8 characters)
    model_name = filepath[:-8]

    # Determine the encoding type from the file-name suffix rather than a
    # substring search, so a model whose name happens to contain "old" or "_2d"
    # is not misclassified
    old = filepath.endswith("_old.csv")
    if model_name.endswith("_2d"):
        result_type = "old 2d" if old else "new 2d"
    else:
        result_type = "old 3d" if old else "new 3d"

    # Skip the header line; every remaining line is a row of numbers
    rows = [list(map(float, line.split(","))) for line in lines[1:]]

    # Check if the new encoding is the same as the old encoding: the first file
    # seen for a model records its contact count, the second one is compared
    contacts = int(rows[0][2])
    if model_name not in seen_contacts:
        seen_contacts[model_name] = contacts
    elif contacts != seen_contacts[model_name]:
        invalid.append(model_name)
    else:
        valid.append(model_name)

    # Read the details
    results = all_results[result_type]
    for row in rows:
        # Keys are sequence lengths; store them as ints to match the declared
        # type of `all_results`
        length = int(row[0])
        results.setdefault(length, []).append({
            "model_name": model_name,
            "time": row[1],
            "variables": int(row[3]),
            "clauses": int(row[4])
        })

In [None]:
# Remove the dimension from the filename
# Drop the last three characters (the dimension part) of every invalid model
# name and de-duplicate the result
invalid_sequences = list({model_name[:-3] for model_name in invalid})

# If every sequence were invalid in both dimensions, there would be exactly two
# invalid models per sequence
if 2 * len(invalid_sequences) != len(invalid):
    print("There were some sequences that were correct for one dimension but not the other")


def get_sequence_from_inputs(filename: str) -> str:
    """Return the first line of ``input/<filename>`` without its line ending.

    Only trailing newline characters ("\\n" or "\\r\\n") are stripped, so the
    last character of the sequence is kept even when the file does not end
    with a newline.

    Args:
        filename: Name of the file inside the ``input`` directory.

    Returns:
        The first line of the file, or an empty string if the file is empty.

    Raises:
        OSError: If the file cannot be opened.
    """
    with open(f"input/{filename}") as f:
        return f.readline().rstrip("\r\n")


def get_contacts_from_results(filename: str) -> int:
    """Return the contact count from the first data row of ``results/<filename>``.

    The first line of the file is skipped as a header. The contact count is
    read from the third column (index 2), the same column the result-loading
    cell uses for contacts; columns 3 and 4 hold variables and clauses there.

    Args:
        filename: Name of the CSV file inside the ``results`` directory.

    Returns:
        The contact count as an integer.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If the file has no data row, the row has fewer than three
            columns, or the value is not numeric.
    """
    with open(f"results/{filename}") as f:
        f.readline()  # skip the header line
        fields = f.readline().strip().split(",")

    if len(fields) < 3:
        raise ValueError(f"results/{filename} has no data row with a contacts column")

    # Parse via float so that values written as e.g. "7.0" are accepted too
    return int(float(fields[2]))

    
# List every invalid sequence, in sorted order, as "<name>: <sequence>"
for sequence_name in sorted(invalid_sequences):
    print(f"{sequence_name}: {get_sequence_from_inputs(sequence_name)}")
    # Debug aid: contact count reported by each encoding of this sequence
    # for encoding in ["2d_old", "2d_new", "3d_old", "3d_new"]:
    #     contacts = get_contacts_from_results(f"{sequence_name}_{encoding}.csv")
    #     print(f"{encoding} {contacts = }")


# Debug aid: dump the raw model lists
# print("Invalid sequences: ")
# invalid.sort()
# print(invalid)

# print("Valid sequences: ")
# valid.sort()
# print(valid)

In [None]:
def plot_dimension_data(dimension: int):
    """Plot average time and encoding size against sequence length.

    For both the old and the new encoding of the requested dimension, the
    results in the global ``all_results`` are averaged per sequence length,
    after dropping every model listed in the global ``invalid``. Average time
    is drawn on the left (log) axis; average variable and clause counts are
    drawn on the right (log) axis.

    Args:
        dimension: ``2`` selects the 2D results; any other value selects the
            3D results.
    """
    suffix = "2d" if dimension == 2 else "3d"
    keys = [f"old {suffix}", f"new {suffix}"]

    # Set membership is O(1); `invalid` itself is a list
    excluded = set(invalid)

    fig, ax_left = plt.subplots(figsize=(16, 10))
    ax_right = ax_left.twinx()

    for key in keys:
        data = []
        for length, models in all_results[key].items():
            # Go through all the valid models of that length and average them
            models = [m for m in models if m["model_name"] not in excluded]
            if not models:
                # Every model of this length was invalid; averaging an empty
                # list would yield NaN and a RuntimeWarning
                continue
            data.append((
                int(length),
                round(np.mean([m["time"] for m in models]), 4),
                round(np.mean([m["variables"] for m in models]), 4),
                round(np.mean([m["clauses"] for m in models]), 4),
            ))

        if not data:
            # Nothing to draw for this encoding; unpacking zip(*[]) would raise
            continue

        # Sort the data of each encoding type by the length of the sequences
        data.sort(key=lambda point: point[0])

        # Graph the data
        color = "blue" if "new" in key else "orange"
        lengths, times, variables, clauses = zip(*data)
        ax_left.plot(lengths, times, color=color, marker="o", label=f"{key} time")
        ax_right.plot(lengths, variables, ":", color=color, label=f"{key} variables")
        ax_right.plot(lengths, clauses, "--", color=color, label=f"{key} clauses")

    ax_left.set_title(f"Old vs. new encoding ({suffix.upper()})")
    ax_left.set_xlabel("Sequence Length")

    ax_left.set_yscale("log")
    ax_left.set_ylabel("Time (s)")

    # The right axis carries both the variable and the clause curves
    ax_right.set_yscale("log")
    ax_right.set_ylabel("Variables / Clauses")

    # Merge the entries of both axes into one legend; two separate legends on
    # twin axes can be drawn on top of each other. It is attached to the right
    # axis, which is drawn last, so no curve covers it.
    left_handles, left_labels = ax_left.get_legend_handles_labels()
    right_handles, right_labels = ax_right.get_legend_handles_labels()
    if left_handles or right_handles:
        ax_right.legend(left_handles + right_handles, left_labels + right_labels)

    plt.show()

In [None]:
# Plot the averaged time, variable and clause curves for the 2D results
plot_dimension_data(2)

In [None]:
# Plot the averaged time, variable and clause curves for the 3D results
plot_dimension_data(3)

# New graph

Same as the plots above, with 6 curves in total: for each of the two encodings (old and new), plot the
- Average runtime
- Average number of variables
- Average number of clauses

One graph for 2D, one graph for 3D

Cactus plot:
Fix an encoding.
Solve all the instances and, for each instance, record how long it took.
Then sort the times in ascending order and plot the cumulative time as we go through the sorted list.

For example, if we have instances [a, b, c, d]
with solving times [3, 10, 2, 1] respectively,
the sorted times are [1, 2, 3, 10]
and the data points (number of instances solved, cumulative time) are [(1, 1), (2, 3), (3, 6), (4, 16)].
Do this for both the new and the old encoding on the same figure; the instances can be of different lengths.

x axis: 