In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from utils import History, decode_image

In [None]:
# History file inspected by the notebook-level cells (the `.jsonl` extension
# suggests JSON Lines; the actual parsing lives in utils.History).
# Un-comment one of the alternatives below to look at a different run.
history_path = "test.jsonl"
# history_path = "results/morenov-embed-cut.jsonl"
# history_path = "results/winrate.jsonl"

# Load the run; `history.entries` is what the following cells consume.
history = History()
history.load_history(history_path)

In [None]:
# Tabular view of the loaded history, built from `history.entries`.
history_df = pd.DataFrame(history.entries)
print(history_df.shape)  # (rows, columns) sanity check
history_df.head()  # trailing expression: rendered as the cell's output

In [None]:
def plot_novelty_counts(ax, history_df: pd.DataFrame, expected_total: int = 100):
    """Draw the novelty-score distribution of a run as a bar chart on ``ax``.

    One bar is drawn per distinct ``novelty_score`` value (sorted by score),
    preceded by a gray "Code Error" bar that accounts for entries missing
    from the history.

    Args:
        ax: Matplotlib-style axes to draw on.
        history_df: One row per recorded entry; scores are read from the
            ``novelty_score`` column.
        expected_total: Number of entries the run was expected to produce.
            The shortfall ``expected_total - len(history_df)`` is shown as
            "Code Error" (presumably attempts that failed before being
            recorded -- confirm against the code that writes the history).
            Defaults to 100, the value that used to be hard-coded here.

    Returns:
        The same ``ax`` that was passed in.
    """
    ax.set_xlabel("Novelty Score")
    ax.set_ylabel("Count")
    ax.set_title("Novelty Score Distribution")

    # A history with no entries yields a frame without any columns at all.
    if "novelty_score" in history_df.columns:
        score_counts = history_df["novelty_score"].value_counts().sort_index()
    else:
        score_counts = pd.Series(dtype="int64")

    # Never draw a negative bar when the history holds more rows than expected.
    missing = max(0, expected_total - len(history_df))
    ax.bar(["Code Error"], [missing], color="gray")

    if not score_counts.empty:
        # Scores are stringified so they share the categorical x axis with
        # the "Code Error" label.
        ax.bar(
            [str(score) for score in score_counts.index],
            score_counts.tolist(),
        )
    return ax

In [None]:
def compare_novelty_trends(ax, history_paths: list[str], threshold: float = 6):
    """Plot mean novelty score per generation for one or more runs.

    Each history file becomes one error-bar series: the marker is the mean
    ``novelty_score`` of a generation and the bar its sample standard
    deviation (NaN for a generation with a single entry).

    Args:
        ax: Matplotlib-style axes to draw on.
        history_paths: History files to load with ``History.load_history``;
            each path is also used as the legend label of its series.
        threshold: Height of the dashed gray reference line. Defaults to 6,
            the level that used to be hard-coded; what that level stands for
            is not stated in this notebook.

    Returns:
        The same ``ax`` that was passed in.
    """
    for history_path in history_paths:
        run_history = History()
        run_history.load_history(history_path)
        run_df = pd.DataFrame(run_history.entries)

        # A single groupby pass produces both statistics, index-aligned.
        per_gen = run_df.groupby("gen")["novelty_score"].agg(["mean", "std"])

        ax.errorbar(
            per_gen.index,
            per_gen["mean"].values,
            yerr=per_gen["std"].values,
            marker="o",
            label=history_path,
        )

    ax.axhline(y=threshold, color="gray", linestyle="--")

    ax.set_xlabel("Generation")
    ax.set_ylabel("Average Novelty Score")
    ax.legend()
    ax.set_title("Novelty Score Trend")
    return ax

def novelty_plot():
    """Show the novelty overview figure: score distribution beside the trend.

    The left panel is drawn from the notebook-level ``history_df``; the right
    panel loads "results/scoreprompt.jsonl" and plots its per-generation trend.
    """
    _fig, (counts_ax, trend_ax) = plt.subplots(nrows=1, ncols=2, figsize=(15, 5))

    # Left: how often each novelty score occurs in the currently loaded run.
    plot_novelty_counts(counts_ax, history_df)

    # Right: mean novelty per generation for the listed history file(s).
    trend_sources = ["results/scoreprompt.jsonl"]
    compare_novelty_trends(trend_ax, trend_sources)

    plt.show()

# Render the two-panel novelty figure (reads the notebook-level `history_df`).
novelty_plot()

In [None]:
def _winrates_by_generation(pairwise_matrix, gen_by_entry: dict) -> dict:
    """Fold pairwise outcomes into a mean win rate per generation.

    Every off-diagonal cell ``(i, j)`` is visited. A value of 0 credits a win
    to entry ``i`` and a loss to entry ``j``; a value of 1 does the opposite;
    any other value contributes nothing. Each win/loss is recorded under the
    generation of the entry concerned.

    Args:
        pairwise_matrix: 2-D array of outcome codes indexed by entry id.
        gen_by_entry: Mapping from entry id to generation.

    Returns:
        Dict mapping generation -> mean of its recorded 1/0 outcomes, NaN for
        a generation that has no decided comparison.

    Raises:
        IndexError: If an index of the matrix has no entry in ``gen_by_entry``.
    """

    def _gen_of(entry_id):
        try:
            return gen_by_entry[entry_id]
        except KeyError:
            raise IndexError(
                f"entry_id {entry_id} appears in the pairwise matrix "
                "but not in the history"
            ) from None

    gen_results = {}
    for i in range(pairwise_matrix.shape[0]):
        for j in range(pairwise_matrix.shape[1]):
            if i == j:
                continue
            gen_i = _gen_of(i)
            gen_j = _gen_of(j)
            # Register both generations even when this pair is undecided, so
            # they still show up (as NaN) in the result.
            gen_results.setdefault(gen_i, [])
            gen_results.setdefault(gen_j, [])

            winner = pairwise_matrix[i, j]
            if winner == 0:
                gen_results[gen_i].append(1)
                gen_results[gen_j].append(0)
            elif winner == 1:
                gen_results[gen_i].append(0)
                gen_results[gen_j].append(1)

    # Explicit NaN instead of np.mean([]), which yields NaN plus a RuntimeWarning.
    return {
        gen: (np.mean(results) if results else np.nan)
        for gen, results in gen_results.items()
    }


def visualize_human_eval(pairwise_path: str, history_path: str):
    """Plot the human-evaluation win rate of each generation as a bar chart.

    Loads a pairwise outcome matrix and the matching history, prints four
    tallies of the matrix, then shows one bar per generation with the mean
    win rate of that generation's entries.

    Args:
        pairwise_path: ``.npy`` file holding the pairwise matrix. Cell
            ``(i, j)`` is interpreted with ``i``/``j`` as ``entry_id`` values.
            Codes 0 and 1 decide a comparison (see
            ``_winrates_by_generation``); -1 and 2 are only tallied --
            presumably "not judged" and "tie", confirm with the code that
            writes the matrix.
        history_path: History file whose entries provide ``entry_id`` and
            ``gen`` for every index of the matrix.

    Raises:
        IndexError: If the matrix refers to an entry id missing from the history.
    """
    pairwise_matrix = np.load(pairwise_path)
    # Tallies, in order: cells != -1, cells == 1, cells == 2, cells == 0.
    print((pairwise_matrix != -1).sum())
    print((pairwise_matrix == 1).sum())
    print((pairwise_matrix == 2).sum())
    print((pairwise_matrix == 0).sum())

    history = History()
    history.load_history(history_path)
    history_df = pd.DataFrame(history.entries)

    # Build the id -> generation lookup once instead of filtering the frame
    # twice per matrix cell; on duplicate ids the first row wins.
    gen_by_entry = (
        history_df.drop_duplicates(subset="entry_id")
        .set_index("entry_id")["gen"]
        .to_dict()
    )
    avg_winrates = _winrates_by_generation(pairwise_matrix, gen_by_entry)

    fig, ax = plt.subplots()
    ax.bar(list(avg_winrates.keys()), list(avg_winrates.values()))
    ax.set_xlabel("Generation")
    ax.set_ylabel("Winrate")
    ax.set_title("Human Evaluation Winrate by Generation")
    # plt.savefig("figures/human_winrate.png")
    plt.show()

# Plot human-evaluation win rates from the saved pairwise matrix and the
# history file that shares its "winrate" stem.
visualize_human_eval("results/winrate.npy", "results/winrate.jsonl")

In [None]:
# Gallery of every entry in the loaded history: the image first, then its metadata.
for row in history.entries:
    # Image is stored under "base64_img"; decoding is delegated to utils.decode_image.
    img = decode_image(row["base64_img"])
    display(img)
    print(f"ID: {row['entry_id']}")
    print(f"Parents: {row['parents']}")
    print(f"Generation: {row['gen']}")
    print(f"Novelty Score: {row['novelty_score']}")
    print(f"Rationale: {row['rationale']}")

In [None]:
def show_image_and_parents(entry_id: int, history_df: pd.DataFrame):
    """Display one entry's image and metadata, followed by each of its parents.

    Args:
        entry_id: Value of the ``entry_id`` column identifying the entry; if
            several rows match, the first one is used.
        history_df: One row per entry with the columns ``entry_id``, ``gen``,
            ``parents`` and ``base64_img``.

    Raises:
        IndexError: If no row of ``history_df`` has the requested ``entry_id``.
    """
    matches = history_df[history_df["entry_id"] == entry_id]
    if matches.empty:
        # Same exception type as a bare `.iloc[0]`, but it names the missing id.
        raise IndexError(f"No history entry with entry_id={entry_id!r}")
    entry = matches.iloc[0]

    img = decode_image(entry["base64_img"])
    print(f"Entry ID: {entry['entry_id']}")
    print(f"Generation: {entry['gen']}")
    print(f"Parents: {entry['parents']}")
    display(img)

    # `Series.isin` only accepts list-likes: treat a missing value (None/NaN)
    # as "no parents" and a bare scalar as a single parent.
    parent_ids = entry["parents"]
    if not pd.api.types.is_list_like(parent_ids):
        parent_ids = [] if pd.isna(parent_ids) else [parent_ids]

    # Parents are shown in the frame's row order, not in `parents` order.
    parents = history_df[history_df["entry_id"].isin(parent_ids)]
    for _, parent in parents.iterrows():
        img = decode_image(parent["base64_img"])
        print(f"Parent ID: {parent['entry_id']}")
        print(f"Generation: {parent['gen']}")
        display(img)

# Inspect entry 8 and its parents, looked up in the notebook-level `history_df`.
show_image_and_parents(8, history_df)