In [None]:
import ast

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from utils import decode_image

In [None]:
results_path = "results/rubric.csv"
history_df = pd.read_csv(results_path)

# The CSV stores every cell of the "parents" column as the string form of a
# list of ints, so parse each one back into a real list. `ast.literal_eval`
# only evaluates Python literals, so unlike `eval` it cannot run code embedded
# in the file. (`ast` is imported with the other imports at the top.)
history_df["parents"] = history_df["parents"].apply(ast.literal_eval)

In [None]:
# Sanity-check the load: print (rows, columns), then preview the first five rows.
print((len(history_df.index), len(history_df.columns)))
history_df.head(n=5)

In [None]:
# Number of rows that have no phenotype stored.
print(history_df["phenotype"].isnull().sum())
# Novelty averaged over *all* rows: missing scores add nothing to the sum
# but are still counted in the denominator.
print(history_df["novelty_score"].sum() / history_df.shape[0])

In [None]:
# Plot the value counts of the novelty score column in a bar chart with x axis sorted by key
def plot_novelty_counts(ax, history_df: pd.DataFrame):
    """Draw a bar chart of how often each novelty score occurs.

    Rows whose ``novelty_score`` is missing are tallied into one gray bar
    labelled "Code Error", which is plotted first. Every distinct non-missing
    score then gets its own bar, in ascending score order.

    Args:
        ax: Axes-like object to draw on.
        history_df: Frame containing a ``novelty_score`` column.

    Returns:
        The same ``ax`` that was passed in.
    """
    scores = history_df["novelty_score"]
    is_missing = scores.isna()

    # Missing scores get their own bar ahead of the numeric ones.
    ax.bar(["Code Error"], [int(is_missing.sum())], color="gray")

    # sort_index() orders the bars by score value rather than by frequency.
    tally = scores[~is_missing].value_counts().sort_index()
    ax.bar(tally.index.astype(str), tally.values)

    ax.set_title("Novelty Score Distribution")
    ax.set_ylabel("Count")
    ax.set_xlabel("Novelty Score")
    return ax

In [None]:
def plot_novelty_trend(ax, history_df: pd.DataFrame, threshold: float):
    """Plot how novelty scores evolve across generations.

    For every value of ``gen`` the max, mean and min of ``novelty_score`` are
    drawn as three marked lines. The mean line carries error bars of one
    sample standard deviation, and a dashed horizontal line marks
    ``threshold``.

    Args:
        ax: Axes-like object to draw on.
        history_df: Frame with ``gen`` and ``novelty_score`` columns.
        threshold: y-value at which the dashed reference line is drawn.

    Returns:
        The same ``ax`` that was passed in.
    """
    # One pass over the groups instead of a separate groupby per statistic.
    per_gen = history_df.groupby("gen")["novelty_score"].agg(
        ["max", "mean", "min", "std"]
    )
    # The sample std is NaN for a generation with fewer than two scored
    # entries; draw a zero-length error bar there instead.
    spread = per_gen["std"].fillna(0.0)

    ax.plot(per_gen.index, per_gen["max"].values, marker="o", label="max")

    # errorbar() only draws bars when it is given yerr/xerr; without them it
    # is just a plain line plot.
    ax.errorbar(
        per_gen.index,
        per_gen["mean"].values,
        yerr=spread.values,
        marker="o",
        capsize=3,
        label="mean",
    )

    ax.plot(per_gen.index, per_gen["min"].values, marker="o", label="min")

    ax.axhline(y=threshold, color="gray", linestyle="--", label="threshold")

    # One tick per generation. range() needs real ints, so drop missing
    # generations and cast explicitly (the column is float-typed whenever it
    # contains NaN); skip the ticks entirely when there is nothing to show.
    gens = history_df["gen"].dropna()
    if not gens.empty:
        ax.set_xticks(range(int(gens.min()), int(gens.max()) + 1))

    ax.set_xlabel("Generation")
    ax.set_ylabel("Novelty Score")
    ax.legend()
    ax.set_title("Novelty Score Trend")
    return ax

def novelty_plot(data: "pd.DataFrame | None" = None, threshold: float = 20.0):
    """Show the novelty distribution and the per-generation trend side by side.

    Args:
        data: History frame to plot. When omitted, the notebook-level
            ``history_df`` is used, so a bare ``novelty_plot()`` call behaves
            as before.
        threshold: Value passed to ``plot_novelty_trend`` for its dashed
            reference line. Defaults to 20.0.

    Returns:
        None. The figure is rendered with ``plt.show()``; it is deliberately
        not returned so the notebook does not display it a second time.
    """
    if data is None:
        data = history_df

    fig, axes = plt.subplots(1, 2, figsize=(15, 5))
    # Both helpers draw onto the axes they are given, so nothing needs to be
    # assigned back into the array.
    plot_novelty_counts(axes[0], data)
    plot_novelty_trend(axes[1], data, threshold)
    plt.show()

# Render the two-panel novelty figure for the loaded history.
novelty_plot()

In [None]:
# Walk the whole history and show every entry: its image when a phenotype is
# stored, otherwise a placeholder message, followed by the entry's metadata.
for _, row in history_df.iterrows():
    encoded = row["phenotype"]
    if pd.isna(encoded):
        print("No image available")
    else:
        img = decode_image(encoded)
        display(img)

    print(f"ID: {row['entry_id']}")
    print(f"Parents: {row['parents']}")
    print(f"Generation: {row['gen']}")
    print(f"Novelty Score: {row['novelty_score']}")
    print(f"Rationale: {row['rationale']}")

In [None]:
def show_image_and_parents(entry_id: int, history_df: pd.DataFrame):
    """Display one entry's image followed by the images of its parents.

    Prints the entry's id, generation and parent list, then shows its decoded
    phenotype. Each row whose ``entry_id`` appears in the entry's ``parents``
    list is then printed and shown the same way. Rows without a phenotype
    print "No image available" instead of being decoded.

    Args:
        entry_id: Value to look up in the ``entry_id`` column. If several rows
            match, the first one is used.
        history_df: Frame with ``entry_id``, ``gen``, ``parents`` and
            ``phenotype`` columns; ``parents`` must hold list-like values.

    Raises:
        IndexError: If no row has the requested ``entry_id``.
    """
    matches = history_df[history_df["entry_id"] == entry_id]
    if matches.empty:
        # Same exception type a bare .iloc[0] would raise, but with a message
        # that names the actual problem.
        raise IndexError(f"No entry with entry_id={entry_id!r} in history_df")
    entry = matches.iloc[0]

    print(f"Entry ID: {entry['entry_id']}")
    print(f"Generation: {entry['gen']}")
    print(f"Parents: {entry['parents']}")

    # A missing phenotype read from CSV is NaN, not None, so an
    # `is not None` check would let it through to decode_image.
    if pd.notna(entry["phenotype"]):
        display(decode_image(entry["phenotype"]))
    else:
        print("No image available")

    parents = history_df[history_df["entry_id"].isin(entry["parents"])]
    for _, parent in parents.iterrows():
        print(f"Parent ID: {parent['entry_id']}")
        print(f"Generation: {parent['gen']}")
        if pd.notna(parent["phenotype"]):
            display(decode_image(parent["phenotype"]))
        else:
            print("No image available")

# Example: inspect entry 89 together with its parents.
show_image_and_parents(89, history_df)