In [1]:
import pandas as pd

# Load the baseline and the improved evaluation results.
df1 = pd.read_json("eval_results.json")
df2 = pd.read_json("improved_eval_results.json")

# Index both frames by 'question' so the two runs can be aligned on it.
df1 = df1.set_index('question')
df2 = df2.set_index('question')

# 'ground_truth' is assumed to be identical in both files, so only the copy
# from the original run is kept. Without this drop, add_suffix below would
# leave a redundant 'ground_truth_impr' column next to 'ground_truth'.
# The drop is skipped when df1 has no 'ground_truth', so the data is never lost;
# errors='ignore' makes it a no-op when df2 lacks the column.
if 'ground_truth' in df1.columns:
    df2 = df2.drop(columns='ground_truth', errors='ignore')

# Suffix every column with its run of origin to tell the two runs apart.
df1 = df1.add_suffix('_orig')
df2 = df2.add_suffix('_impr')

# Join the two runs on the 'question' index. merge defaults to an inner join,
# so only questions present in both files are kept.
# rename skips labels that are absent, so no existence check is needed.
df_final = (
    df1.merge(df2, left_index=True, right_index=True)
    .rename(columns={'ground_truth_orig': 'ground_truth'})
)

# Sort the columns alphabetically and turn 'question' back into a regular column.
df = df_final.sort_index(axis=1).reset_index()


In [2]:
# Open the merged comparison frame in Renumics Spotlight for interactive inspection.
from renumics import spotlight

# Viewer settings, named so they are easy to spot and adjust.
SPOTLIGHT_PORT = 8888
SPOTLIGHT_LAYOUT = "spotlight-layout.json"

spotlight.show(df, port=SPOTLIGHT_PORT, layout=SPOTLIGHT_LAYOUT)

VBox(children=(Label(value='Spotlight running on http://127.0.0.1:8888/'), HBox(children=(Button(description='…