## Setup

In [1]:
import os
import json
import glob
import torch
import re
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio

from IPython import get_ipython
from IPython.display import clear_output, display
#import seaborn as sns
import matplotlib.pyplot as plt
from utils.data_processing import (
    read_json_file,
    load_edge_scores_into_dictionary,
    load_faithfulness_scores_into_df,
    get_ckpts,
    load_metrics,
    compute_ged,
    compute_weighted_ged,
    compute_gtd,
    compute_jaccard_similarity_to_reference,
    compute_jaccard_similarity,
    aggregate_metrics_to_tensors_step_number,
    get_ckpts
)

## Retrieve & Process Data

### Circuit Data

In [2]:
# Directory holding the edge-score graphs to analyse
# (path points at the pythia-160m-seed1 run, ioi task).
folder_path = 'results/graphs/pythia-160m-seed1/ioi'

# NOTE(review): the loader's name suggests it returns a dictionary, not a
# DataFrame, despite the `df` variable name — confirm against utils.data_processing.
df = load_edge_scores_into_dictionary(
    folder_path,
)

# Drop anything printed to the cell output while loading.
clear_output()

In [3]:
# Faithfulness scores for the ioi task, one frame per pythia-160m run directory.
# The first call passes no `seed_name`, so the loader's default label is used
# (presumably 'seed1234', going by the variable name — TODO confirm).
df_ff_seed_1234 = load_faithfulness_scores_into_df(
    'results/faithfulness/pythia-160m/ioi',
)
df_ff_seed_1 = load_faithfulness_scores_into_df(
    'results/faithfulness/pythia-160m-seed1/ioi',
    seed_name='seed1',
)
df_ff_seed_2 = load_faithfulness_scores_into_df(
    'results/faithfulness/pythia-160m-seed2/ioi',
    seed_name='seed2',
)
df_ff_seed_3 = load_faithfulness_scores_into_df(
    'results/faithfulness/pythia-160m-seed3/ioi',
    seed_name='seed3',
)

# Drop anything printed to the cell output while loading.
clear_output()

In [4]:
# Stack the per-seed faithfulness frames into one long frame.
# pd.concat keeps each input frame's own row index by default, so index
# labels are not renumbered and may repeat across seeds.
df_ff = pd.concat(
    [
        df_ff_seed_1234,
        df_ff_seed_1,
        df_ff_seed_2,
        df_ff_seed_3,
    ]
)

# Preview the first rows of the combined frame.
df_ff.head()

Unnamed: 0,size,faithfulness_score,checkpoint,seed
4,1,1.0,4000,seed1234
3,3,2.28125,4000,seed1234
2,6,1.414062,4000,seed1234
1,13,1.046875,4000,seed1234
0,25,1.578125,4000,seed1234


In [6]:
# Compare seeds at a single checkpoint: faithfulness score against size,
# one line per seed.
checkpoint = 143000

# Keep only the rows recorded at the chosen checkpoint.
sub_df_ff = df_ff.loc[df_ff['checkpoint'] == checkpoint]

fig = px.line(
    sub_df_ff,
    x='size',
    y='faithfulness_score',
    color='seed',
)
fig.update_layout(
    title=f'Faithfulness scores for each 160m seed at checkpoint {checkpoint}',
    xaxis_title='Size',
    yaxis_title='Faithfulness score',
)
fig.show()