In [1]:
from pathlib import Path

import numpy as np
import pandas as pd
import utils
from neuron_explainer.activations.activations import ActivationRecordSliceParams, load_neuron

In [2]:
# Run configuration: which CSV of neurons to process and which explainer model
# name is passed to `utils.get_explanation` / recorded in the results.
INPUT_PATH = "inputs/test_neurons.csv"
EXPLAINER_MODEL = "Meta-Llama-3.1-8B-Instruct"

# The rest of the notebook reads the `layer` and `neuron` columns of this table.
neuron_df = pd.read_csv(INPUT_PATH)

# Show the loaded table as the cell output.
neuron_df

Unnamed: 0,layer,neuron,score,explanation
0,0,286,0.663509,words related to comparison.
1,10,1838,0.676886,phrases describing positions or situations in...
2,20,193,0.455764,verbs indicating questioning or challenging be...
3,30,1685,0.537097,expressions of gratitude and agreeing to rece...
4,40,431,0.36575,"numbers related to time, dates, and measureme..."


In [3]:
modes = ["Original", "Summary", "Highlight", "HighlightSummary", "AVHS"]
new_results = {"layer":[], "neuron":[], "explainer":[]}
for mode in modes:
    new_results[mode] = []

for i, row in neuron_df.iterrows():
    layer = row["layer"]
    neuron = row["neuron"]
    new_results["layer"].append(layer)
    new_results["neuron"].append(neuron)
    new_results["explainer"].append(EXPLAINER_MODEL)
    neuron_record = load_neuron(layer, neuron)
    
    for mode in modes:
        token_results = await utils.get_explanation(mode=mode, neuron_record=neuron_record, explainer_model=EXPLAINER_MODEL, get_token_only = True)
        new_results[mode].append(token_results['total_tokens'])

new_df = pd.DataFrame(new_results)
new_df

Unnamed: 0,layer,neuron,explainer,Original,Summary,Highlight,HighlightSummary,AVHS
0,0,286,Meta-Llama-3.1-8B-Instruct,2377,949,887,999,1155
1,10,1838,Meta-Llama-3.1-8B-Instruct,2409,995,912,1053,1207
2,20,193,Meta-Llama-3.1-8B-Instruct,2434,985,933,1070,1221
3,30,1685,Meta-Llama-3.1-8B-Instruct,2290,1025,938,1113,1630
4,40,431,Meta-Llama-3.1-8B-Instruct,2523,1002,944,1055,1306


In [4]:
# Save the token counts under test_results/, named after the input file
# (e.g. inputs/test_neurons.csv -> test_results/test_neurons_tokens.csv).
output_dir = Path("test_results")
# `DataFrame.to_csv` does not create missing directories, so make sure it exists first.
output_dir.mkdir(parents=True, exist_ok=True)

# Keep everything before the first "." of the file name, as the output name always has.
output_name = Path(INPUT_PATH).name.split(".")[0] + "_tokens.csv"
new_df.to_csv(output_dir / output_name)

In [5]:
# Average token count per prompting mode across all processed neurons.
mean_tokens_by_mode = {mode: np.mean(new_df[mode]) for mode in modes}

# Print one "<mode> <mean>" line per mode, in the order given by `modes`.
for mode, avg in mean_tokens_by_mode.items():
    print(mode, avg)

Original 2406.6
Summary 991.2
Highlight 922.8
HighlightSummary 1058.0
AVHS 1303.8
