In [None]:
# `sys` is needed only to extend the module search path on the next line.
import sys
# Add the parent directory to the import path. Presumably this is what makes the
# project-local `src` package importable from this notebook's folder — TODO confirm.
sys.path.append("..")
import pandas as pd

# Redistribution

In [None]:
import pandas as pd
from src import BASE_CONCEPT_PATH, PROCESSED_4, NEURONS_PER_LAYER, NUM_LAYERS
from src.visualization.results_utils import build_heatmap_data, filter_ablated

# Base model: read the concept table and turn it into a saliency grid sized
# for every neuron in the network.
base_df = pd.read_csv(BASE_CONCEPT_PATH)
base_saliency_df = build_heatmap_data(base_df, NEURONS_PER_LAYER * NUM_LAYERS)

# Retrained model: keep only the two concept columns and pass them through
# filter_ablated before building the grid.
retrain_4_df = pd.read_csv(PROCESSED_4)
concept_columns = ['neuron-id', 'current_concepts']
filtered_retrained_4_df = filter_ablated(retrain_4_df[concept_columns])

# The number of rows left after filtering is printed and also used as the
# size argument for the retrained grid.
remaining_rows = len(filtered_retrained_4_df)
print(remaining_rows)
retrained_4_saliency_df = build_heatmap_data(filtered_retrained_4_df, remaining_rows)

In [None]:
# Per row of each saliency grid, count the entries whose saliency exceeds 0.5.
base_count_per_row = base_saliency_df.gt(0.5).sum(axis=1)
retrained_count_per_row = retrained_4_saliency_df.gt(0.5).sum(axis=1)

# Express every row's count as a percentage of that model's total count of
# above-threshold entries (the denominator is the sum of the counts themselves).
# NOTE(review): if the intent was to normalise by the number of non-ablated
# neurons instead, the denominator needs to change — confirm.
base_saliency_pc = base_count_per_row.div(base_count_per_row.sum()).mul(100)
retrained_saliency_pc = retrained_count_per_row.div(retrained_count_per_row.sum()).mul(100)

# Place both percentage series side by side; concat labels the unnamed
# series 0 and 1, which are then renamed to readable headers.
saliency_pc = (
    pd.concat([base_saliency_pc, retrained_saliency_pc], axis=1)
    .round(3)
    .rename(columns={0: "Base", 1: "Retrained"})
)
saliency_pc

# Correlation between similarity and saliency

In [None]:
import pandas as pd
from src import PROCESSED_4, SIMILARITY_4, NEURONS_PER_LAYER, NUM_LAYERS
from src.visualization.results_utils import build_heatmap_data, build_sim_data

# Saliency side: read the processed table for the fourth retraining stage,
# keep the two concept columns and build the full-size saliency grid.
total_neurons = NEURONS_PER_LAYER * NUM_LAYERS
retrain_4_df = pd.read_csv(PROCESSED_4)
retrain_4_set_df = retrain_4_df[['neuron-id', 'current_concepts']]
retrain_4_heatdf = build_heatmap_data(retrain_4_set_df, total_neurons)

# Similarity side: read the matching similarity table and convert it with
# build_sim_data.
retrain_4_sim_df = pd.read_csv(SIMILARITY_4)
retrain_4_sim_heatdf = build_sim_data(retrain_4_sim_df)

In [None]:
import math
from src import NEURONS_PER_LAYER

# Pair each saliency value with the similarity value at the same (row, column)
# position, keeping only positions where both values are present (non-NaN).
#
# The pairs are gathered in a plain list and converted to a DataFrame once at
# the end: inserting with `df.loc[count] = ...` re-allocates the frame on every
# insert, which makes the loop quadratic in the number of pairs.
records = []
for index, row in retrain_4_heatdf.iterrows():
    # The similarity row is the same for every column of this saliency row,
    # so look it up once. `index` is used as a position, as before.
    similarity_row = retrain_4_sim_heatdf.iloc[index]
    for column in range(NEURONS_PER_LAYER):
        concept_saliency = row[column]
        similarity = similarity_row[column]
        if not math.isnan(concept_saliency) and not math.isnan(similarity):
            records.append((concept_saliency, similarity))

df = pd.DataFrame(records, columns=['saliency', 'similarity'])
# Number of collected pairs; kept because the name was defined by this cell.
count = len(df)

# Persist the pairs; the path is relative to the current working directory.
df.to_csv("data/processed/regression.csv")

In [None]:
import matplotlib.pyplot as plt

# Scatter the collected pairs: similarity on the x axis, saliency on the y axis.
plt.scatter(df['similarity'], df['saliency'])
plt.xlabel('Similarity score')
plt.ylabel('Saliency score')
plt.title('Concept similarity versus saliency, post-remapping for location names')

# Thicken all four borders of the axes that the scatter call drew on.
ax = plt.gca()
for side in ('top', 'bottom', 'left', 'right'):
    ax.spines[side].set_linewidth(2)

# Write the figure to a PDF in the current working directory.
plt.savefig('similarity-vs-saliency.pdf')

# Mean saliency and similarity

In [None]:
import scipy.stats as st
import numpy as np

def _mean_confidence_interval(values, confidence=0.95):
    """Return the normal-approximation confidence interval for the mean of `values`."""
    return st.norm.interval(confidence=confidence, loc=np.mean(values), scale=st.sem(values))


def _std_confidence_interval(values, confidence=0.95):
    """Return a confidence interval for the sample standard deviation of `values`.

    Uses the chi-square interval for the variance (ddof=1, matching pandas'
    `.std()`), which assumes the values are approximately normally distributed.
    The earlier version centred a normal interval on the population standard
    deviation (ddof=0) and scaled it by the standard error of the *mean*, so the
    interval neither matched the printed value nor measured the uncertainty of
    the standard deviation.
    """
    dof = len(values) - 1
    sample_variance = values.var()  # pandas default is ddof=1
    tail = (1 - confidence) / 2
    # The larger chi-square quantile gives the lower bound and vice versa.
    lower = np.sqrt(dof * sample_variance / st.chi2.ppf(1 - tail, dof))
    upper = np.sqrt(dof * sample_variance / st.chi2.ppf(tail, dof))
    return (lower, upper)


# Print the same four lines for each score column: mean, its interval,
# standard deviation, its interval.
for heading, column in (("Saliency", 'saliency'), ("Similarity", 'similarity')):
    values = df[column]
    print(heading)
    print(f"Mean: {values.mean()}")
    interval = _mean_confidence_interval(values)
    print(f"Confidence interval: {interval}")
    print(f"Standard deviation: {values.std()}")
    interval = _std_confidence_interval(values)
    print(f"Confidence interval: {interval}")

## Concept saliency over retraining

In [None]:
import numpy as np
import scipy.stats as st
import math
from src import BASE_CONCEPT_PATH, NEURONS_PER_LAYER, NUM_LAYERS

# Read the base model's concept table, keep the two concept columns and build
# the full-size saliency grid.
base_df = pd.read_csv(BASE_CONCEPT_PATH)
base_set_df = base_df[['neuron-id','current_concepts']]
base_heatdf = build_heatmap_data(base_set_df, NEURONS_PER_LAYER * NUM_LAYERS)

# Summarise every grid row: mean, standard deviation and the half-width of a
# 95% normal confidence interval around the mean. Missing entries count as 0.
df = pd.DataFrame(columns=['layer', 'mean saliency', 'std', 'error'])
for _, heat_row in base_heatdf.iterrows():
    filled = [0 if math.isnan(value) else value for value in heat_row.tolist()]
    row_mean = np.mean(filled)
    row_std = np.std(filled)
    ci_low, ci_high = st.norm.interval(confidence=0.95, loc=np.mean(filled), scale=st.sem(filled))
    half_width = abs(ci_high - ci_low) / 2
    # The running row count doubles as the layer number for the new row.
    df.loc[len(df)] = [len(df), row_mean, row_std, half_width]

# `df` and `mean_saliency_base` refer to the same frame.
mean_saliency_base = df
mean_saliency_base

In [None]:
from src import PROCESSED_1, PROCESSED_2, PROCESSED_3, PROCESSED_4, NUM_LAYERS, NEURONS_PER_LAYER
from src.visualization.results_utils import build_heatmap_data, mean_saliency

def _load_stage_saliency(processed_path):
    """Read one processed CSV and derive its saliency artefacts.

    Returns a 4-tuple: the raw frame, its two concept columns, the full-size
    heatmap frame built from them, and the result of `mean_saliency` on that
    heatmap.
    """
    stage_df = pd.read_csv(processed_path)
    stage_set_df = stage_df[['neuron-id','current_concepts']]
    stage_heatdf = build_heatmap_data(stage_set_df, NEURONS_PER_LAYER * NUM_LAYERS)
    return stage_df, stage_set_df, stage_heatdf, mean_saliency(stage_heatdf)


# Same pipeline for each of the four processed files, in order.
retrain_1_df, retrain_1_set_df, retrain_1_heatdf, mean_saliency_1 = _load_stage_saliency(PROCESSED_1)
retrain_2_df, retrain_2_set_df, retrain_2_heatdf, mean_saliency_2 = _load_stage_saliency(PROCESSED_2)
retrain_3_df, retrain_3_set_df, retrain_3_heatdf, mean_saliency_3 = _load_stage_saliency(PROCESSED_3)
retrain_4_df, retrain_4_set_df, retrain_4_heatdf, mean_saliency_4 = _load_stage_saliency(PROCESSED_4)

In [None]:
# Start from the base model's layer / mean / error columns and join each
# stage table onto it by layer. Columns that clash with ones already present
# get the stage's suffix; the left-hand names are left untouched.
merged_table = mean_saliency_base[['layer', 'mean saliency', 'error']]
for stage_suffix, stage_table in (
    ('_1', mean_saliency_1),
    ('_2', mean_saliency_2),
    ('_3', mean_saliency_3),
    ('_4', mean_saliency_4),
):
    merged_table = merged_table.merge(stage_table, on='layer', suffixes=('', stage_suffix))
merged_table

In [None]:
import matplotlib.pyplot as plt

# One square figure holding a line per row of merged_table.
fig, ax = plt.subplots(figsize=(6,6))

# X positions: the base model followed by the four retraining checkpoints.
time_periods = np.arange(0, 5)
ax.set_xticks(time_periods)
ax.set_xticklabels(["Base", "2 epochs", "4 epochs", "6 epochs", "8 epochs"])

# merged_table column names, listed in the same order as time_periods.
mean_columns = ['mean saliency', 'mean_saliency', 'mean_saliency_2', 'mean_saliency_3', 'mean_saliency_4']
error_columns = ['error', 'error_1', 'error_2', 'error_3', 'error_4']

for _, row in merged_table.iterrows():
    line_points = [row[name] for name in mean_columns]
    error_bars = [row[name] for name in error_columns]

    # The layer value is rounded before being shown in the legend label.
    layer_label = f"Layer {round(row['layer'])}"
    ax.plot(time_periods, line_points, marker='o', label=layer_label)

    # Overlay the error bars in translucent black, without a connecting line.
    ax.errorbar(time_periods, line_points, yerr=error_bars, color='black', linestyle='None', capsize=4, alpha=0.5)

plt.legend()

# Axis labels (the title is intentionally left disabled).
ax.set_xlabel('Stage of retraining')
ax.set_ylabel('Mean concept saliency')
# ax.set_title('Mean saliency in model layers over retraining')

# Thicken all four borders of the axes.
for side in ('top', 'bottom', 'left', 'right'):
    ax.spines[side].set_linewidth(2)

# Write the figure to a PDF in the current working directory.
plt.savefig("saliency_over_retraining.pdf")

## Concept similarity over retraining

In [None]:
from src import SIMILARITY_1, SIMILARITY_2, SIMILARITY_3, SIMILARITY_4

# Read the four similarity CSVs in order.
retrain_1_sim_df, retrain_2_sim_df, retrain_3_sim_df, retrain_4_sim_df = [
    pd.read_csv(csv_path)
    for csv_path in (SIMILARITY_1, SIMILARITY_2, SIMILARITY_3, SIMILARITY_4)
]

# Convert each raw similarity table with build_sim_data.
retrain_1_sim_heatdf, retrain_2_sim_heatdf, retrain_3_sim_heatdf, retrain_4_sim_heatdf = [
    build_sim_data(sim_frame)
    for sim_frame in (retrain_1_sim_df, retrain_2_sim_df, retrain_3_sim_df, retrain_4_sim_df)
]

In [None]:
from src.visualization.results_utils import mean_similarity

# Apply mean_similarity to each stage's similarity frame, in stage order.
mean_sim_1, mean_sim_2, mean_sim_3, mean_sim_4 = map(
    mean_similarity,
    (retrain_1_sim_heatdf, retrain_2_sim_heatdf, retrain_3_sim_heatdf, retrain_4_sim_heatdf),
)

In [None]:
# Start from the first stage's layer / mean / error columns and join the
# later stages onto it by layer. Clashing column names from the joined table
# receive that stage's suffix; the left-hand names stay as they are.
merged_table = mean_sim_1[['layer', 'mean_similarity', 'error']]
for stage_suffix, stage_table in (
    ('_2', mean_sim_2),
    ('_3', mean_sim_3),
    ('_4', mean_sim_4),
):
    merged_table = merged_table.merge(stage_table, on='layer', suffixes=('', stage_suffix))
merged_table

In [None]:
import matplotlib.pyplot as plt

# One small square figure holding a line per row of merged_table.
fig, ax = plt.subplots(figsize=(4,4))

# X positions: the four retraining checkpoints (there is no base column here).
time_periods = np.arange(1, 5)
ax.set_xticks(time_periods)
ax.set_xticklabels(["2 epochs", "4 epochs", "6 epochs", "8 epochs"])

for _, row in merged_table.iterrows():
    layer = row['layer']
    mean_1 = row['mean_similarity']
    std_1 = row['error']
    mean_2 = row['mean_similarity_2']
    std_2 = row['error_2']
    mean_3 = row['mean_similarity_3']
    std_3 = row['error_3']
    mean_4 = row['mean_similarity_4']
    std_4 = row['error_4']

    line_points = [mean_1, mean_2, mean_3, mean_4]
    # Plot the line; the layer value is rounded for the legend label.
    layer_label = f"Layer {round(layer)}"
    ax.plot(time_periods, line_points, marker='o', label=layer_label)

    # Plot the error bars (the std_* names hold the 'error' columns).
    ax.errorbar(time_periods, line_points, yerr=[std_1, std_2, std_3, std_4], color='black', linestyle='None', capsize=4, alpha=0.5)

# Lay the legend out above the axes in two rows. The column count is derived
# from merged_table, the frame whose rows were just plotted, rather than from
# `df`, which is rebound by other cells and may hold an unrelated, much larger
# table depending on which cells ran last. At least one column is required.
legend_columns = max(1, len(merged_table) // 2)
legend = plt.legend(loc='upper center', bbox_to_anchor=(0.5, 1.25), ncol=legend_columns)
legend.get_frame().set_linewidth(0)

# Axis labels (the title is intentionally left disabled).
ax.set_xlabel('Stage of retraining')
ax.set_ylabel('Mean concept similarity')
# ax.set_title('Mean saliency in model layers over retraining')

# Set the linewidth of the axes spines
ax.spines['top'].set_linewidth(2)     # Top border
ax.spines['bottom'].set_linewidth(2)  # Bottom border
ax.spines['left'].set_linewidth(2)    # Left border
ax.spines['right'].set_linewidth(2)   # Right border

# Write the figure to a PDF; 'tight' keeps the legend above the axes in frame.
plt.savefig("similarity_over_retraining.pdf", bbox_inches='tight')

# Examine random HATs

In [None]:
from src import PROCESSED_4, NEURONS_PER_LAYER, NUM_LAYERS
import ast

# Read the processed table for the fourth retraining stage, keep the two
# concept columns and build the full-size saliency grid from them.
retrain_4_df = pd.read_csv(PROCESSED_4)
df = retrain_4_df[['neuron-id','current_concepts']]
retrain_4_heatdf = build_heatmap_data(df, NEURONS_PER_LAYER * NUM_LAYERS)

# For every neuron, take the word of the first (word, score) pair stored in
# `current_concepts` and keep it when it is purely alphabetic.
# Rows are gathered in lists and the frame is built once, instead of growing
# it cell by cell with `.at`, which re-allocates on every new row.
hat_index = []
hat_rows = []
for index, row in df.iterrows():
    # The column holds the Python literal of a list of pairs, stored as text.
    concepts = ast.literal_eval(row['current_concepts'])
    if not concepts:
        # No concept recorded for this neuron: nothing to take, so skip it
        # rather than fail on the missing first element.
        continue
    hat, _ = concepts[0]
    if hat.isalpha():
        hat_index.append(index)
        hat_rows.append((row['neuron-id'], hat))

# Keep the source row labels as the index and object dtype for both columns,
# matching the frame this cell produced before.
new_df = pd.DataFrame(hat_rows, index=hat_index, columns=['neuron-id', 'HAT'], dtype=object)

new_df

In [None]:
from src.visualization.results_utils import get_random_hats
# Get random HATs from each layer

# Call get_random_hats once per layer number, 1 through 6, and print each result.
for layer_number in range(1, 7):
    print(get_random_hats(new_df, retrain_4_heatdf, layer_number))
print(get_random_hats(new_df, retrain_4_heatdf, 6))