## Analyzing Probing Results for multilingual LMs


In [None]:
import os
import json
from collections import Counter
import numpy as np
import plotly.express as px
import plotly.graph_objects as go

In [None]:
# Locate the probing result files relative to the current working directory.
base_dir = os.getcwd()
mpararel_en_path = os.path.join(base_dir, "results/mpararel_en_P101_bert_base_multilingual_cased.json")
mpararel_fr_path = os.path.join(base_dir, "results/mpararel_fr_P101_bert_base_multilingual_cased.json")

# Decode explicitly as UTF-8: without `encoding`, open() falls back to the
# platform locale, which can mis-decode or reject non-ASCII text in the results.
with open(mpararel_en_path, "r", encoding="utf-8") as f:
    # Keep only the English entries recorded for relation P101.
    mpararel_en = json.load(f)["en"]["P101"]

with open(mpararel_fr_path, "r", encoding="utf-8") as f:
    # Keep only the French entries recorded for relation P101.
    mpararel_fr = json.load(f)["fr"]["P101"]

In [None]:
def plot_neuron_distribution(coarse_kn_counter_bag, refined_kn_counter_bag, layers, title):
    """Show a grouped bar chart comparing coarse and refined knowledge neurons.

    Args:
        coarse_kn_counter_bag: Mapping looked up with the positions
            ``0 .. len(layers) - 1``; each value becomes one bar height.
        refined_kn_counter_bag: Same kind of mapping for the refined neurons.
        layers: Sequence of x-axis values, one per bar group. Only its length
            decides which keys are read from the two mappings.
        title: Title placed on the figure.

    The values are plotted exactly as supplied; no scaling is applied here.
    The figure is displayed with ``fig.show()`` and nothing is returned.
    """
    positions = range(len(layers))
    # One (legend label, source mapping) pair per trace, in drawing order.
    series = (
        ("coarse knowledge neurons", coarse_kn_counter_bag),
        ("refined knowledge neurons", refined_kn_counter_bag),
    )

    fig = go.Figure()
    for label, bag in series:
        heights = np.array([bag[pos] for pos in positions])
        fig.add_trace(go.Bar(x=layers, y=heights, name=label))

    fig.update_layout(
        title=title,
        barmode="group",
        xaxis_title="Layer",
        yaxis_title="Percentage",
    )
    fig.show()

In [None]:
# analysis for `en` + `P101` results
# Each record unpacks to (ignored item, list of coarse-neuron lists, refined-neuron list).
# `neuron[0]` is used as the counting key -- presumably the layer index (TODO confirm).
coarse_kn_counter_bag_en = Counter()
refined_kn_counter_bag_en = Counter()
total_coarse_neurons = 0
total_refined_neurons = 0

for _, coarse_neurons_list, refined_neurons in mpararel_en:
    # tracking coarse knowledge neurons
    for coarse_neurons in coarse_neurons_list:
        coarse_kn_counter_bag_en.update(neuron[0] for neuron in coarse_neurons)
        total_coarse_neurons += len(coarse_neurons)

    # tracking refined knowledge neurons
    refined_kn_counter_bag_en.update(neuron[0] for neuron in refined_neurons)
    total_refined_neurons += len(refined_neurons)

# Turn raw counts into fractions of each bag's own total. A bag with a zero
# total is empty, so its loop body never runs and no division by zero occurs.
for layer in coarse_kn_counter_bag_en:
    coarse_kn_counter_bag_en[layer] /= total_coarse_neurons

# The refined bag must be divided by the refined total (not the coarse one),
# otherwise the refined fractions do not sum to 1.
for layer in refined_kn_counter_bag_en:
    refined_kn_counter_bag_en[layer] /= total_refined_neurons

# x-axis labels 1..12; the plot helper reads the bags at keys 0..11.
layers = np.arange(1, 13)
plot_neuron_distribution(
    coarse_kn_counter_bag_en, refined_kn_counter_bag_en, layers,
    title="Knowledge neurons distribution for P101 relations in English"
)

In [None]:
# analysis for `fr` + `P101` results
# Each record unpacks to (ignored item, list of coarse-neuron lists, refined-neuron list).
# `neuron[0]` is used as the counting key -- presumably the layer index (TODO confirm).
coarse_kn_counter_bag_fr = Counter()
refined_kn_counter_bag_fr = Counter()
total_coarse_neurons = 0
total_refined_neurons = 0

for _, coarse_neurons_list, refined_neurons in mpararel_fr:
    # tracking coarse knowledge neurons
    for coarse_neurons in coarse_neurons_list:
        coarse_kn_counter_bag_fr.update(neuron[0] for neuron in coarse_neurons)
        total_coarse_neurons += len(coarse_neurons)

    # tracking refined knowledge neurons
    refined_kn_counter_bag_fr.update(neuron[0] for neuron in refined_neurons)
    total_refined_neurons += len(refined_neurons)

# Turn raw counts into fractions of each bag's own total. A bag with a zero
# total is empty, so its loop body never runs and no division by zero occurs.
for layer in coarse_kn_counter_bag_fr:
    coarse_kn_counter_bag_fr[layer] /= total_coarse_neurons

# The refined bag must be divided by the refined total (not the coarse one),
# otherwise the refined fractions do not sum to 1.
for layer in refined_kn_counter_bag_fr:
    refined_kn_counter_bag_fr[layer] /= total_refined_neurons

# x-axis labels 1..12; the plot helper reads the bags at keys 0..11.
layers = np.arange(1, 13)
plot_neuron_distribution(
    coarse_kn_counter_bag_fr, refined_kn_counter_bag_fr, layers,
    title="Knowledge neurons distribution for P101 relations in French"
)

In [None]:
# analysis for `en` and `fr` + `P101` results
# Counter `&` keeps, per key, the minimum of the two values and drops keys whose
# result is not positive. The per-language bags were normalised in place by the
# cells that built them, so the values intersected here are fractions.
coarse_kn_counter_bag = coarse_kn_counter_bag_en & coarse_kn_counter_bag_fr
refined_kn_counter_bag = refined_kn_counter_bag_en & refined_kn_counter_bag_fr
total_coarse_neurons = sum(coarse_kn_counter_bag.values())
total_refined_neurons = sum(refined_kn_counter_bag.values())

# Re-normalise each intersection bag by its own total. An empty bag has a zero
# total, but its loop body never runs, so no division by zero occurs.
for layer in coarse_kn_counter_bag:
    coarse_kn_counter_bag[layer] /= total_coarse_neurons

# The refined bag must be divided by the refined total, not the coarse one.
for layer in refined_kn_counter_bag:
    refined_kn_counter_bag[layer] /= total_refined_neurons

# x-axis labels 1..12; the plot helper reads the bags at keys 0..11.
layers = np.arange(1, 13)
# Plot the intersection bags computed above (not the French-only bags) so the
# figure matches its title.
plot_neuron_distribution(
    coarse_kn_counter_bag, refined_kn_counter_bag, layers,
    title="Knowledge neurons distribution for P101 relations in English and French intersection"
)