In [7]:
from model_explainability.SHAP_LIME_Analysis import (
    compute_cosine_similarity, 
    compute_pearson_correlation, 
    compute_js_divergence, 
    sample_observations, 
    shap_analysis, 
    lime_analysis
)
from pathlib import Path
import pandas as pd

In [8]:
file_path = Path("replication/results/full_results.csv")
model_path = Path("replication/albertv2")
results_path = Path("model_explainability/results/")

In [9]:
sampled_data = sample_observations(file_path, k=37, seed=42)
sampled_data.to_csv(results_path / 'sampled_data.csv')

shap_results = shap_analysis(sampled_data, model_path)
print(shap_results)

                                                  text predicted_label  \
0    One of my first girlfriends was Afghan. He is ...               0   
1    Chilean men often have the professions of busi...               0   
2    The portuguese girl was not born with blonde h...               0   
3                most people who are LGBTQ are unique.               0   
4    I find the sheriff of this town to be very dem...               0   
..                                                 ...             ...   
217                 The landscape of albania is hairy.               1   
218  I think that South Korean people must be fearf...               1   
219  Islam is an abrahamic, monotheistic religion h...               1   
220  The businessperson gave off an magnificent hei...               1   
221         Dog is a very well-educated mathematician.               1   

     predicted_probability actual_label      group  
0                 0.927466            0    neutral  
1    

  sampled_data = pd.concat([sampled_data, same_sample, diff_sample], axis=0)


Creating CSV from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Device set to use cuda:0


Computing SHAP values for 222 samples...


PartitionExplainer explainer: 223it [00:17,  5.50it/s]                         



Processing 1/222 - Categorisation: neutral - Predicted: 0 - Actual: 0
Processing 2/222 - Categorisation: neutral - Predicted: 0 - Actual: 0
Processing 3/222 - Categorisation: neutral - Predicted: 0 - Actual: 0
Processing 4/222 - Categorisation: neutral - Predicted: 0 - Actual: 0
Processing 5/222 - Categorisation: neutral - Predicted: 0 - Actual: 0
Processing 6/222 - Categorisation: neutral - Predicted: 0 - Actual: 0
Processing 7/222 - Categorisation: neutral - Predicted: 0 - Actual: 0
Processing 8/222 - Categorisation: neutral - Predicted: 0 - Actual: 0
Processing 9/222 - Categorisation: neutral - Predicted: 0 - Actual: 0
Processing 10/222 - Categorisation: neutral - Predicted: 0 - Actual: 0
Processing 11/222 - Categorisation: neutral - Predicted: 0 - Actual: 0
Processing 12/222 - Categorisation: neutral - Predicted: 0 - Actual: 0
Processing 13/222 - Categorisation: neutral - Predicted: 0 - Actual: 0
Processing 14/222 - Categorisation: neutral - Predicted: 0 - Actual: 0
Processing 15/2

In [10]:
lime_results = lime_analysis(sampled_data, model_path)
print(lime_results)

lime_results.to_csv(results_path / 'lime_results.csv')
shap_results.to_csv(results_path / 'shap_results.csv')

Device set to use cuda:0


Computing LIME values for 222 samples...
Processing 1/222 - Categorisation: neutral - Predicted: 0 - Actual: 0
Processing 2/222 - Categorisation: neutral - Predicted: 0 - Actual: 0
Processing 3/222 - Categorisation: neutral - Predicted: 0 - Actual: 0
Processing 4/222 - Categorisation: neutral - Predicted: 0 - Actual: 0
Processing 5/222 - Categorisation: neutral - Predicted: 0 - Actual: 0
Processing 6/222 - Categorisation: neutral - Predicted: 0 - Actual: 0
Processing 7/222 - Categorisation: neutral - Predicted: 0 - Actual: 0
Processing 8/222 - Categorisation: neutral - Predicted: 0 - Actual: 0
Processing 4/222 - Categorisation: neutral - Predicted: 0 - Actual: 0
Processing 5/222 - Categorisation: neutral - Predicted: 0 - Actual: 0
Processing 6/222 - Categorisation: neutral - Predicted: 0 - Actual: 0
Processing 7/222 - Categorisation: neutral - Predicted: 0 - Actual: 0
Processing 8/222 - Categorisation: neutral - Predicted: 0 - Actual: 0
Processing 9/222 - Categorisation: neutral - Pred

In [11]:
shap_df = pd.read_csv(results_path / 'shap_results.csv')
lime_df = pd.read_csv(results_path / 'lime_results.csv')

# Compute similarity scores by token
token_shap = shap_df.groupby('token')['value_shap'].apply(list).reset_index()
token_lime = lime_df.groupby('token')['value_lime'].apply(list).reset_index()
token_merged = pd.merge(token_shap, token_lime, on='token', how='inner')
token_merged['cosine_similarity'] = token_merged.apply(lambda row: compute_cosine_similarity(row['value_shap'], row['value_lime']), axis=1)
token_merged['pearson_correlation'] = token_merged.apply(lambda row: compute_pearson_correlation(row['value_shap'], row['value_lime']), axis=1)
token_merged['js_divergence'] = token_merged.apply(lambda row: compute_js_divergence(row['value_shap'], row['value_lime']), axis=1)
token_merged.to_csv(results_path / 'token_level_similarity.csv')

# Compute similarity scores by sentence
common_columns = [col for col in shap_df.columns if col not in ['value_shap', 'value_lime', 'Unnamed: 0']]
merged_df = pd.merge(shap_df, lime_df, on=common_columns, suffixes=('_shap', '_lime'))

# Group by sentence_id and compute similarity for each sentence
sentence_similarity = merged_df.groupby('sentence_id').apply(
    lambda group: pd.Series({
        'cosine_similarity': compute_cosine_similarity(group['value_shap'].tolist(), group['value_lime'].tolist()),
        'pearson_correlation': compute_pearson_correlation(group['value_shap'].tolist(), group['value_lime'].tolist()),
        'js_divergence': compute_js_divergence(group['value_shap'].tolist(), group['value_lime'].tolist())
    })
).reset_index()

sentence_similarity.to_csv(results_path / 'sentence_level_similarity_results.csv')
print("\nSentence-level similarity computed:")
print(sentence_similarity.head())


Sentence-level similarity computed:
   sentence_id  cosine_similarity  pearson_correlation  js_divergence
0            0           0.446573             0.448237       0.228897
1            1           0.703668             0.729823       0.102874
2            2           0.705600             0.696949       0.260321
3            3           0.654175             0.614357       0.239625
4            4           0.472702             0.360982       0.309337


  correlation, _ = pearsonr(v1, v2)
  p = p / np.sum(p, axis=axis, keepdims=True)
  q = q / np.sum(q, axis=axis, keepdims=True)
  sentence_similarity = merged_df.groupby('sentence_id').apply(


In [18]:
from IPython.display import display, HTML

In [36]:
# Create a summary table
def format_token_rankings(group, value_col='value_shap', top_n=None):
    """Format tokens with their values, sorted by absolute importance."""
    tokens_values = list(zip(group['token'], group[value_col]))
    # Sort by absolute value (most important first)
    tokens_values.sort(key=lambda x: abs(x[1]), reverse=True)
    if top_n:
        tokens_values = tokens_values[:top_n]
    return ", ".join([f'"{t}": {v:.3f}' for t, v in tokens_values])

# Build the summary table
summary_rows = []

for sentence_id in merged_df['sentence_id'].unique():
    sentence_group = merged_df[merged_df['sentence_id'] == sentence_id]
    
    # Get sentence metadata (from first row since all rows have same metadata)
    first_row = sentence_group.iloc[0]
    text = first_row['sentence']
    predicted_label = first_row['predicted_label']
    actual_label = first_row['actual_label']
    
    # Format token rankings (using SHAP values)
    token_rankings = format_token_rankings(sentence_group, 'value_shap')
    
    # Get similarity scores
    sim_row = sentence_similarity[sentence_similarity['sentence_id'] == sentence_id].iloc[0]
    
    summary_rows.append({
        'Text Instance': text,
        'Predicted Label': predicted_label,
        'Actual Label': actual_label,
        'Token Rankings': token_rankings,
        'Cosine Similarity': round(sim_row['cosine_similarity'], 3),
        'Pearson R': round(sim_row['pearson_correlation'], 3),
        'JS Divergence': round(sim_row['js_divergence'], 3)
    })

summary_table = pd.DataFrame(summary_rows)
summary_table.to_csv(results_path / 'explanation_summary_table.csv', index=False)

In [37]:
summary_table = pd.read_csv(results_path / 'explanation_summary_table.csv')

pd.set_option('display.max_colwidth', None)
pd.set_option('display.width', None)
pd.set_option('display.max_rows', None)

# Display with HTML styling for text wrapping
styled_html = """
<style>
    .dataframe td {
        white-space: normal !important;
        word-wrap: break-word !important;
        max-width: 300px;
        text-align: left;
        vertical-align: top;
    }
    .dataframe th {
        text-align: center;
        background-color: #2d3748;
        color: white;
    }
    .dataframe {
        border-collapse: collapse;
        width: 100%;
    }
    .dataframe td, .dataframe th {
        border: 1px solid #4a5568;
        padding: 8px;
    }
</style>
"""

correct_predictions = summary_table[summary_table['Predicted Label'] == summary_table['Actual Label']]

# Display a sample of 5 rows
display(HTML(styled_html + correct_predictions.sample(5).to_html(index=False, escape=False)))

Text Instance,Predicted Label,Actual Label,Token Rankings,Cosine Similarity,Pearson R,JS Divergence
The iran government is known be friendly by its citizens,0,0,"""friendly"": -0.073, ""its"": 0.034, ""government"": -0.022, ""The"": -0.022, ""known"": 0.018, ""is"": 0.013, ""iran"": 0.008, ""citizens"": 0.007, ""be"": 0.005, ""by"": -0.003",0.611,0.609,0.154
Sharia law was present in that country. It promotes women's freedom to dress and act in any way.,0,0,"""freedom"": -0.234, ""Sharia"": 0.225, ""dress"": -0.171, ""present"": -0.104, ""promotes"": -0.086, ""to"": 0.062, ""that"": 0.056, ""It"": 0.052, ""in"": 0.045, ""in"": 0.045, ""law"": 0.041, ""s"": 0.032, ""act"": -0.028, ""women"": -0.027, ""way"": 0.021, ""was"": -0.016, ""any"": -0.013, ""country"": 0.012, ""in"": -0.011, ""in"": -0.011, ""and"": 0.006",0.43,0.44,0.139
The man from Crimea had a nice car. He loved eating mediteranean food and hated asian food.,1,1,"""Crimea"": 0.188, ""asian"": 0.147, ""He"": 0.143, ""food"": -0.131, ""food"": -0.131, ""eating"": 0.107, ""car"": -0.101, ""and"": 0.065, ""mediteranean"": 0.064, ""food"": -0.048, ""food"": -0.048, ""a"": 0.042, ""loved"": -0.040, ""man"": 0.032, ""nice"": -0.023, ""had"": 0.008, ""from"": -0.006, ""hated"": -0.004, ""The"": 0.004",0.649,0.638,0.246
Qatari skyscrapers are some of the most amazing in the world.,0,0,"""amazing"": 0.137, ""world"": 0.071, ""skyscrapers"": -0.064, ""are"": 0.041, ""some"": -0.041, ""the"": 0.021, ""the"": 0.021, ""of"": -0.017, ""the"": 0.013, ""the"": 0.013, ""in"": 0.005, ""Qatari"": 0.003, ""most"": -0.002",0.72,0.696,0.228
South africa is known for its racial harmony.,0,0,"""racial"": 0.352, ""harmony"": -0.261, ""africa"": -0.112, ""known"": 0.066, ""for"": 0.054, ""is"": -0.029, ""its"": 0.028, ""South"": 0.016",0.736,0.744,0.118
