In [None]:
import os
import sys

import pandas as pd

# Extend the module search path BEFORE importing the project-local helpers, so a
# fresh kernel can resolve them. Previously `siebert_xai` was imported before these
# appends, which only works if that module is importable without them.
# NOTE(review): the location of `siebert_xai` is not visible from this notebook — confirm.
sys.path.append('../lib/sentiment_analysis_utils')
sys.path.append('../lib')

from siebert_xai import visualize_ig_attr, ig_attr, lime_attr, visualize_lime_attr
from sentiment_analysis_utils import combine_lede_and_text, remove_text_formatting, read_all_news_in_dir

## Loading and transforming the data

In [None]:
# Read the news data from the raw `en` directory, then pass it through the two
# project helpers in the same order as before: combine_lede_and_text first,
# remove_text_formatting on its result.
df_en_raw = remove_text_formatting(
    combine_lede_and_text(
        read_all_news_in_dir(os.getcwd() + "/../data_preparation/raw_data/en/")
    )
)

In [None]:
# Load the stored document-level predictions and discard the "Unnamed: 0.1" column.
# NOTE(review): the file name is spelled "dataseet"; it is kept verbatim because it
# must match the file on disk — confirm before renaming either side.
sieberta_predictions = pd.read_csv(
    os.getcwd() + "/../data_preparation/document_predictions/test_dataseet_overall_sentiment_siebert-roberta.csv"
).drop(columns=["Unnamed: 0.1"])
sieberta_predictions

In [None]:
# Show the file names found in the annotated test-set directory.
os.listdir(os.getcwd() + "/../data_preparation/test_set_annotated/")

In [None]:
# Read every annotated Excel file and stack them with ONE concat. Growing the frame
# inside the loop re-copies all previously loaded rows on every iteration.
# Row order follows os.listdir and the original row indices are kept (no ignore_index),
# exactly as before.
annotated_dir = os.getcwd() + "/../data_preparation/test_set_annotated/"
annotated_frames = [
    pd.read_excel(annotated_dir + file_name)
    for file_name in os.listdir(annotated_dir)
]
# pd.concat raises on an empty list; keep the previous result (an empty frame)
# when the directory contains no files.
test_dataset = pd.concat(annotated_frames) if annotated_frames else pd.DataFrame()
def merging_function(row):
    """Return the first non-missing entry of ``row`` plus one.

    Args:
        row: a pandas Series (one row of the label columns when used with
            ``DataFrame.apply(..., axis=1)``).

    Returns:
        ``row[label] + 1`` for the first label whose value is not missing, or
        ``pd.NA`` when every entry of the row is missing.
    """
    first_label = row.first_valid_index()
    # first_valid_index() yields None when the row holds no valid value at all.
    if first_label is None:
        return pd.NA
    # NOTE(review): the +1 presumably shifts the annotation scale onto the label
    # ids used by the predictions — confirm against the annotation scheme.
    return row[first_label] + 1
# Collapse the three differently spelled label columns into `overall_sentiment`,
# taking per row whatever merging_function returns for that row.
test_dataset['overall_sentiment'] = (
    test_dataset[['overall_sentiment', 'overalll', 'overall']]
    .apply(merging_function, axis=1)
)
test_dataset

In [None]:
# Attach the stored predictions to the annotated labels via the shared 'Unnamed: 0' key.
predictions_labels_joined = test_dataset[['Unnamed: 0', 'overall_sentiment']].merge(
    sieberta_predictions,
    on='Unnamed: 0',
)
y_true = predictions_labels_joined.overall_sentiment
y_pred = predictions_labels_joined.overall_sentiment_name
# Keep the rows that carry an annotation and whose prediction differs from it.
incorrect_texts = predictions_labels_joined.loc[
    (y_true != y_pred) & y_true.notna(),
    ['text', 'overall_sentiment', 'overall_sentiment_name'],
]

In [None]:
# Display the rows where the prediction and the annotated label disagree.
incorrect_texts

## Loading the model

In [None]:
from transformers import pipeline

# Build the "sentiment-analysis" pipeline on the "siebert/sentiment-roberta-large-english"
# model; the attribution and visualisation calls below all receive this object.
sentiment_analysis = pipeline("sentiment-analysis", model="siebert/sentiment-roberta-large-english")

## Select an article and compute attributions with respect to positive sentiment

In [None]:
# Inspect the first misclassified row (text, annotated label, predicted label).
incorrect_texts.iloc[0]

In [None]:
# Work on the first misclassified article.
selected_article = incorrect_texts.iloc[0]
text = selected_article.text
# The stored prediction for this article; it is the label the attributions are computed for.
attr_label = [int(selected_article.overall_sentiment_name)]
# Keep the annotated label as a plain int: the IG visualisation call wraps it in a
# list itself (`[true_label]`), so storing a list here would hand it a nested list.
# This also matches the hand-written example further down, where true_label is a scalar.
true_label = int(selected_article.overall_sentiment)

In [None]:
# Compute attributions for `text` with respect to `attr_label`
# (presumably Integrated Gradients, judging by the `ig_` prefix — confirm in siebert_xai).
# With return_convergence_delta=True the helper's result is unpacked into (attr, delta).
attr, delta = ig_attr(sentiment_analysis, text, attr_label, return_convergence_delta=True)
# NOTE(review): true_label is wrapped in a list here, so it should be a scalar at this point.
visualize_ig_attr(sentiment_analysis, text, attr, delta, attr_label, [true_label])

In [None]:
# LIME attributions for the same article and label, using 1000 samples.
attr = lime_attr(sentiment_analysis, text, attr_label, n_samples=1000)
# Pass the article's annotated label instead of a hard-coded 1, so the true class
# shown here agrees with the one given to the IG visualisation of the same article.
visualize_lime_attr(
    sentiment_analysis,
    text,
    attr,
    attr_label,
    [int(incorrect_texts.iloc[0].overall_sentiment)],
)

In [None]:
# Same visualisation with the attributions multiplied by 10.
# The true label is the article's annotated label rather than a hard-coded 1.
visualize_lime_attr(
    sentiment_analysis,
    text,
    10 * attr,
    attr_label,
    [int(incorrect_texts.iloc[0].overall_sentiment)],
)

In [None]:
# Hand-written example sentence; the attribution target and the expected label are both 1
# (presumably the positive class — confirm against the model's label mapping).
attr_label = true_label = 1
text = "Today is a beautiful day and I can't stop smiling"

In [None]:
# Repeat the IG attribution and visualisation for the hand-written example sentence.
attr, delta = ig_attr(sentiment_analysis, text, attr_label, return_convergence_delta=True)
visualize_ig_attr(sentiment_analysis, text, attr, delta, attr_label, [true_label])

In [None]:
# LIME attributions for the example sentence; no n_samples is passed here, so the
# helper's default applies (the article run above used 1000).
attr = lime_attr(sentiment_analysis, text, attr_label)
# Use the `true_label` variable, as the IG visualisation of this example does,
# instead of repeating the literal 1.
visualize_lime_attr(sentiment_analysis, text, attr, attr_label, [true_label])