In [2]:
import pandas as pd
import tensorflow as tf
from transformers import (
    DistilBertTokenizer, TFDistilBertForSequenceClassification,
    RobertaTokenizer, TFRobertaForSequenceClassification,
)
from openai import OpenAI
import re
from Levenshtein import distance as levenshtein_distance




  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Load the fine-tuned DistilBERT model and tokenizer
distilbert_tokenizer = DistilBertTokenizer.from_pretrained('./distilbert_finetuned')
distilbert_model = TFDistilBertForSequenceClassification.from_pretrained('./distilbert_finetuned')

# Load the fine-tuned RoBERTa model and tokenizer for the first variant
roberta_tokenizer_v1 = RobertaTokenizer.from_pretrained('./roberta_v1_finetuned')
roberta_model_v1 = TFRobertaForSequenceClassification.from_pretrained('./roberta_v1_finetuned')

# Load the fine-tuned RoBERTa model and tokenizer for the second variant
roberta_tokenizer_v2 = RobertaTokenizer.from_pretrained('./roberta_v2_finetuned')
roberta_model_v2 = TFRobertaForSequenceClassification.from_pretrained('./roberta_v2_finetuned')

def ensemble_classify_news_and_evaluate_accuracy(df, column):
    # Lists to store individual model predictions
    distilbert_predictions = []
    roberta_v1_predictions = []
    roberta_v2_predictions = []
    ensemble_predictions = []
    
    for _, row in df.iterrows():
        text_input = row[column]

        # Prepare inputs and get probabilities for DistilBERT
        distilbert_inputs = distilbert_tokenizer(text_input, return_tensors="tf", truncation=True, padding='max_length', max_length=512)
        distilbert_outputs = distilbert_model(distilbert_inputs)
        distilbert_probabilities = tf.nn.softmax(distilbert_outputs.logits, axis=-1)
        distilbert_predicted_class_index = tf.argmax(distilbert_probabilities, axis=-1).numpy()[0]
        distilbert_predictions.append(True if distilbert_predicted_class_index == 1 else False)

        # Prepare inputs and get probabilities for RoBERTa variant 1
        roberta_inputs_v1 = roberta_tokenizer_v1(text_input, return_tensors="tf", truncation=True, padding='max_length', max_length=512)
        roberta_outputs_v1 = roberta_model_v1(roberta_inputs_v1)
        roberta_probabilities_v1 = tf.nn.softmax(roberta_outputs_v1.logits, axis=-1)
        roberta_v1_predicted_class_index = tf.argmax(roberta_probabilities_v1, axis=-1).numpy()[0]
        roberta_v1_predictions.append(True if roberta_v1_predicted_class_index == 1 else False)

        # Prepare inputs and get probabilities for RoBERTa variant 2
        roberta_inputs_v2 = roberta_tokenizer_v2(text_input, return_tensors="tf", truncation=True, padding='max_length', max_length=512)
        roberta_outputs_v2 = roberta_model_v2(roberta_inputs_v2)
        roberta_probabilities_v2 = tf.nn.softmax(roberta_outputs_v2.logits, axis=-1)
        roberta_v2_predicted_class_index = tf.argmax(roberta_probabilities_v2, axis=-1).numpy()[0]
        roberta_v2_predictions.append(True if roberta_v2_predicted_class_index == 1 else False)

        # Ensemble: Average the probabilities from all models
        avg_probabilities = (distilbert_probabilities + roberta_probabilities_v1 + roberta_probabilities_v2) / 3
        predicted_class_index = tf.argmax(avg_probabilities, axis=-1).numpy()[0]
        ensemble_predictions.append(True if predicted_class_index == 1 else False)

    # Adding predictions to the DataFrame
    df[f'DistilBERTPrediction_{column}'] = distilbert_predictions
    df[f'RoBERTaV1Prediction_{column}'] = roberta_v1_predictions
    df[f'RoBERTaV2Prediction_{column}'] = roberta_v2_predictions
    df[f'EnsemblePrediction_{column}'] = ensemble_predictions
    
    # Calculate and print the accuracy for the ensemble predictions
    # correct_predictions = (df[f'EnsemblePrediction_{column}'] == df['Label']).sum()
    # total_predictions = len(df)
    # accuracy = correct_predictions / total_predictions
    # print(f"Accuracy: {accuracy:.4f}")
    
    return df




All model checkpoint layers were used when initializing TFDistilBertForSequenceClassification.

All the layers of TFDistilBertForSequenceClassification were initialized from the model checkpoint at ./distilbert_finetuned.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFDistilBertForSequenceClassification for predictions without further training.
All model checkpoint layers were used when initializing TFRobertaForSequenceClassification.

All the layers of TFRobertaForSequenceClassification were initialized from the model checkpoint at ./roberta_v1_finetuned.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFRobertaForSequenceClassification for predictions without further training.
All model checkpoint layers were used when initializing TFRobertaForSequenceClassification.

All the layers of TFRobertaForSequenceClassification were initialized from the model checkpoint at ./roberta_v2_

In [55]:
# # Ensure the get_gpt4_response function is adjusted if necessary
def get_gpt4_response(client, prompt):
    chat_completion = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "user", "content": prompt}
        ]
    )
    return chat_completion.choices[0].message.content

key_path = './apiKey.txt'
with open(key_path, 'r') as file:
    key = file.readline().strip()

client = OpenAI(
    api_key = key
)

def get_counterfactual(df, client):
    # Iterate over the DataFrame using .iterrows() for reading; however, to modify the original df, use .loc for assignment
    for index, row in df.iterrows():
        input_text = row['text']
        label = 'fake news' if row['EnsemblePrediction_Text'] else 'real news'
        
        # Your existing prompt creation and GPT-4 querying logic
        # Simplified task description without source information
        task_description = "You are assisting in a task that involves classifying tweets about COVID-19 as either misinformation or reliable information."
        
        prompt1 = f"""
        {task_description}
        Given the classifier's label of "{label}" for the following headline, 
        identify the key factors (latent features) that likely influenced this classification decision. 
        List the latent features as a comma-separated list.

        Headline: "{input_text}"
        Label: {label}

        Begin identifying latent features:
        """
        latent_features = get_gpt4_response(client, prompt1)
        
        prompt2 = f"""
        Based on the previously identified latent features in the classification of the headline as "{label}",
        now identify and list the specific words in the headline related to these features. 
        List the words as a comma-separated list.

        Latent features: {latent_features}
        Headline: "{input_text}"
        Label: {label}

        Begin identifying words:
        """
        identified_words = get_gpt4_response(client, prompt2)
        
        new_label = 'real news' if row['EnsemblePrediction_Text'] else 'fake news'
        prompt3 = f"""
        Generate a edited version of the original headline to
        alter the classification of the headline, aiming to change the label from "{label}" to "{new_label}.

        Original headline: "{input_text}"
        Latent features: {latent_features}
        Identified words associated with latent features: {identified_words}

        Generate the counterfactual headline:
        """
        counterfactual = get_gpt4_response(client, prompt3)
        
        # Update the original DataFrame directly
        df.loc[index, 'Latent Features'] = latent_features
        df.loc[index, 'Identified Words'] = identified_words
        df.loc[index, 'Counterfactual Text'] = counterfactual

    # No need to return a new DataFrame; the original df has been updated
    return df

In [56]:
val_data = pd.read_csv("./Data/Processed/Constraint_Val_Labeled.csv").drop(columns=["Unnamed: 0"])
display(val_data)

Unnamed: 0,text,label,DistilBERTPrediction_Text,RoBERTaV1Prediction_Text,RoBERTaV2Prediction_Text,EnsemblePrediction_Text
0,Chinese converting to Islam after realising th...,False,False,False,False,False
1,11 out of 13 people (from the Diamond Princess...,False,False,False,False,False
2,"COVID-19 Is Caused By A Bacterium, Not Virus A...",False,False,False,False,False
3,Mike Pence in RNC speech praises Donald Trump’...,False,False,False,False,False
4,6/10 Sky's @EdConwaySky explains the latest #C...,True,True,True,True,True
...,...,...,...,...,...,...
2135,Donald Trump wrongly claimed that New Zealand ...,False,False,False,False,False
2136,Current understanding is #COVID19 spreads most...,True,True,True,True,True
2137,Nothing screams “I am sat around doing fuck al...,False,False,False,False,False
2138,Birx says COVID-19 outbreak not under control ...,False,False,True,False,False


In [57]:
test = pd.read_csv("sample.csv")[['text', 'label', 'DistilBERTPrediction_Text', 'RoBERTaV1Prediction_Text', 'RoBERTaV2Prediction_Text', 'EnsemblePrediction_Text']]
test = get_counterfactual(test, client)
display(test)

Unnamed: 0,text,label,DistilBERTPrediction_Text,RoBERTaV1Prediction_Text,RoBERTaV2Prediction_Text,EnsemblePrediction_Text,Latent Features,Identified Words,Counterfactual Text
0,With financial support from @WorldBank WHO pro...,True,True,True,True,True,- Use of official organizations such as World ...,- World Bank\n- WHO\n- district hospitals\n- L...,"""Without any funding, WHO struggles to provide..."
1,#CoronaVirusUpdates #IndiaFightsCorona Total r...,True,True,True,True,True,"1. Use of hashtags (#CoronavirusUpdates, #Indi...","#CoronavirusUpdates, #IndiaFightsCorona, Total...",#CoronaVirusUpdates #IndiaFightsCorona Challen...
2,_A video has been viewed tens of thousands of ...,False,False,False,False,False,1. Video evidence of police beating a man\n2. ...,"police, beating, man, video, viewed, tens of t...","""_Fake video circulating on social media claim..."
3,It will be difficult to put an exact date on t...,True,True,True,False,True,1. Lack of specific information on the vaccine...,"exact, date, availability, COVID19, vaccine, U...","""Union Health Minister @drharshvardhan provide..."
4,@DallasBarnett3 Kia ora Dallas yes. If someone...,True,True,True,True,True,1. Refusal of COVID-19 test\n2. Extended stay ...,"Refuses, test, stay, managed isolation, costs,...","""Official confirmation: Those who refuse COVID..."
...,...,...,...,...,...,...,...,...,...
495,Self-medicate COVID-19 using a home remedy wit...,False,False,False,False,False,- Mention of specific ingredients for self-med...,"aspirin, lemon, honey, home remedy, self-medicate","""Warning: Deadly risks of self-medicating COVI..."
496,Use the Coronavirus Self-Checker to help decid...,True,True,True,True,True,"- Use of ""self-checker"" tool\n- Mention of see...","Coronavirus, Self-Checker, seeking, testing, m...","""Use the COVID-19 Self-Checker tool to determi..."
497,We've built out a tool CAN Compare for decisio...,True,True,True,False,True,- Mention of a tool for comparing COVID risk\n...,"tool, Compare, Covid, risk, state, county, acr...","New tool ""CAN Compare"" reveals insights into c..."
498,#CoronaVirusUpdates #IndiaFightsCorona More th...,True,True,True,True,True,1. Use of hashtags like #CoronaVirusUpdates an...,"#CoronaVirusUpdates, #IndiaFightsCorona, More,...","""#IndiaFightsCorona: Recent statistics show a ..."


In [58]:
# Function to strip tags
def strip_tags(text):
    return re.sub(r"<new>|</new>|\"", "", text)

# Apply the function to strip tags from the Counterfactual Text
test['Counterfactual Text'] = test['Counterfactual Text'].apply(strip_tags)
display(test)

Unnamed: 0,text,label,DistilBERTPrediction_Text,RoBERTaV1Prediction_Text,RoBERTaV2Prediction_Text,EnsemblePrediction_Text,Latent Features,Identified Words,Counterfactual Text
0,With financial support from @WorldBank WHO pro...,True,True,True,True,True,- Use of official organizations such as World ...,- World Bank\n- WHO\n- district hospitals\n- L...,"Without any funding, WHO struggles to provide ..."
1,#CoronaVirusUpdates #IndiaFightsCorona Total r...,True,True,True,True,True,"1. Use of hashtags (#CoronavirusUpdates, #Indi...","#CoronavirusUpdates, #IndiaFightsCorona, Total...",#CoronaVirusUpdates #IndiaFightsCorona Challen...
2,_A video has been viewed tens of thousands of ...,False,False,False,False,False,1. Video evidence of police beating a man\n2. ...,"police, beating, man, video, viewed, tens of t...",_Fake video circulating on social media claims...
3,It will be difficult to put an exact date on t...,True,True,True,False,True,1. Lack of specific information on the vaccine...,"exact, date, availability, COVID19, vaccine, U...",Union Health Minister @drharshvardhan provides...
4,@DallasBarnett3 Kia ora Dallas yes. If someone...,True,True,True,True,True,1. Refusal of COVID-19 test\n2. Extended stay ...,"Refuses, test, stay, managed isolation, costs,...",Official confirmation: Those who refuse COVID-...
...,...,...,...,...,...,...,...,...,...
495,Self-medicate COVID-19 using a home remedy wit...,False,False,False,False,False,- Mention of specific ingredients for self-med...,"aspirin, lemon, honey, home remedy, self-medicate",Warning: Deadly risks of self-medicating COVID...
496,Use the Coronavirus Self-Checker to help decid...,True,True,True,True,True,"- Use of ""self-checker"" tool\n- Mention of see...","Coronavirus, Self-Checker, seeking, testing, m...",Use the COVID-19 Self-Checker tool to determin...
497,We've built out a tool CAN Compare for decisio...,True,True,True,False,True,- Mention of a tool for comparing COVID risk\n...,"tool, Compare, Covid, risk, state, county, acr...",New tool CAN Compare reveals insights into cou...
498,#CoronaVirusUpdates #IndiaFightsCorona More th...,True,True,True,True,True,1. Use of hashtags like #CoronaVirusUpdates an...,"#CoronaVirusUpdates, #IndiaFightsCorona, More,...",#IndiaFightsCorona: Recent statistics show a s...


In [59]:
test = ensemble_classify_news_and_evaluate_accuracy(test, 'Counterfactual Text')
display(test)

Unnamed: 0,text,label,DistilBERTPrediction_Text,RoBERTaV1Prediction_Text,RoBERTaV2Prediction_Text,EnsemblePrediction_Text,Latent Features,Identified Words,Counterfactual Text,DistilBERTPrediction_Counterfactual Text,RoBERTaV1Prediction_Counterfactual Text,RoBERTaV2Prediction_Counterfactual Text,EnsemblePrediction_Counterfactual Text
0,With financial support from @WorldBank WHO pro...,True,True,True,True,True,- Use of official organizations such as World ...,- World Bank\n- WHO\n- district hospitals\n- L...,"Without any funding, WHO struggles to provide ...",False,False,False,False
1,#CoronaVirusUpdates #IndiaFightsCorona Total r...,True,True,True,True,True,"1. Use of hashtags (#CoronavirusUpdates, #Indi...","#CoronavirusUpdates, #IndiaFightsCorona, Total...",#CoronaVirusUpdates #IndiaFightsCorona Challen...,True,True,True,True
2,_A video has been viewed tens of thousands of ...,False,False,False,False,False,1. Video evidence of police beating a man\n2. ...,"police, beating, man, video, viewed, tens of t...",_Fake video circulating on social media claims...,False,False,False,False
3,It will be difficult to put an exact date on t...,True,True,True,False,True,1. Lack of specific information on the vaccine...,"exact, date, availability, COVID19, vaccine, U...",Union Health Minister @drharshvardhan provides...,True,True,False,True
4,@DallasBarnett3 Kia ora Dallas yes. If someone...,True,True,True,True,True,1. Refusal of COVID-19 test\n2. Extended stay ...,"Refuses, test, stay, managed isolation, costs,...",Official confirmation: Those who refuse COVID-...,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
495,Self-medicate COVID-19 using a home remedy wit...,False,False,False,False,False,- Mention of specific ingredients for self-med...,"aspirin, lemon, honey, home remedy, self-medicate",Warning: Deadly risks of self-medicating COVID...,False,False,False,False
496,Use the Coronavirus Self-Checker to help decid...,True,True,True,True,True,"- Use of ""self-checker"" tool\n- Mention of see...","Coronavirus, Self-Checker, seeking, testing, m...",Use the COVID-19 Self-Checker tool to determin...,True,True,True,True
497,We've built out a tool CAN Compare for decisio...,True,True,True,False,True,- Mention of a tool for comparing COVID risk\n...,"tool, Compare, Covid, risk, state, county, acr...",New tool CAN Compare reveals insights into cou...,False,True,False,False
498,#CoronaVirusUpdates #IndiaFightsCorona More th...,True,True,True,True,True,1. Use of hashtags like #CoronaVirusUpdates an...,"#CoronaVirusUpdates, #IndiaFightsCorona, More,...",#IndiaFightsCorona: Recent statistics show a s...,False,True,True,True


In [60]:
test['Levenshtein Distance'] = test.apply(lambda row: levenshtein_distance(row['text'], row['Counterfactual Text']), axis=1)

# Display the modified DataFrame
display(test[['text', 'Counterfactual Text', 'Levenshtein Distance']])
average_levenshtein_distance = test['Levenshtein Distance'].mean()
print(f"Average Levenshtein Distance: {average_levenshtein_distance:.2f}")

Unnamed: 0,text,Counterfactual Text,Levenshtein Distance
0,With financial support from @WorldBank WHO pro...,"Without any funding, WHO struggles to provide ...",127
1,#CoronaVirusUpdates #IndiaFightsCorona Total r...,#CoronaVirusUpdates #IndiaFightsCorona Challen...,59
2,_A video has been viewed tens of thousands of ...,_Fake video circulating on social media claims...,155
3,It will be difficult to put an exact date on t...,Union Health Minister @drharshvardhan provides...,209
4,@DallasBarnett3 Kia ora Dallas yes. If someone...,Official confirmation: Those who refuse COVID-...,159
...,...,...,...
495,Self-medicate COVID-19 using a home remedy wit...,Warning: Deadly risks of self-medicating COVID...,59
496,Use the Coronavirus Self-Checker to help decid...,Use the COVID-19 Self-Checker tool to determin...,83
497,We've built out a tool CAN Compare for decisio...,New tool CAN Compare reveals insights into cou...,158
498,#CoronaVirusUpdates #IndiaFightsCorona More th...,#IndiaFightsCorona: Recent statistics show a s...,109


Average Levenshtein Distance: 106.35


In [61]:
original_pred_columns = ['DistilBERTPrediction_Text', 'RoBERTaV1Prediction_Text', 'RoBERTaV2Prediction_Text', 'EnsemblePrediction_Text']
counterfactual_pred_columns = ['DistilBERTPrediction_Counterfactual Text', 'RoBERTaV1Prediction_Counterfactual Text', 'RoBERTaV2Prediction_Counterfactual Text', 'EnsemblePrediction_Counterfactual Text']

# Calculate label flip score for each model
for original_col, counterfactual_col in zip(original_pred_columns, counterfactual_pred_columns):
    # Count the number of label flips
    label_flips = (test[original_col] != test[counterfactual_col]).sum()
    total_predictions = len(test)
    flip_score = label_flips / total_predictions
    print(f"Label flip score for {original_col.split('_')[0]}: {flip_score:.4f}")

Label flip score for DistilBERTPrediction: 0.3420
Label flip score for RoBERTaV1Prediction: 0.1160
Label flip score for RoBERTaV2Prediction: 0.2160
Label flip score for EnsemblePrediction: 0.2020


In [63]:

# Creating a new DataFrame to store examples
examples_df = pd.DataFrame()

# Iterate through each pair of columns
for original_col, counterfactual_col in zip(original_pred_columns, counterfactual_pred_columns):
    # Identify rows where predictions are different between original and counterfactual
    diff_mask = test[original_col] != test[counterfactual_col]
    
    if examples_df.empty:
        examples_df = test[diff_mask].copy()
    else:
        # Union the current differences with previously found ones
        examples_df = pd.concat([examples_df, test[diff_mask]], axis=0).drop_duplicates()

    if len(examples_df) >= 10:
        break  # Exit loop if we have found 10 or more examples

# Truncate to the first 10 examples if we have more than 10
examples_df = examples_df.head(10)

# Printing or returning the examples
print(examples_df[['text', 'Counterfactual Text'] + original_pred_columns + counterfactual_pred_columns])
examples_df.to_csv("examples.csv")


                                                 text  \
0   With financial support from @WorldBank WHO pro...   
4   @DallasBarnett3 Kia ora Dallas yes. If someone...   
5   Lymphocytopenia could predict #COVID19 outcome...   
11  Our daily update is published. The US has now ...   
13  COVID-19 Update⠀ ⠀ We have four additional pro...   
14  As per @MoHFW_INDIA revised discharge policy f...   
15  States reported 1192 deaths. The 7-day average...   
16  RT @drharshvardhan: #CoronaVirusUpdates Of the...   
19  Our daily update is published. States reported...   
25  Mike Pence introduces program to cure coronavi...   

                                  Counterfactual Text  \
0   Without any funding, WHO struggles to provide ...   
4   Official confirmation: Those who refuse COVID-...   
5   New Study Finds Link Between Lymphocytopenia a...   
11  New data released shows over 400,000 completed...   
13  Government Confirms Four New COVID-19 Cases in...   
14  As per@MoHFW_INDIA dischar