In [232]:
import pandas as pd
import sys
# Put the current and parent directories on the import path (presumably so the
# `config` module imported below resolves). Only missing entries are added, so
# re-running this cell does not keep growing sys.path with duplicates.
sys.path.extend([entry for entry in (".", "..") if entry not in sys.path])
from config import DATA_DIR,FINAL_ANNOTATION_FILE_PATH
from os.path import join
from ast import literal_eval 

# Display settings: do not truncate the column list and allow wide output,
# since the metric tables below have many columns.
pd.options.display.max_columns = None
pd.options.display.width = 1000

## By model

In [233]:
# Diversity metrics aggregated per model, read from DATA_DIR.
df_m = pd.read_csv(
    join(DATA_DIR, "counterspeech_diversity_evaluation_by_model.csv")
)

In [234]:
# Show the complete per-model metrics table.
df_m

Unnamed: 0,model,corpus_distinct_1grams,corpus_distinct_2grams,corpus_self_bleu_mean,corpus_self_bleu_std,corpus_jaccard_diversity_mean,corpus_jaccard_diversity_std,corpus_distinct_3_sentence_starts,common_sentence_starts_3,common_sentences,corpus_semantic_similarity_mean,corpus_semantic_similarity_std,mean_sentence_length,pct_questions,pct_exclamations,mean_text_length,response_distinct_2grams_mean,response_distinct_2grams_std,messages_with_emojis,unique_emojis,responses_with_hashtags,hashtags,responses_with_urls,unique_urls,messages_with_entities,persons_message,persons_response,organizations_message,organizations_response,sentiment
0,llama3,0.150566,0.475825,0.470451,0.100121,0.774002,0.037507,0.296296,"[(""It's also important"", 59), (""Let's focus on...","[(""Let's focus on the facts"", 4), (""Instead of...",0.641531,0.119619,22.093192,0.000868,9e-05,1267.414474,0.949263,0.035295,0,set(),8,"['#Qanon', '#Qanon', '#NHI', '#UFO', '#TruthOv...",0,set(),109,"[['Biden'], [], [], ['Benjamin Netanyahu'], ['...","[['Joe Biden'], [], [], ['Benjamin Netanyahu']...","[[], [], ['#', 'Qanon'], ['QAnon'], [], ['MAGA...","[[], [], ['#', 'Qanon', '#', 'Qanon'], ['QAnon...","Counter({'positive': 108, 'negative': 43, 'neu..."
1,mistral,0.257401,0.724732,0.201746,0.075483,0.845654,0.02141,0.65879,"[(""Let's focus on"", 27), (""It's important to"",...","[('2', 5), ('', 4), ('4', 3), ('3', 3), ('Vacc...",0.581122,0.123281,18.925994,0.001075,0.000244,795.046053,0.992059,0.012807,5,"{'🧐', '💪', '💬', '🍵', '💡', '🔍', '💉', '🇱', '📰', ...",41,"['#PromoteUnderstanding', '#EngageInCriticalTh...",2,"{'https://www.fda.gov/', 'https://www.epa.gov/...",94,"[['Biden'], [], [], ['Benjamin Netanyahu'], ['...","[['Biden'], [], [], ['Benjamin Netanyahu'], []...","[[], [], ['#', 'Qanon'], ['QAnon'], [], ['MAGA...","[[], [], ['QAnon', 'FBI', 'The International L...","Counter({'positive': 108, 'negative': 41, 'neu..."
2,gpt4o,0.22207,0.692424,0.225919,0.076121,0.827805,0.022569,0.594008,"[(""It's understandable to"", 40), (""It's import...","[(""It's important to examine claims critically...",0.648989,0.11623,18.048504,0.002058,0.0004,709.375,0.994808,0.00832,0,set(),3,"['#KenyaFacts', '#ConsiderBothSides', '#Scienc...",0,set(),51,"[['Biden'], [], [], ['Benjamin Netanyahu'], ['...","[['Biden'], [], [], [], [], [], [], [], ['Geor...","[[], [], ['#', 'Qanon'], ['QAnon'], [], ['MAGA...","[[], [], [], [], [], [], [], ['the World Econo...","Counter({'positive': 142, 'negative': 9, 'neut..."
3,random,0.200843,0.610766,0.290158,0.122831,0.821016,0.036459,0.486066,"[(""It's important to"", 42), (""Let's focus on"",...","[(""However it's essential to separate facts fr...",0.6,0.117009,20.432543,0.001561,0.00036,964.059211,0.974534,0.036762,0,set(),11,"['#SecondAmendment', '#GunSense', '#SafetyFirs...",1,"{'https://www.fda.gov/', 'https://www.epa.gov/...",86,"[['Qanon'], [], [], [], ['RFK Jr', 'Jesus'], [...","[[], [], [], [], [], ['Robert Fico'], ['Trump'...","[[], ['#', 'Qanon'], [], ['MAGA', 'QAnon'], []...",[['the Centers for Disease Control and Prevent...,"Counter({'positive': 110, 'negative': 42})"


### Metrics

- **corpus_distinct_1grams** / **corpus_distinct_2grams**: Takes all responses of a model into account and computes the percentage of unique 1-grams/2-grams. Scores are between 0 and 1, and scores close to 1 indicate a high amount of unique or little-repeated n-grams.
- **corpus_self_bleu_mean**/**corpus_self_bleu_std**: Self-BLEU computes the BLEU score between a response and all other responses by the same model. BLEU (originally a metric for evaluating automated machine translation) gives a score close to 1 if texts are similar. As BLEU is based on comparisons of n-grams and n-substrings, it is similar to the first metric.
- **corpus_jaccard_diversity_mean**/**corpus_jaccard_diversity_std**: Based on **pairwise Jaccard similarity**: for two responses, the Jaccard similarity is the set overlap, i.e., both responses are split at whitespace, and the size of the intersection of their unique tokens is divided by the size of their union. Then, for every response, we compute the **pairwise Jaccard similarity**, and take the maximum. We then report mean and std.
- **corpus_distinct_3_sentence_starts**: Takes all responses of the model into account and computes the percentage of unique sentence starts (=first three words) among all sentence starts.
- **common_sentence_starts_3**/**common_sentences**: The 10 most common sentence starts (=first three words) and sentences per model.
- **corpus_semantic_similarity_mean**/**corpus_semantic_similarity_std**: Computes cosine similarity between pairs of embeddings of model responses, and takes the maximum similarity per response. We then report mean and std. The embedding model is the Sentence Transformer `all-MiniLM-L6-v2`. Note that here we measure semantic similarity, whereas above we measured lexical similarity. For comparison, we have computed the mean semantic similarity of the corpus, yielding `mean=0.51` and `std=0.08`.
- **pct_questions**/**pct_exclamations**: Percentage of question and exclamation marks among all sentence endings.
- **response_distinct_2grams_mean**/**response_distinct_2grams_std**: Computes the percentage of unique 2-grams per message and reports the mean and std.
- **messages_with_emojis**/**unique_emojis**: Number of messages containing an emoji + unique emojis
- **remaining columns**: number of messages with X, and unique X. For named entities: organizations and persons.

In [235]:
# Scale the pct_questions column by 100 for easier reading.
df_m["pct_questions"].mul(100)

0    0.086756
1    0.107532
2    0.205844
3    0.156069
Name: pct_questions, dtype: float64

In [236]:
# llama3 (row 0): most common three-word sentence starts
df_m.at[0, "common_sentence_starts_3"]

'[("It\'s also important", 59), ("Let\'s focus on", 56), ("It\'s important to", 54), ("It\'s essential to", 39), (\'Instead of spreading\', 32), ("It\'s crucial to", 29), (\'By doing so\', 29), ("While it\'s true", 28), (\'Instead of focusing\', 21), (\'The claim that\', 20)]'

In [237]:
# mistral (row 1): most common three-word sentence starts
df_m.at[1, "common_sentence_starts_3"]

'[("Let\'s focus on", 27), ("It\'s important to", 21), ("It\'s crucial to", 15), ("It\'s essential to", 13), ("Instead let\'s focus", 11), (\'As for the\', 9), ("However it\'s essential", 6), ("However it\'s crucial", 6), ("Let\'s examine the", 6), ("Let\'s consider the", 5)]'

In [238]:
# gpt4o (row 2): most common three-word sentence starts
df_m.at[2, "common_sentence_starts_3"]

'[("It\'s understandable to", 40), ("It\'s important to", 39), ("Let\'s focus on", 14), ("It\'s crucial to", 13), ("However it\'s important", 11), ("It\'s essential to", 10), ("It\'s natural to", 9), (\'The idea of\', 8), (\'Critical thinking helps\', 6), (\'Critical thinking is\', 5)]'

In [239]:
# random (row 3): most common three-word sentence starts
df_m.at[3, "common_sentence_starts_3"]

'[("It\'s important to", 42), ("Let\'s focus on", 35), ("It\'s essential to", 27), ("It\'s also important", 26), ("It\'s crucial to", 23), ("It\'s understandable to", 23), ("While it\'s true", 14), (\'Instead of spreading\', 14), ("However it\'s important", 11), ("However it\'s essential", 10)]'

In [240]:
# llama3 (row 0): contents of the hashtags column
df_m.at[0, "hashtags"]

"['#Qanon', '#Qanon', '#NHI', '#UFO', '#TruthOverFiction', '#ChildProtection', '#TransparencyInGovernment', '#FactBasedDiscussions', '#CriticalThinking', '#BigPharma', '#NHS', '#BigPharma', '#Privatisation', '#FactBasedInformation', '#RespectForAll', '#GeoEngineered', '#chemtrails', '#ClimateChange', '#ScienceOverSpeculation', '#Spain', '#France', '#Italy', '#ClimateAction', '#DisasterRelief']"

In [241]:
# mistral (row 1): contents of the hashtags column
df_m.at[1, "hashtags"]

"['#PromoteUnderstanding', '#EngageInCriticalThinking', '#SupportCivilDiscourse', '#SecondAmendment', '#GunSense', '#SafetyFirst', '#QAnon', '#FactCheck', '#2A', '#MAGA', '#VaccineSafety', '#EvidenceBased', '#TrustNews', '#CriticalJournalism', '#ReliableNews', '#MediaAccountability', '#DeepState', '#deepstate', '#DeepStateAgenda', '#PoliticalResearchMatters', '#CriticalThinkingIsKey', '#FactCheckFirst', '#InformedVoting', '#FactsMatter', '#DismissConspiracies', '#AI', '#DeepState', '#DeepState', '#ProjectNimbus', '#Peacefuldialogue', '#Solidarity', '#WW3', '#FactCheck', '#CriticalThinking', '#BreakingNews', '#Preparedness', '#CriticalThinking', '#EmergencyPlan', '#StayInformed', '#StayCalm', '#EconomicReset', '#XRP', '#XLM', '#WW3', '#EmpathyOverFear', '#FactsFirst', '#EmpathyMatters', '#ScienceWinsOverFear', '#RespectfulDiscourse', '#WEF', '#globalists', '#FactOverFear', '#VoteSmart', '#FactsMatter', '#VerifiedInformation', '#DemocraticProcess', '#CriticalThinking', '#EmbraceProgress'

In [242]:
# gpt4o (row 2): contents of the hashtags column
df_m.at[2, "hashtags"]

"['#KenyaFacts', '#ConsiderBothSides', '#ScienceMatters', '#InformedDecisions', '#UnityForValencia']"

In [243]:
# mistral (row 1): contents of the unique_urls column
df_m.at[1, "unique_urls"]

"{'https://www.fda.gov/', 'https://www.epa.gov/sites/default/files/2019-05/documents/2017_aerosols.pdf', 'https://www.mayoclinic.org/diseases-conditions/coronavirus/in-depth/ivermectin/art-20493668'}"

## By model and theme


In [244]:
# Diversity metrics aggregated per (theme, model) pair, read from DATA_DIR.
df_m_t = pd.read_csv(
    join(DATA_DIR, "counterspeech_diversity_evaluation_by_model_theme.csv")
)

In [245]:
# Show the complete per-theme, per-model metrics table.
df_m_t

Unnamed: 0,theme,model,corpus_distinct_1grams,corpus_distinct_2grams,corpus_self_bleu_mean,corpus_self_bleu_std,corpus_jaccard_diversity_mean,corpus_jaccard_diversity_std,corpus_distinct_3_sentence_starts,common_sentence_starts_3,common_sentences,corpus_semantic_similarity_mean,corpus_semantic_similarity_std,mean_sentence_length,pct_questions,pct_exclamations,mean_text_length,response_distinct_2grams_mean,response_distinct_2grams_std,messages_with_emojis,unique_emojis,responses_with_hashtags,hashtags,responses_with_urls,unique_urls,messages_with_entities,persons_message,persons_response,organizations_message,organizations_response,sentiment
0,1,llama3,0.193085,0.546422,0.412234,0.09218,0.783108,0.028685,0.347241,"[(""It's also important"", 32), (""It's important...",[('However this claim is unfounded and lacks c...,0.595869,0.09632,21.489247,0.000876,0.0,1215.0,0.948549,0.033147,0,set(),3,"['#Qanon', '#Qanon', '#NHI', '#UFO', '#TruthOv...",0,set(),57,"[['Biden'], [], [], ['Benjamin Netanyahu'], ['...","[['Joe Biden'], [], [], ['Benjamin Netanyahu']...","[[], [], ['#', 'Qanon'], ['QAnon'], [], ['MAGA...","[[], [], ['#', 'Qanon', '#', 'Qanon'], ['QAnon...","Counter({'positive': 51, 'negative': 23, 'neut..."
1,1,mistral,0.324887,0.788021,0.154099,0.054799,0.849923,0.020886,0.720322,"[(""Let's focus on"", 15), (""It's important to"",...","[('2', 3), ('4', 2), (""Examining your comment ...",0.549202,0.093033,18.519685,0.001276,0.000106,734.173333,0.993516,0.009781,1,"{'💪', '🇱', '🇵'}",19,"['#PromoteUnderstanding', '#EngageInCriticalTh...",0,set(),55,"[['Biden'], [], [], ['Benjamin Netanyahu'], ['...","[['Biden'], [], [], ['Benjamin Netanyahu'], []...","[[], [], ['#', 'Qanon'], ['QAnon'], [], ['MAGA...","[[], [], ['QAnon', 'FBI', 'The International L...","Counter({'positive': 48, 'negative': 27})"
2,1,gpt4o,0.282776,0.759416,0.178819,0.066418,0.835962,0.018363,0.653361,"[(""It's important to"", 23), (""It's understanda...","[(""It's important to examine claims critically...",0.614256,0.087189,18.119748,0.002203,0.000116,706.853333,0.994857,0.008067,0,set(),1,"['#KenyaFacts', '#ConsiderBothSides']",0,set(),26,"[['Biden'], [], [], ['Benjamin Netanyahu'], ['...","[['Biden'], [], [], [], [], [], [], [], ['Geor...","[[], [], ['#', 'Qanon'], ['QAnon'], [], ['MAGA...","[[], [], [], [], [], [], [], ['the World Econo...","Counter({'positive': 70, 'negative': 4, 'neutr..."
3,1,random,0.274687,0.720004,0.185299,0.08514,0.843076,0.025252,0.626354,"[(""It's important to"", 17), (""Let's focus on"",...",[('Studies on vaccine safety are conducted rig...,0.555668,0.100559,19.697842,0.001735,0.0,855.28,0.983223,0.025506,0,set(),8,"['#VaccineSafety', '#EvidenceBased', '#SecondA...",0,set(),45,"[['Qanon'], [], [], ['Robert Fico', 'Fico'], [...","[[], [], [], [], ['Trump'], ['Joe Biden'], [],...","[[], ['#', 'Qanon'], ['MAGA', 'QAnon'], [], ['...",[['the Centers for Disease Control and Prevent...,"Counter({'positive': 59, 'negative': 15, 'neut..."
4,2,llama3,0.183458,0.523542,0.438332,0.111158,0.769155,0.045947,0.356201,"[(""Let's focus on"", 29), (""It's also important...","[(""Let's focus on the facts"", 3), (""Instead of...",0.674952,0.135792,22.762533,0.000753,0.000116,1321.960526,0.950383,0.037315,0,set(),5,"['#BigPharma', '#NHS', '#BigPharma', '#Privati...",0,set(),51,"[[], ['@LeilaniDowding', '@ellymelly', 'Leilan...","[[], [], [], [], [], [], [], [], ['Igor Kirill...","[['The @WorkersPartyGB fights', 'BigPharma', ...","[['@WorkersPartyGB', 'NHS', 'NHS', 'the @Worke...","Counter({'positive': 56, 'negative': 20})"
5,2,mistral,0.304111,0.772742,0.166433,0.077646,0.845268,0.022437,0.740541,"[(""Let's focus on"", 11), (""It's important to"",...","[('', 3), ('ClimateAction', 2), ('2', 2), ('3'...",0.608424,0.142119,19.359788,0.000911,0.000273,859.921053,0.990516,0.015111,4,"{'🔎', '🌱', '💉', '🌫', '🧐', '🤔', '🍵', '🔍', '🤝', ...",21,"['#BigPharma', '#VaccineSafety', '#EvidenceBas...",2,{'https://www.epa.gov/sites/default/files/2019...,39,"[[], ['@LeilaniDowding', '@ellymelly', 'Leilan...","[[], [""Leilani Dowding's""], ['Trump'], [], [],...","[['The @WorkersPartyGB fights', 'BigPharma', ...","[['@WorkersPartyGB', 'NHS', 'NHS'], [], ['the ...","Counter({'positive': 59, 'negative': 14, 'neut..."
6,2,gpt4o,0.276042,0.743161,0.194188,0.074165,0.824328,0.027456,0.667351,"[(""It's understandable to"", 26), (""It's import...","[(""It's important to recognize the value of ou...",0.672554,0.134265,17.95082,0.001826,0.000685,713.0,0.99484,0.008587,0,set(),2,"['#ScienceMatters', '#InformedDecisions', '#Un...",0,set(),25,"[[], ['@LeilaniDowding', '@ellymelly', 'Leilan...","[[], [], [], [], [], [], [], [], [], [], [], [...","[['The @WorkersPartyGB fights', 'BigPharma', ...","[[], [], [], ['the World Health Organization',...","Counter({'positive': 71, 'negative': 5})"
7,2,random,0.257506,0.683277,0.246046,0.111307,0.827032,0.034525,0.617124,"[(""It's important to"", 23), (""Let's focus on"",...","[(""Acknowledging the concerns about hurricane ...",0.637338,0.137713,20.080257,0.000719,0.0004,976.526316,0.978808,0.024196,0,set(),13,"['#VaccineSafety', '#EvidenceBased', '#FactBas...",2,{'https://www.epa.gov/sites/default/files/2019...,37,"[[], ['RFK Jr', 'Jesus'], ['@LeilaniDowding', ...","[[], [""RFK Jr's""], [], [], [], [], ['Alex Jone...","[[], [], ['@adhtvaus'], ['#BigPharma'], [], []...","[[], ['the Centers for Disease Control and Pre...","Counter({'positive': 59, 'negative': 16, 'neut..."


- Mean semantic similarity for messages in theme 1.0: 0.49
- Std semantic similarity for messages in theme 1.0: 0.07
- Mean semantic similarity for messages in theme 2.0: 0.52
- Std semantic similarity for messages in theme 2.0: 0.1
