- sentiment analysis using the lexicon-based VADER analyzer (NLTK)
- compares the mean, standard deviation, min/max and count of the sentiment scores across
- the original text,
- the OpenAI-generated text,
- the DeepSeek-generated text

In [52]:
import pandas as pd
import nltk
# nltk.download('vader_lexicon')
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from prettytable import PrettyTable

In [53]:
# Load the model-generated statements.
# `names=` supplies the column labels, so pandas treats the first line of the
# file as data rather than as a header row.
headers = ['prompt', 'client', 'opt1', 'opt2', 'opt3', 'completion']
# Pass the path straight to pandas and pin the encoding: the built-in open()
# without `encoding=` decodes with the platform's locale, which makes the load
# machine-dependent. NOTE(review): assumes the CSV is stored as UTF-8 - confirm.
df_generated = pd.read_csv('../0_data/statements.csv', names=headers, encoding='utf-8')

df_generated.head()

Unnamed: 0,prompt,client,opt1,opt2,opt3,completion
0,"[{'role': 'system', 'content': 'You are a EU p...",chatgpt,1,0.8,400,While the 2025 Trump administration's trade me...
1,"[{'role': 'system', 'content': 'You are a EU p...",chatgpt,1,0.8,400,In response to the 2025 Trump administration's...
2,"[{'role': 'system', 'content': 'You are a EU p...",chatgpt,1,0.8,400,"While we, as the European Union, respect the a..."
3,"[{'role': 'system', 'content': 'You are a EU p...",chatgpt,1,0.8,400,"Thank you, Chair. In response to the 2025 Trum..."
4,"[{'role': 'system', 'content': 'You are a EU p...",chatgpt,1,0.8,400,"Ladies and Gentlemen, in response to the 2025 ..."


In [54]:
# Load the original (translated) debate transcripts.
# Pass the path straight to pandas and pin the encoding: the built-in open()
# without `encoding=` decodes with the platform's locale, and this file holds
# non-ASCII text, so the load would otherwise be machine-dependent.
# NOTE(review): assumes the CSV is stored as UTF-8 - confirm.
df_original = pd.read_csv('../0_data/eu_debate_transcripts_translated.csv', encoding='utf-8')

df_original.head()

Unnamed: 0,speaker_name,party,language,mep_id,role,text,translated_text
0,Predsedajúci,,SK,197770,,Ďalším bodom programu je vyhlásenie Rady a Kom...,The next item is the statement by the Council ...
1,Adam | Szłapka,,EN,0,President-in-Office of the Council,"in-Office of the Council. – Mr President, hono...","in-Office of the Council. – Mr President, hono..."
2,Maroš | Šefčovič,,EN,0,Member of the Commission,"Mr President, honourable Members, dear Ministe...","Mr President, honourable Members, dear Ministe..."
3,Jörgen | Warborn,PPE,EN,197405,On behalf of the Group,"Mr President, Minister, Commissioner, colleagu...","Mr President, Minister, Commissioner, colleagu..."
4,Iratxe | García Pérez,S-D,ES,28298,On behalf of the Group,"Señor presidente, querido comisario Šefčovič, ...","Mr. President, dear Commissioner Šefčovič, Don..."


In [55]:
# cleaning: drop every literal '###' from the generated completions
# (regex=False, so the marker is matched as plain text, not as a pattern)
completions = df_generated['completion']
df_generated['completion'] = completions.str.replace('###', '', regex=False)

In [56]:
# Sentiment analysis
sia = SentimentIntensityAnalyzer()
def get_sentiment(text):
    """Return the VADER compound polarity score of ``text``.

    Parameters
    ----------
    text : str
        The text to score. Missing values (``None``, NaN, ``pd.NA``) are
        accepted and yield NaN.

    Returns
    -------
    float
        The ``'compound'`` entry of ``sia.polarity_scores(text)``, or NaN
        when ``text`` is a missing value.
    """
    # pandas reads empty CSV cells as NaN (a float), while the analyzer expects
    # a string. Propagate the gap as NaN so one empty cell does not abort the
    # whole .apply(); pandas mean/std/count skip NaN by default.
    if not isinstance(text, str) and pd.isna(text):
        return float('nan')
    return sia.polarity_scores(text)['compound']

# Score the text column of each frame and store the result in a new
# 'sentiment' column on that same frame (original first, then generated).
for frame, text_column in ((df_original, 'translated_text'), (df_generated, 'completion')):
    frame['sentiment'] = frame[text_column].apply(get_sentiment)

df_original

Unnamed: 0,speaker_name,party,language,mep_id,role,text,translated_text,sentiment
0,Predsedajúci,,SK,197770,,Ďalším bodom programu je vyhlásenie Rady a Kom...,The next item is the statement by the Council ...,0.6597
1,Adam | Szłapka,,EN,0,President-in-Office of the Council,"in-Office of the Council. – Mr President, hono...","in-Office of the Council. – Mr President, hono...",0.9984
2,Maroš | Šefčovič,,EN,0,Member of the Commission,"Mr President, honourable Members, dear Ministe...","Mr President, honourable Members, dear Ministe...",0.9998
3,Jörgen | Warborn,PPE,EN,197405,On behalf of the Group,"Mr President, Minister, Commissioner, colleagu...","Mr President, Minister, Commissioner, colleagu...",0.9869
4,Iratxe | García Pérez,S-D,ES,28298,On behalf of the Group,"Señor presidente, querido comisario Šefčovič, ...","Mr. President, dear Commissioner Šefčovič, Don...",0.9271
...,...,...,...,...,...,...,...,...
103,Nicolás | González Casares,S-D,ES,197718,,"En materia de comercio, este es el asunto que ...","In trade matters, this is the issue that is of...",0.8869
104,Javi | López,S-D,ES,125042,,En materia de comercio este es el asunto que m...,"In terms of trade, this is the issue that is o...",0.8856
105,César | Luena,S-D,ES,197721,,En materia de comercio este es el asunto que m...,"In terms of trade, this is the issue that is o...",0.8856
106,Cristina | Maestre,S-D,ES,197719,,En materia de comercio este es el asunto que m...,"In terms of trade, this is the issue that is o...",0.8856


In [58]:
def get_descriptive_printout(sentiment):
    """Summarise a series of sentiment scores.

    Despite the name, nothing is printed: the statistics are returned so the
    caller can place them in a table row.

    Parameters
    ----------
    sentiment : pandas.Series
        Numeric scores to summarise.

    Returns
    -------
    list
        ``[mean, std, min, max, count]``, each obtained by calling the
        same-named method on ``sentiment`` with its default arguments.
    """
    statistic_names = ('mean', 'std', 'min', 'max', 'count')
    return [getattr(sentiment, statistic)() for statistic in statistic_names]

# Descriptive statistics of the sentiment column, one list per source.
# The generated frame is split by the value of its 'client' column.
# NOTE(review): the 'sentiment_openai_' prefix on the DeepSeek variable is
# misleading; the names are left unchanged in case other cells reference them.
is_openai = df_generated['client'] == 'chatgpt'
is_deepseek = df_generated['client'] == 'deepseek'
sentiment_original = get_descriptive_printout(df_original['sentiment'])
sentiment_openai_deepseek = get_descriptive_printout(df_generated[is_deepseek]['sentiment'])
sentiment_openai_chatgpt = get_descriptive_printout(df_generated[is_openai]['sentiment'])

# Create table: one row per source, in the order Original, ChatGPT, DeepSeek
sentiment_table = PrettyTable()
sentiment_table.field_names = ['Source', 'Mean', 'Std Dev', 'Min', 'Max', 'Count']

table_rows = (
    ('Original', sentiment_original),
    ('ChatGPT', sentiment_openai_chatgpt),
    ('DeepSeek', sentiment_openai_deepseek),
)
for source_label, source_stats in table_rows:
    sentiment_table.add_row([source_label, *source_stats])

# Print table
print('Sentiment Analysis (Vader) Results:', '> uncleaned data', sentiment_table, sep='\n')






Sentiment Analysis (Vader) Results:
> uncleaned data
+----------+--------------------+---------------------+---------+--------+-------+
|  Source  |        Mean        |       Std Dev       |   Min   |  Max   | Count |
+----------+--------------------+---------------------+---------+--------+-------+
| Original | 0.5240555555555555 |  0.687853355559086  | -0.9909 | 0.9998 |  108  |
| ChatGPT  | 0.9597109042553192 | 0.04376780507153794 |  0.5988 | 0.9951 |  376  |
| DeepSeek |     0.8755915      | 0.15718515945248165 | -0.4767 | 0.9887 |  1000 |
+----------+--------------------+---------------------+---------+--------+-------+
