In [46]:
import torch
import pandas as pd
import numpy as np

In [72]:
# Define a dictionary with file paths and metadata
politicians_data = {
    'FdI_Meloni': {'file': 'politicians_data/GiorgiaMeloni_tweets_combined.csv', 'politician': 'Meloni', 'party': 'FdI'},
    'FdI_LaRussa': {'file': 'politicians_data/IgnazioLaRussa_tweets_combined.csv', 'politician': 'LaRussa', 'party': 'FdI'},
    'FI_Berlusconi': {'file': 'politicians_data/SilvioBerlusconi_tweets_combined.csv', 'politician': 'Berlusconi', 'party': 'FI'},
    'FI_Tajani': {'file': 'politicians_data/AntonioTajani_tweets_combined.csv', 'politician': 'Tajani', 'party': 'FI'},
    'Lega_Salvini': {'file': 'politicians_data/MatteoSalvini_tweets_combined.csv', 'politician': 'Salvini', 'party': 'Lega'},
    'M5S_DiMaio': {'file': 'politicians_data/luigidimaio_tweets_combined.csv', 'politician': 'Di Maio', 'party': 'M5S'},
    'M5S_Conte': {'file': 'politicians_data/GiuseppeConte_tweets_combined.csv', 'politician': 'Conte', 'party': 'M5S'},
    'Az_Calenda': {'file': 'politicians_data/CarloCalenda_tweets_combined.csv', 'politician': 'Calenda', 'party': 'Azione'},
    'IV_Renzi': {'file': 'politicians_data/MatteoRenzi_tweets_combined.csv', 'politician': 'Renzi', 'party': 'IV'},
    'PEeur_Bonino': {'file': 'politicians_data/emmabonino_tweets_combined.csv', 'politician': 'Bonino', 'party': 'PEeur'},
    'PD_Shlein': {'file': 'politicians_data/EllySchlein_tweets_combined.csv', 'politician': 'Schlein', 'party': 'PD'},
    'PD_Letta': {'file': 'politicians_data/EnricoLetta_tweets_combined.csv', 'politician': 'Letta', 'party': 'PD'},
    'EV_Fratoianni': {'file': 'politicians_data/NicolaFratoianni_tweets_combined.csv', 'politician': 'Fratoianni', 'party': 'EV'},
    'NcI_Lupi': {'file': 'politicians_data/MaurizioLupi_tweets_combined.csv', 'politician': 'Lupi', 'party': 'NcI'}
}

# Read and process each file
politicians_list = []
for key, data in politicians_data.items():
    df = pd.read_csv(data['file'])
    df['politician'] = data['politician']
    df['party'] = data['party']
    politicians_list.append(df)

# Concatenate all DataFrames into one
politicians = pd.concat(politicians_list, ignore_index=True)

# Create a csv file with the combined data
politicians.to_csv('politicians_combined.csv', index=False)

In [53]:
politicians.head()

Unnamed: 0,Date,ID,URL,Content,Likes,Retweets,politician,party
0,2022-12-30 15:30:11+00:00,1608847928625434625,https://twitter.com/GiorgiaMeloni/status/16088...,Ecco l’ultimo appuntamento con #gliappuntidiGi...,3385,0,Meloni,FdI
1,2022-12-29 20:55:30+00:00,1608567407500754944,https://twitter.com/GiorgiaMeloni/status/16085...,Congratulations to @netanyahu on the formation...,6021,0,Meloni,FdI
2,2022-12-29 19:48:50+00:00,1608550629005488130,https://twitter.com/GiorgiaMeloni/status/16085...,Grazie al suo estro e alla sua classe è riusci...,32003,0,Meloni,FdI
3,2022-12-29 17:31:05+00:00,1608515965041651712,https://twitter.com/GiorgiaMeloni/status/16085...,Nel corso della conferenza stampa di fine anno...,4146,0,Meloni,FdI
4,2022-12-29 10:38:45+00:00,1608412198182830081,https://twitter.com/GiorgiaMeloni/status/16084...,Conferenza stampa di fine anno. Seguitemi in d...,3523,0,Meloni,FdI


### RoBERTa pre-trained on Twitter

In [54]:
from transformers import AutoModelForSequenceClassification
from transformers import TFAutoModelForSequenceClassification
from transformers import AutoTokenizer
import numpy as np
from scipy.special import softmax
import csv
import urllib.request


In [56]:
MODEL = "cardiffnlp/twitter-roberta-base-sentiment"
tokenizer = AutoTokenizer.from_pretrained(MODEL)
model = AutoModelForSequenceClassification.from_pretrained(MODEL)
model.eval()  # Set to evaluation mode

# Define labels
labels = ['negative', 'neutral', 'positive']

In [57]:
def get_sentiment(text):
    # Preprocess text
    encoded_input = tokenizer(text, return_tensors='pt', truncation=True)
    with torch.no_grad():
        output = model(**encoded_input)
    scores = output.logits[0].numpy()
    scores = torch.nn.functional.softmax(torch.tensor(scores), dim=0)
    return labels[scores.argmax()], scores.numpy()


In [58]:
sampled_tweets = politicians.sample(n=1000, random_state=42)

sentiment_sample = sampled_tweets['Content'].apply(get_sentiment)

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


In [64]:
# Create a DataFrame with the results keeping the index
sentiment_df = pd.DataFrame(sentiment_sample.tolist(), index=sampled_tweets.index, columns=['sentiment', 'scores'])
sentiment_df['scores'] = sentiment_df['scores'].apply(lambda x: x.tolist())
# create a new column with each score
sentiment_df[['negative', 'neutral', 'positive']] = pd.DataFrame(sentiment_df['scores'].tolist(), index=sentiment_df.index)
sentiment_df. drop(columns=['scores'], inplace=True)
sentiment_df

Unnamed: 0,sentiment,negative,neutral,positive
6111,neutral,0.073046,0.867966,0.058988
14778,neutral,0.169538,0.645370,0.185092
5554,neutral,0.090288,0.862527,0.047185
19101,neutral,0.104139,0.834537,0.061324
14322,neutral,0.058969,0.853090,0.087941
...,...,...,...,...
25766,neutral,0.084699,0.858908,0.056393
21142,neutral,0.118631,0.827843,0.053527
5804,neutral,0.069216,0.769665,0.161119
7525,positive,0.010527,0.369351,0.620122


In [66]:
# add the columns of sentiment_df to sampled_tweets
sampled_tweets = sampled_tweets.join(sentiment_df)
sampled_tweets

Unnamed: 0,Date,ID,URL,Content,Likes,Retweets,politician,party,sentiment,negative,neutral,positive
6111,2022-12-17 13:06:53+00:00,1604100821754462209,https://twitter.com/matteosalvini/status/16041...,Era una delle nostre priorità e non abbiamo pe...,163,0,Salvini,Lega,neutral,0.073046,0.867966,0.058988
14778,2022-08-12 08:41:00+00:00,1558010653302030340,https://twitter.com/CarloCalenda/status/155801...,Senza pudore,756,0,Calenda,Azione,neutral,0.169538,0.645370,0.185092
5554,2021-05-03 14:53:08+00:00,1389231557207023622,https://twitter.com/Antonio_Tajani/status/1389...,Bene il #governo che decide di stanziare 2mld ...,43,0,Tajani,FI,neutral,0.090288,0.862527,0.047185
19101,2021-08-01 15:45:26+00:00,1421859627558395904,https://twitter.com/CarloCalenda/status/142185...,@francetomm @marketingpmi Credo che questa pos...,0,0,Calenda,Azione,neutral,0.104139,0.834537,0.061324
14322,2022-09-05 18:28:21+00:00,1566855773283573762,https://twitter.com/CarloCalenda/status/156685...,Anche per SWG arriva il sorpasso su ⁦@forza_it...,2106,0,Calenda,Azione,neutral,0.058969,0.853090,0.087941
...,...,...,...,...,...,...,...,...,...,...,...,...
25766,2022-08-18 15:44:23+00:00,1560291528941109251,https://twitter.com/NFratoianni/status/1560291...,Una mia intervista a #RaiParlamento\n\n#Elezio...,30,0,Fratoianni,EV,neutral,0.084699,0.858908,0.056393
21142,2021-03-11 19:31:15+00:00,1370094988873375748,https://twitter.com/CarloCalenda/status/137009...,@paolo23949716 @amodio_enzo @Azione_it Le mie ...,5,0,Calenda,Azione,neutral,0.118631,0.827843,0.053527
5804,2021-03-04 15:34:40+00:00,1367498739221475330,https://twitter.com/Antonio_Tajani/status/1367...,Iscriviti a @forza_italia.\nEntra a far parte ...,98,0,Tajani,FI,neutral,0.069216,0.769665,0.161119
7525,2022-06-04 20:24:50+00:00,1533183015266000897,https://twitter.com/matteosalvini/status/15331...,Trionfo della Pro Recco nella massima competiz...,133,0,Salvini,Lega,positive,0.010527,0.369351,0.620122


In [67]:
# Apply sentiment analysis
sentiment_results = politicians['Content'].apply(get_sentiment)

In [69]:
# Create a DataFrame with the results keeping the index
result_df = pd.DataFrame(sentiment_results.tolist(), index=politicians.index, columns=['sentiment', 'scores'])
result_df['scores'] = result_df['scores'].apply(lambda x: x.tolist())
# create a new column with each score
result_df[['negative', 'neutral', 'positive']] = pd.DataFrame(result_df['scores'].tolist(), index=result_df.index)
result_df. drop(columns=['scores'], inplace=True)
result_df

Unnamed: 0,sentiment,negative,neutral,positive
0,neutral,0.066206,0.845798,0.087996
1,positive,0.001067,0.027091,0.971842
2,neutral,0.093626,0.834294,0.072081
3,neutral,0.074911,0.866046,0.059043
4,neutral,0.053506,0.870213,0.076281
...,...,...,...,...
27860,neutral,0.096552,0.851842,0.051606
27861,neutral,0.210613,0.751147,0.038240
27862,neutral,0.208396,0.742678,0.048926
27863,neutral,0.224726,0.724392,0.050883


In [70]:
politicians = politicians.join(result_df)
politicians

Unnamed: 0,Date,ID,URL,Content,Likes,Retweets,politician,party,sentiment,negative,neutral,positive
0,2022-12-30 15:30:11+00:00,1608847928625434625,https://twitter.com/GiorgiaMeloni/status/16088...,Ecco l’ultimo appuntamento con #gliappuntidiGi...,3385,0,Meloni,FdI,neutral,0.066206,0.845798,0.087996
1,2022-12-29 20:55:30+00:00,1608567407500754944,https://twitter.com/GiorgiaMeloni/status/16085...,Congratulations to @netanyahu on the formation...,6021,0,Meloni,FdI,positive,0.001067,0.027091,0.971842
2,2022-12-29 19:48:50+00:00,1608550629005488130,https://twitter.com/GiorgiaMeloni/status/16085...,Grazie al suo estro e alla sua classe è riusci...,32003,0,Meloni,FdI,neutral,0.093626,0.834294,0.072081
3,2022-12-29 17:31:05+00:00,1608515965041651712,https://twitter.com/GiorgiaMeloni/status/16085...,Nel corso della conferenza stampa di fine anno...,4146,0,Meloni,FdI,neutral,0.074911,0.866046,0.059043
4,2022-12-29 10:38:45+00:00,1608412198182830081,https://twitter.com/GiorgiaMeloni/status/16084...,Conferenza stampa di fine anno. Seguitemi in d...,3523,0,Meloni,FdI,neutral,0.053506,0.870213,0.076281
...,...,...,...,...,...,...,...,...,...,...,...,...
27860,2021-01-14 13:11:21+00:00,1349705663224623104,https://twitter.com/Maurizio_Lupi/status/13497...,Conte o va al Quirinale o viene in aula subito...,30,0,Lupi,NcI,neutral,0.096552,0.851842,0.051606
27861,2021-01-13 14:40:00+00:00,1349365587122782209,https://twitter.com/Maurizio_Lupi/status/13493...,"Oltre al dannato Covid, oggi c’è un altro viru...",15,0,Lupi,NcI,neutral,0.210613,0.751147,0.038240
27862,2021-01-12 11:36:03+00:00,1348956904836517889,https://twitter.com/Maurizio_Lupi/status/13489...,"L'improvvisazione, le liti, l'incertezza di qu...",57,0,Lupi,NcI,neutral,0.208396,0.742678,0.048926
27863,2021-01-11 12:31:27+00:00,1348608460108275712,https://twitter.com/Maurizio_Lupi/status/13486...,La situazione sta diventando insostenibile e i...,43,0,Lupi,NcI,neutral,0.224726,0.724392,0.050883


In [71]:
# count the number of tweets for each sentiment
sentiment_counts = politicians['sentiment'].value_counts()
sentiment_counts

sentiment
neutral     27117
positive      649
negative       99
Name: count, dtype: int64