In [None]:
# these are the libraries we are going to need
import pandas as pd
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from tqdm import tqdm

In [None]:
# Download the NLTK resources this notebook relies on (only needed the first time).
#   vader_lexicon -> SentimentIntensityAnalyzer
#   stopwords     -> stopwords.words('english')
#   punkt         -> word_tokenize on older NLTK releases
#   punkt_tab     -> word_tokenize on recent NLTK releases (3.9+), which no longer
#                    load the pickled 'punkt' models
# quiet=True is applied to every call so the cell output stays consistent.
for resource in ('vader_lexicon', 'stopwords', 'punkt', 'punkt_tab'):
    nltk.download(resource, quiet=True)

In [3]:
# Read the source workbook into the working DataFrame.
df = pd.read_excel(io="data.xlsx")

In [7]:
# English stopwords as a set, for fast membership checks while tokenizing.
stop_words = {*stopwords.words('english')}
# Tokenize one message: lower-case it, split it into word tokens and drop
# English stopwords.
def tokenize_message(text):
    """Return the lower-cased, non-stopword tokens of ``text``.

    Args:
        text: The message to tokenize. Non-string values are converted with
            ``str()`` first.

    Returns:
        list[str]: Tokens from ``word_tokenize`` that are not in the
        module-level ``stop_words`` set. Punctuation tokens are kept. A
        missing message (``None`` or a float NaN) gives an empty list.
    """
    # str(NaN) is "nan" and str(None) is "None"; without this guard a missing
    # cell would be tokenized into a bogus ['nan'] / ['none'] token.
    if text is None or (isinstance(text, float) and text != text):
        return []
    lowered = str(text).lower()
    return [word for word in word_tokenize(lowered) if word not in stop_words]

# Tokenize every message and store the resulting token lists in a new column.
token_lists = df['Message'].apply(tokenize_message)
df['tokens'] = token_lists

In [8]:
# Apply TextBlob to each message individually.
# NOTE(review): neither `TextBlob` nor `NaiveBayesAnalyzer` is imported in the
# cells shown in this notebook, so this cell fails with
# "NameError: name 'TextBlob' is not defined" (see the recorded output below).
# Presumably both come from the `textblob` package -- TODO: confirm and add the
# import to the imports cell, or remove this cell if only VADER is intended.
df['sentiment_analysis'] = df['Message'].apply(
    # A new NaiveBayesAnalyzer() instance is constructed for every row.
    lambda x: TextBlob(str(x), analyzer=NaiveBayesAnalyzer()).sentiment
)

# Show the first rows of the results.
print(df[['Message', 'sentiment_analysis']].head())

NameError: name 'TextBlob' is not defined

In [None]:
# Initialize the VADER sentiment analyzer once, at module level.
sia = SentimentIntensityAnalyzer()

# Scoring step: VADER's polarity scores give a positive, negative and neutral
# proportion for each message, plus an overall compound score.
def analyze_vader_batch(messages):
    """Score every message with VADER and return the scores as a DataFrame.

    Args:
        messages: Iterable of messages. Each item is converted with ``str()``
            before being passed to ``sia.polarity_scores``.

    Returns:
        pandas.DataFrame: One row per message, in input order, with the
        columns ``vader_compound``, ``vader_positive``, ``vader_negative`` and
        ``vader_neutral``. The four columns are present even when
        ``messages`` is empty.
    """
    # Fixed column schema: pd.DataFrame([]) would otherwise have no columns,
    # which makes later lookups such as df['vader_compound'] raise KeyError.
    columns = ['vader_compound', 'vader_positive', 'vader_negative', 'vader_neutral']
    results = []
    for msg in tqdm(messages, desc="Procesando"):
        scores = sia.polarity_scores(str(msg))
        results.append({
            'vader_compound': scores['compound'],
            'vader_positive': scores['pos'],
            'vader_negative': scores['neg'],
            'vader_neutral': scores['neu']
        })
    return pd.DataFrame(results, columns=columns)

# Score every message; the polarity scores end up in four extra columns of df.
vader_results = analyze_vader_batch(df['Message'].tolist())
# pd.concat(axis=1) aligns rows on index labels. The scores are in the same
# order as df's rows, so reuse df's index to keep the join positional even when
# df does not carry a default 0..n-1 index.
vader_results.index = df.index
# Drop score columns left over from a previous run of this cell, so that
# re-running it does not produce duplicate column names.
df = pd.concat(
    [df.drop(columns=vader_results.columns, errors='ignore'), vader_results],
    axis=1,
)

# Final classification derived from the compound score, using the standard
# VADER thresholds: >= 0.05 is positive, <= -0.05 is negative, anything in
# between is neutral.
def _label_from_compound(compound):
    """Map a VADER compound score to 'POSITIVO', 'NEGATIVO' or 'NEUTRAL'."""
    if compound >= 0.05:
        return 'POSITIVO'
    if compound <= -0.05:
        return 'NEGATIVO'
    return 'NEUTRAL'


df['sentiment_label'] = df['vader_compound'].apply(_label_from_compound)

🚀 Analizando con VADER...


Procesando: 100%|██████████| 998/998 [00:00<00:00, 3620.09it/s]


In [12]:
# Display the annotated DataFrame (the rendered output shows only the first and last rows).
df

Unnamed: 0,Candidate,DateTime,Post ID,Message,Date,Post URL,tokens,vader_compound,vader_positive,vader_negative,vader_neutral,sentiment_label
0,Clinton,2016-07-15 13:00:01,889307941125736_1189151237808070,"If you like Donald Trump, you’re going to love...",2016-07-15,https://facebook.com/889307941125736/posts/118...,"[like, donald, trump, ,, ’, going, love, choic...",0.7717,0.358,0.000,0.642,POSITIVO
1,Clinton,2016-07-15 11:00:28,889307941125736_1189077061148821,This election isn’t just a choice between two ...,2016-07-15,https://facebook.com/889307941125736/posts/118...,"[election, ’, choice, two, parties—it, ’, deci...",0.0000,0.000,0.000,1.000,NEUTRAL
2,Clinton,2016-07-15 10:30:12,889307941125736_1189051547818039,If you ever had any doubts about Donald J. Tru...,2016-07-15,https://facebook.com/889307941125736/posts/118...,"[ever, doubts, donald, j., trump, sticking, di...",-0.6486,0.000,0.187,0.813,NEGATIVO
3,Clinton,2016-07-15 10:01:34,889307941125736_1189019197821274,It's official: Donald J. Trump just chose Indi...,2016-07-15,https://facebook.com/889307941125736/posts/118...,"['s, official, :, donald, j., trump, chose, in...",0.0000,0.000,0.000,1.000,NEUTRAL
4,Clinton,2016-07-15 09:40:27,889307941125736_1189009067822287,"Thank you, Virginia and Senator Tim Kaine.",2016-07-15,https://facebook.com/889307941125736/posts/118...,"[thank, ,, virginia, senator, tim, kaine, .]",0.3612,0.294,0.000,0.706,POSITIVO
...,...,...,...,...,...,...,...,...,...,...,...,...
993,Trump,2016-05-02 06:00:11,153080620724_10156993424940725,I will easily beat Crooked Hillary Clinton in ...,2016-05-02,https://facebook.com/153080620724/posts/101569...,"[easily, beat, crooked, hillary, clinton, gene...",0.8087,0.213,0.056,0.731,POSITIVO
994,Trump,2016-05-01 17:53:37,153080620724_10156992889270725,"I love you Fort Wayne, Indiana! What a great e...",2016-05-01,https://facebook.com/153080620724/posts/101569...,"[love, fort, wayne, ,, indiana, !, great, even...",0.9203,0.296,0.059,0.646,POSITIVO
995,Trump,2016-05-01 13:30:59,153080620724_10156991970810725,"WOW! I am departing Terre Haute, Indiana now. ...",2016-05-01,https://facebook.com/153080620724/posts/101569...,"[wow, !, departing, terre, haute, ,, indiana, ...",0.9631,0.386,0.000,0.614,POSITIVO
996,Trump,2016-05-01 11:51:35,153080620724_10156991664675725,Great new poll out of Indiana! THANK YOU! I am...,2016-05-01,https://facebook.com/153080620724/posts/101569...,"[great, new, poll, indiana, !, thank, !, terre...",0.9614,0.407,0.000,0.593,POSITIVO


In [None]:
# Write the annotated DataFrame to a separate workbook, leaving out the row index.
df.to_excel(excel_writer='analisis_sentimientos_completo.xlsx', index=False)