In [2]:
import pandas as pd

# Load the tweet dump; the CSV sits in the notebook's working directory,
# so a bare relative filename is sufficient.
# NOTE(review): the recorded output shows an 'Unnamed: 0' column, which
# suggests the file was saved with its index — consider index_col=0 if
# that column is not needed downstream.
csv_path = 'tweets-data.csv'
df = pd.read_csv(csv_path)

# Sanity check: report the dimensions first, then preview the leading rows.
frame_shape = df.shape
print("Dataset shape:", frame_shape)
print(df.head())


Dataset shape: (3010, 6)
   Unnamed: 0               Date Created  Number of Likes  Source of Tweet  \
0           0  2023-06-25 19:16:20+00:00                0              NaN   
1           1  2023-06-25 19:16:18+00:00                0              NaN   
2           2  2023-06-25 19:16:07+00:00                0              NaN   
3           3  2023-06-25 19:15:56+00:00                0              NaN   
4           4  2023-06-25 19:15:54+00:00                0              NaN   

                                              Tweets hashtag  
0  @jacksonhinklle #wagner with 6.2 billion dolla...  wagner  
1  Pobrecito es discapacitado\n#Reddetuiterosdemo...  wagner  
2  News from the EIR Daily Alert\n\n“#Putin Addre...  wagner  
3  It's Messi day #Messi𓃵 #Messi36 #Russia #bigst...  wagner  
4  Il passaggio chiave di Machiavelli era questo ...  wagner  


In [5]:
import re

def clean_text(text):
    """Normalise a raw tweet into lowercase a-z words separated by single spaces.

    Steps, in order: lowercase; drop URLs (tokens starting with ``http`` or
    ``www.``); drop ``@mentions``; drop the ``#`` symbol while keeping the
    tag word; drop every character that is not ``a-z`` or whitespace;
    collapse runs of whitespace and trim the ends.

    Parameters
    ----------
    text : str
        Raw tweet text. Any non-string value (``None``, a float ``NaN``
        coming from a missing cell, ...) is treated as an empty tweet.

    Returns
    -------
    str
        The cleaned text; may be the empty string.
    """
    # Guard against missing values so a NaN/None cell cannot raise
    # AttributeError on .lower().
    if not isinstance(text, str):
        return ''

    text = text.lower()
    # The dot after "www" is escaped: unescaped it matched ANY character, so
    # ordinary words that merely start with "www" were deleted as if URLs.
    text = re.sub(r'http\S+|www\.\S+', '', text)  # remove URLs
    text = re.sub(r'@\w+', '', text)              # remove mentions
    text = re.sub(r'#', '', text)                 # remove hashtag symbol, keep the word
    # Keeps only a-z and whitespace: digits, punctuation, emoji and accented
    # or non-Latin letters are all removed by this step.
    text = re.sub(r'[^a-z\s]', '', text)
    text = re.sub(r'\s+', ' ', text).strip()      # collapse extra whitespace
    return text

# Fill any missing tweet with an empty string BEFORE the str cast; otherwise
# astype(str) turns NaN into the literal word "nan", which would then be
# cleaned and analysed as if it were real tweet text.
df['cleaned_text'] = df['Tweets'].fillna('').astype(str).apply(clean_text)
print(df[['Tweets', 'cleaned_text']].head())


                                              Tweets  \
0  @jacksonhinklle #wagner with 6.2 billion dolla...   
1  Pobrecito es discapacitado\n#Reddetuiterosdemo...   
2  News from the EIR Daily Alert\n\n“#Putin Addre...   
3  It's Messi day #Messi𓃵 #Messi36 #Russia #bigst...   
4  Il passaggio chiave di Machiavelli era questo ...   

                                        cleaned_text  
0                         wagner with billion dollar  
1  pobrecito es discapacitado reddetuiterosdemocr...  
2  news from the eir daily alert putin addressed ...  
3  its messi day messi messi russia bigstage wagn...  
4  il passaggio chiave di machiavelli era questo ...  


In [6]:
!pip install vaderSentiment


Collecting vaderSentiment
  Downloading vaderSentiment-3.3.2-py2.py3-none-any.whl.metadata (572 bytes)
Downloading vaderSentiment-3.3.2-py2.py3-none-any.whl (125 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m126.0/126.0 kB[0m [31m2.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: vaderSentiment
Successfully installed vaderSentiment-3.3.2


In [7]:
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

# Module-level VADER analyzer, built once here and read as a global by
# get_sentiment for every row it scores.
analyzer = SentimentIntensityAnalyzer()

def get_sentiment(text):
    """Score one piece of text with the shared VADER ``analyzer``.

    The compound score is bucketed with a symmetric +/-0.05 cut-off:
    ``>= 0.05`` is 'positive', ``<= -0.05`` is 'negative', anything in
    between is 'neutral'.

    Parameters
    ----------
    text : str
        Text handed to ``analyzer.polarity_scores``.

    Returns
    -------
    pd.Series
        Two positional entries: the label string, then the compound score.
        Returning a Series lets ``Series.apply`` expand the result into
        two columns.

    NOTE(review): the sample output includes Spanish and Italian tweets;
    VADER's lexicon is English, so scores for other languages are
    presumably unreliable — confirm before drawing conclusions.
    """
    compound = analyzer.polarity_scores(text)['compound']

    # Guard clauses: settle the two polar cases first, fall through to neutral.
    if compound >= 0.05:
        return pd.Series(['positive', compound])
    if compound <= -0.05:
        return pd.Series(['negative', compound])
    return pd.Series(['neutral', compound])

# get_sentiment returns a 2-element Series per row, so apply() yields a
# two-column frame that is assigned positionally to the new columns.
sentiment_cols = ['sentiment_label', 'sentiment_score']
scored = df['cleaned_text'].apply(get_sentiment)
df[sentiment_cols] = scored

# Preview the cleaned text alongside its label and compound score.
preview_cols = ['cleaned_text'] + sentiment_cols
print(df[preview_cols].head())


                                        cleaned_text sentiment_label  \
0                         wagner with billion dollar         neutral   
1  pobrecito es discapacitado reddetuiterosdemocr...        positive   
2  news from the eir daily alert putin addressed ...        positive   
3  its messi day messi messi russia bigstage wagn...         neutral   
4  il passaggio chiave di machiavelli era questo ...         neutral   

   sentiment_score  
0            0.000  
1            0.340  
2            0.296  
3            0.000  
4            0.000  
