In [None]:
# Import necessary libraries
import pandas as pd
%pip install textblob
from textblob import TextBlob
# Load the word table and the tweet corpus from the local data/ folder.
# NOTE(review): "tweeets.csv" (three e's) looks like a typo, but it is left
# unchanged because it has to match the file name on disk — confirm before
# renaming.
try:
    words_freq = pd.read_csv("data/words_freq.csv")
    tweets = pd.read_csv("data/tweeets.csv")
except FileNotFoundError as e:
    print(f"Error: {e}. Ensure the file paths are correct.")
    # exit() is an interactive helper injected by the `site` module and is
    # not meant for programs; raising SystemExit is the equivalent that
    # reliably stops execution here (same exit status as exit()).
    raise SystemExit
else:
    # Only reached when both files were read successfully.
    print(f"Words dataset shape: {words_freq.shape}")
    print(f"Tweets dataset shape: {tweets.shape}")

# Ask for the Wordle word whose tweets should be analyzed.
# Surrounding whitespace is stripped so an accidental leading/trailing space
# does not fail the validation below; the word is lower-cased for matching.
word = input("Enter a Wordle word for sentiment analysis (5 letters): ").strip().lower()

# Accept only words made of exactly five alphabetic characters.
if not word.isalpha() or len(word) != 5:
    print("Error: Please enter a valid 5-letter word using alphabetic characters.")
    # exit() is an interactive helper from the `site` module and is not meant
    # for programs; raise SystemExit to stop execution here instead.
    raise SystemExit
                                                                                  
# Look the word up in the words_freq dataset (case-insensitive on the
# dataset side; `word` is already lower-case).
word_entry = words_freq[words_freq["word"].str.lower() == word]

if word_entry.empty:
    print(f"The word '{word}' was not found in the dataset.")
    # exit() is an interactive helper from the `site` module and is not meant
    # for programs; raise SystemExit to stop execution here instead.
    raise SystemExit

# Take the day from the first matching row. A missing value (NaN) cannot be
# converted with int(), so report it instead of failing with a ValueError.
# NOTE(review): presumably only words that were actual Wordle answers carry a
# day — verify against the dataset.
day_value = word_entry.iloc[0]["day"]
if pd.isna(day_value):
    print(f"The word '{word}' has no Wordle day in the dataset.")
    raise SystemExit

# Keep only the tweets whose wordle_id matches that day.
wordle_day = int(day_value)
wordle_tweets = tweets[tweets["wordle_id"] == wordle_day]

if wordle_tweets.empty:
    print(f"No tweets found for Wordle #{wordle_day}.")
    raise SystemExit

# Analyze sentiment: classify each usable tweet by TextBlob polarity and keep
# the raw scores so an average can be computed afterwards.
print("Analyzing tweets...")
sentiments = {"positive": 0, "neutral": 0, "negative": 0}
polarity_scores = []

# Lines starting with one of these are dropped before analysis: the "Wordle"
# header line and the emoji grid rows.
# NOTE(review): other square emoji (e.g. a high-contrast palette) are not
# listed and would be passed through to the analyzer — confirm whether that
# matters for this dataset.
grid_prefixes = ('Wordle', '⬛', '⬜', '🟨', '🟩')

# Iterate over the text column only; items() yields the same index labels
# that iterrows() would, without building a Series for every row.
for idx, text in wordle_tweets["tweet_text"].items():
    # Missing or non-text values (e.g. NaN) have no string methods; skip them
    # instead of crashing the whole run.
    if not isinstance(text, str):
        continue

    # Skip tweets that start with "Wordle" and contain at most one newline.
    if text.count('\n') <= 1 and text.startswith("Wordle"):
        continue

    # Clean tweet text: keep only the lines that are not header/grid lines.
    cleaned_text = ' '.join(
        line for line in text.split('\n')
        if not line.strip().startswith(grid_prefixes)
    )
    if not cleaned_text.strip():
        continue

    # Best-effort: a failure on one tweet is reported and the loop moves on.
    try:
        polarity = TextBlob(cleaned_text).sentiment.polarity
    except Exception as e:
        print(f"Error analyzing tweet {idx}: {e}")
        continue

    polarity_scores.append(polarity)
    if polarity > 0:
        sentiments["positive"] += 1
    elif polarity < 0:
        sentiments["negative"] += 1
    else:
        sentiments["neutral"] += 1

# Report the outcome: number of analyzed tweets, the mean polarity with a
# label derived from its sign, and the per-category counts.
total = sum(sentiments.values())
if total != 0:
    avg_sentiment = sum(polarity_scores) / len(polarity_scores)

    # Pick the label from the sign of the average polarity.
    if avg_sentiment > 0:
        label = '😊 Positive'
    elif avg_sentiment == 0:
        label = '😐 Neutral'
    else:
        label = '😟 Negative'

    print(f"\nResults for '{word}' (Wordle #{wordle_day}):")
    print(f"Total Tweets Analyzed: {total}")
    print(f"Average Sentiment: {label} ({avg_sentiment:.3f})")
    print(f"Sentiment Breakdown: {sentiments}")
else:
    # Nothing was counted, so there is no average to compute.
    print("No valid tweets found for analysis.")

Note: you may need to restart the kernel to use updated packages.
Words dataset shape: (12972, 3)
Tweets dataset shape: (1178454, 5)
Analyzing tweets...

Results for 'cacao' (Wordle #373):
Total Tweets Analyzed: 341
Average Sentiment: 😊 Positive (0.009)
Sentiment Breakdown: {'positive': 53, 'neutral': 239, 'negative': 49}
