In [None]:
import pandas as pd
import re
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime

# Install required packages if needed
# %pip install pandas nltk vaderSentiment matplotlib seaborn

# Fetch the NLTK corpora needed for stop-word filtering and lemmatization.
for corpus_name in ('stopwords', 'wordnet'):
    nltk.download(corpus_name)

: 

In [None]:
# Read the tweet CSV and parse its 'Timestamp' column into datetimes in one chain
df = pd.read_csv('twitter_dataset.csv').assign(
    Timestamp=lambda frame: pd.to_datetime(frame['Timestamp'])
)

# Text cleaning function
def clean_text(text):
    """Normalize a tweet into lowercase text made of word characters and single spaces.

    Steps, in order: drop URLs, drop @mentions, replace punctuation/symbols
    with spaces, drop digits, collapse whitespace, then strip and lowercase.

    Args:
        text: Raw tweet text. Non-string values (e.g. ``None`` or the float
            NaN pandas uses for missing cells) are treated as empty.

    Returns:
        The cleaned string; ``''`` when the input is not a string.
    """
    # Missing values reach us as float NaN / None; re.sub would raise on them.
    if not isinstance(text, str):
        return ''
    # URLs first, and up to the next whitespace, so query strings, hyphens and
    # underscores inside the link do not survive as stray tokens.
    text = re.sub(r'https?://\S+', '', text)
    # Handles may contain underscores; without '_' here "@user_name" leaves "_name".
    text = re.sub(r'@[A-Za-z0-9_]+', '', text)
    text = re.sub(r'[^\w\s]', ' ', text)  # Special characters
    text = re.sub(r'\d+', '', text)  # Numbers
    # The substitutions above leave runs of spaces behind; squeeze them.
    text = re.sub(r'\s+', ' ', text)
    return text.strip().lower()

# Shared preprocessing resources: the English stop-word set and a WordNet lemmatizer
stop_words = set(stopwords.words('english'))
lemmatizer = WordNetLemmatizer()

# Derive the cleaned-text column from the raw tweet text
df['Clean_Text'] = df['Text'].map(clean_text)

def preprocess_text(text):
    """Remove stop words from whitespace-separated text and lemmatize what remains.

    Relies on the module-level ``stop_words`` set and ``lemmatizer`` instance.

    Args:
        text: A string; it is split on whitespace.

    Returns:
        The surviving lemmatized tokens joined by single spaces.
    """
    kept_lemmas = []
    for word in text.split():
        if word in stop_words:
            continue  # stop words are dropped before lemmatization
        kept_lemmas.append(lemmatizer.lemmatize(word))
    return ' '.join(kept_lemmas)

# Stop-word-filtered, lemmatized version of the cleaned text
df['Processed_Text'] = df['Clean_Text'].map(preprocess_text)

In [None]:
# Initialize sentiment analyzer
analyzer = SentimentIntensityAnalyzer()

# Get sentiment scores
def get_sentiment(text):
    """Return VADER's compound polarity score for ``text``.

    Args:
        text: Text to score. Non-string values (e.g. NaN for a missing tweet)
            and blank strings are scored as neutral.

    Returns:
        The ``'compound'`` score, between -1 (negative) and +1 (positive);
        ``0.0`` for non-string or blank input.
    """
    # polarity_scores expects a string; guard so missing tweets do not raise.
    if not isinstance(text, str) or not text.strip():
        return 0.0
    return analyzer.polarity_scores(text)['compound']

# Score the RAW tweet text rather than 'Processed_Text'. VADER is rule-based
# and uses capitalization, punctuation, negations and intensifiers as signals.
# The cleaning/stop-word steps delete those (NLTK's English stop words include
# "not", "no", "very", ...), which turns e.g. "not good" into "good".
df['Sentiment_Score'] = df['Text'].apply(get_sentiment)

# Categorize sentiment
def categorize_sentiment(score, threshold=0.05):
    """Map a compound sentiment score to a 'Positive' / 'Negative' / 'Neutral' label.

    Args:
        score: Numeric sentiment score.
        threshold: Non-negative cutoff. Scores at or above ``threshold`` are
            'Positive', scores at or below ``-threshold`` are 'Negative'.
            Defaults to 0.05, the cutoff this notebook has always used.

    Returns:
        One of ``'Positive'``, ``'Negative'`` or ``'Neutral'``. A NaN score
        fails both comparisons and is therefore labelled ``'Neutral'``.
    """
    if score >= threshold:
        return 'Positive'
    if score <= -threshold:
        return 'Negative'
    return 'Neutral'

# Label every score as Positive / Negative / Neutral
df['Sentiment'] = df['Sentiment_Score'].map(categorize_sentiment)

In [None]:
# Time-based sentiment trends: average score per calendar day
df['Date'] = df['Timestamp'].dt.date
daily_sentiment = df.groupby('Date', as_index=False)['Sentiment_Score'].mean()

fig, ax = plt.subplots(figsize=(12, 6))
sns.lineplot(data=daily_sentiment, x='Date', y='Sentiment_Score', ax=ax)
ax.set_title('Daily Sentiment Trend')
ax.set_xlabel('Date')
ax.set_ylabel('Average Sentiment Score')
# Tilt the date labels so neighbouring ticks do not overlap
plt.setp(ax.get_xticklabels(), rotation=45)
fig.tight_layout()
plt.show()

# Sentiment distribution
# Colors are keyed by label. A plain list palette is assigned to hue levels in
# the order they first appear in the data (no hue_order is given), which is not
# necessarily the bar `order`, so green/red/blue could land on the wrong bars.
sentiment_colors = {'Positive': 'green', 'Negative': 'red', 'Neutral': 'blue'}

plt.figure(figsize=(8, 6))
sns.countplot(
    x='Sentiment',
    data=df,
    order=['Positive', 'Negative', 'Neutral'],
    palette=sentiment_colors,
    hue='Sentiment',
    legend=False
)
plt.title('Sentiment Distribution')
plt.show()
