In [44]:
#imports
import pandas as pd
import numpy as np
import re
from datetime import datetime
from sklearn.preprocessing import LabelEncoder
import matplotlib.pyplot as plt
import seaborn as sns
from wordcloud import WordCloud
from collections import Counter
import warnings
import string
warnings.filterwarnings('ignore')

In [45]:
# Text processing
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize, sent_tokenize
from nltk.stem import WordNetLemmatizer, PorterStemmer

# Download required NLTK data
nltk.download('punkt', quiet=True)
nltk.download('stopwords', quiet=True)
nltk.download('wordnet', quiet=True)
nltk.download('averaged_perceptron_tagger', quiet=True)


True

In [46]:
def harmonize_reddit_data(df):
    """Standardize Reddit data to unified schema (VADER version)"""
    
    # Create unified schema
    harmonized = pd.DataFrame()
    
    # Core text fields
    harmonized['text'] = df['text']
    harmonized['text_length'] = df['comment_length']
    
    # Sentiment - VADER scores
    harmonized['sentiment_compound'] = df['sentiment_polarity']  # VADER compound score
    harmonized['sentiment_positive'] = df['sentiment_positive']  # VADER positive score
    harmonized['sentiment_negative'] = df['sentiment_negative']  # VADER negative score
    harmonized['sentiment_neutral'] = df['sentiment_neutral']    # VADER neutral score
    harmonized['sentiment_subjectivity'] = df['sentiment_subjectivity']
    
    # Sentiment label (updated threshold for VADER)
    harmonized['sentiment_label'] = df.get('sentiment_label', 
                                            df['sentiment_polarity'].apply(
                                                lambda x: 'positive' if x >= 0.05 else ('negative' if x <= -0.05 else 'neutral')))
    
    # Engagement metrics
    harmonized['likes'] = df['likes']
    harmonized['replies'] = df['num_replies']
    
    # Temporal
    harmonized['created_at'] = pd.to_datetime(df['created_utc'], errors='coerce')
    
    # Source identification
    harmonized['source'] = 'reddit'
    harmonized['source_id'] = df['source_id']
    
    # Platform-specific metadata
    harmonized['platform_post_id'] = df['post_id']
    harmonized['platform_post_title'] = df['post_title']
    harmonized['platform_community'] = df['subreddit']
    harmonized['platform_post_score'] = df['post_score']
    harmonized['platform_post_engagement'] = df['post_num_comments']
    
    # Content flags
    harmonized['contains_ai'] = df['contains_ai']
    harmonized['contains_opinion'] = df['contains_opinion']
    harmonized['contains_societal'] = df['contains_societal']
    
    return harmonized

def harmonize_youtube_data(df):
    """Standardize YouTube data to unified schema (VADER version)"""
    
    # Create unified schema
    harmonized = pd.DataFrame()
    
    # Core text fields
    harmonized['text'] = df['text']
    harmonized['text_length'] = df['comment_length']
    
    # Sentiment - VADER scores
    harmonized['sentiment_compound'] = df['sentiment_polarity']  # VADER compound score
    harmonized['sentiment_positive'] = df['sentiment_positive']  # VADER positive score
    harmonized['sentiment_negative'] = df['sentiment_negative']  # VADER negative score
    harmonized['sentiment_neutral'] = df['sentiment_neutral']    # VADER neutral score
    harmonized['sentiment_subjectivity'] = df['sentiment_subjectivity']
    
    # Sentiment label (updated threshold for VADER)
    harmonized['sentiment_label'] = df.get('sentiment_label',
                                            df['sentiment_polarity'].apply(
                                                lambda x: 'positive' if x >= 0.05 else ('negative' if x <= -0.05 else 'neutral')))
    
    # Engagement metrics
    harmonized['likes'] = df['likes']
    harmonized['replies'] = df['num_replies']
    
    # Temporal
    harmonized['created_at'] = pd.to_datetime(df['created_utc'], errors='coerce')
    
    # Source identification
    harmonized['source'] = 'youtube'
    harmonized['source_id'] = df['source_id']
    
    # Platform-specific metadata
    harmonized['platform_post_id'] = df['video_id']
    harmonized['platform_post_title'] = df['video_title']
    harmonized['platform_community'] = df['video_channel']
    harmonized['platform_post_score'] = df['video_like_count']
    harmonized['platform_post_engagement'] = df['video_comment_count']
    
    # Content flags
    harmonized['contains_ai'] = df['contains_ai']
    harmonized['contains_opinion'] = df['contains_opinion']
    harmonized['contains_societal'] = df['contains_societal']
    
    return harmonized

# Example usage
if __name__ == "__main__":
    # Load the data
    reddit_df = pd.read_csv('../data/reddit_ai.csv')
    youtube_df = pd.read_csv('../data/youtube_ai.csv')
    
    # Harmonize datasets
    reddit_harmonized = harmonize_reddit_data(reddit_df)
    youtube_harmonized = harmonize_youtube_data(youtube_df)
    
    # Merge datasets
    merged_df = pd.concat([reddit_harmonized, youtube_harmonized], ignore_index=True)
    
    # Print summary statistics
    print("="*60)
    print("DATA HARMONIZATION COMPLETE")
    print("="*60)
    print(f"\nTotal comments: {len(merged_df)}")
    print(f"Reddit comments: {len(reddit_harmonized)}")
    print(f"YouTube comments: {len(youtube_harmonized)}")
    
    print(f"\nSentiment distribution:")
    print(merged_df['sentiment_label'].value_counts())
    
    print(f"\nAverage VADER scores:")
    print(f"  Compound: {merged_df['sentiment_compound'].mean():.3f}")
    print(f"  Positive: {merged_df['sentiment_positive'].mean():.3f}")
    print(f"  Negative: {merged_df['sentiment_negative'].mean():.3f}")
    print(f"  Neutral: {merged_df['sentiment_neutral'].mean():.3f}")
    
    print(f"\nDate range:")
    print(f"  Earliest: {merged_df['created_at'].min()}")
    print(f"  Latest: {merged_df['created_at'].max()}")
    
    print(f"\nTop communities:")
    print(merged_df['platform_community'].value_counts().head(10))
    
    print(f"\nContent flags:")
    print(f"  Contains AI keywords: {merged_df['contains_ai'].sum()}")
    print(f"  Contains opinion keywords: {merged_df['contains_opinion'].sum()}")
    print(f"  Contains societal keywords: {merged_df['contains_societal'].sum()}")

DATA HARMONIZATION COMPLETE

Total comments: 3099
Reddit comments: 1012
YouTube comments: 2087

Sentiment distribution:
sentiment_label
positive    2052
negative     958
neutral       89
Name: count, dtype: int64

Average VADER scores:
  Compound: 0.260
  Positive: 0.122
  Negative: 0.072
  Neutral: 0.806

Date range:
  Earliest: 2021-10-20 04:04:18+00:00
  Latest: 2025-12-07 04:55:25+00:00

Top communities:
platform_community
MachineLearning           406
TED                       400
LocalLLaMA                271
CNN                       200
ChatGPT                   169
CNBC Television           116
Senator Bernie Sanders    100
Fireship                  100
WIRED                     100
The Diary Of A CEO        100
Name: count, dtype: int64

Content flags:
  Contains AI keywords: 3099
  Contains opinion keywords: 2845
  Contains societal keywords: 1230


In [47]:
merged_df

Unnamed: 0,text,text_length,sentiment_compound,sentiment_positive,sentiment_negative,sentiment_neutral,sentiment_subjectivity,sentiment_label,likes,replies,...,source,source_id,platform_post_id,platform_post_title,platform_community,platform_post_score,platform_post_engagement,contains_ai,contains_opinion,contains_societal
0,"Post is definitely worded or written by an AI,...",126,0.9890,0.254,0.015,0.731,0.86950,positive,1,1,...,reddit,reddit_comment_novuyda,1ox5xu0,[D] Let's discuss World Models,MachineLearning,0,6,True,True,True
1,Yes post is refined using LLM\n\nHowever follo...,133,0.8689,0.084,0.000,0.916,0.55945,positive,-6,1,...,reddit,reddit_comment_nov7r5c,1ox5xu0,[D] Let's discuss World Models,MachineLearning,0,6,True,True,False
2,"I mean, asking the people that have a culturua...",48,-0.1280,0.133,0.142,0.725,0.06400,negative,2,1,...,reddit,reddit_comment_n9v0fig,1mvmlbw,[R] What do people expect from AI in the next ...,MachineLearning,9,8,True,True,True
3,"Interesting, for me it opens a page for the pa...",32,0.8020,0.220,0.000,0.780,0.40100,positive,1,0,...,reddit,reddit_comment_n9rgz0a,1mvmlbw,[R] What do people expect from AI in the next ...,MachineLearning,9,8,True,True,True
4,I've seen an increasing rise in physics and en...,57,0.5584,0.151,0.096,0.753,0.27920,positive,201,4,...,reddit,reddit_comment_mzsw5l7,1lkmkuw,[D] Alarming amount of schizoid people being v...,MachineLearning,325,156,True,True,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3094,"Guys, all of these scenarios are basically sci...",37,0.8462,0.212,0.000,0.788,0.42310,positive,12,1,...,youtube,youtube_comment_None,hnr7-VNHJoU,Every AI Existential Risk Explained,The Paint Explainer,13949,1059,True,False,True
3095,Humanity just wouldn't stop making new tools u...,88,0.5615,0.078,0.029,0.893,0.40575,positive,10,1,...,youtube,youtube_comment_None,hnr7-VNHJoU,Every AI Existential Risk Explained,The Paint Explainer,13949,1059,True,True,True
3096,The alternative to developing AGI is facing st...,41,-0.8759,0.000,0.230,0.770,0.56295,negative,14,2,...,youtube,youtube_comment_None,hnr7-VNHJoU,Every AI Existential Risk Explained,The Paint Explainer,13949,1059,True,True,False
3097,Hearing all of these fear-based predictions fr...,74,-0.3612,0.107,0.137,0.756,0.18060,negative,8,0,...,youtube,youtube_comment_None,hnr7-VNHJoU,Every AI Existential Risk Explained,The Paint Explainer,13949,1059,True,True,True


In [48]:
# check for missing values
merged_df.isna().sum()

text                        0
text_length                 0
sentiment_compound          0
sentiment_positive          0
sentiment_negative          0
sentiment_neutral           0
sentiment_subjectivity      0
sentiment_label             0
likes                       0
replies                     0
created_at                  0
source                      0
source_id                   0
platform_post_id            0
platform_post_title         0
platform_community          0
platform_post_score         0
platform_post_engagement    0
contains_ai                 0
contains_opinion            0
contains_societal           0
dtype: int64

In [49]:
# Drop duplicates by text
merged_df = merged_df.drop_duplicates(subset=['text'], keep='first')


In [50]:
merged_df

Unnamed: 0,text,text_length,sentiment_compound,sentiment_positive,sentiment_negative,sentiment_neutral,sentiment_subjectivity,sentiment_label,likes,replies,...,source,source_id,platform_post_id,platform_post_title,platform_community,platform_post_score,platform_post_engagement,contains_ai,contains_opinion,contains_societal
0,"Post is definitely worded or written by an AI,...",126,0.9890,0.254,0.015,0.731,0.86950,positive,1,1,...,reddit,reddit_comment_novuyda,1ox5xu0,[D] Let's discuss World Models,MachineLearning,0,6,True,True,True
1,Yes post is refined using LLM\n\nHowever follo...,133,0.8689,0.084,0.000,0.916,0.55945,positive,-6,1,...,reddit,reddit_comment_nov7r5c,1ox5xu0,[D] Let's discuss World Models,MachineLearning,0,6,True,True,False
2,"I mean, asking the people that have a culturua...",48,-0.1280,0.133,0.142,0.725,0.06400,negative,2,1,...,reddit,reddit_comment_n9v0fig,1mvmlbw,[R] What do people expect from AI in the next ...,MachineLearning,9,8,True,True,True
3,"Interesting, for me it opens a page for the pa...",32,0.8020,0.220,0.000,0.780,0.40100,positive,1,0,...,reddit,reddit_comment_n9rgz0a,1mvmlbw,[R] What do people expect from AI in the next ...,MachineLearning,9,8,True,True,True
4,I've seen an increasing rise in physics and en...,57,0.5584,0.151,0.096,0.753,0.27920,positive,201,4,...,reddit,reddit_comment_mzsw5l7,1lkmkuw,[D] Alarming amount of schizoid people being v...,MachineLearning,325,156,True,True,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3094,"Guys, all of these scenarios are basically sci...",37,0.8462,0.212,0.000,0.788,0.42310,positive,12,1,...,youtube,youtube_comment_None,hnr7-VNHJoU,Every AI Existential Risk Explained,The Paint Explainer,13949,1059,True,False,True
3095,Humanity just wouldn't stop making new tools u...,88,0.5615,0.078,0.029,0.893,0.40575,positive,10,1,...,youtube,youtube_comment_None,hnr7-VNHJoU,Every AI Existential Risk Explained,The Paint Explainer,13949,1059,True,True,True
3096,The alternative to developing AGI is facing st...,41,-0.8759,0.000,0.230,0.770,0.56295,negative,14,2,...,youtube,youtube_comment_None,hnr7-VNHJoU,Every AI Existential Risk Explained,The Paint Explainer,13949,1059,True,True,False
3097,Hearing all of these fear-based predictions fr...,74,-0.3612,0.107,0.137,0.756,0.18060,negative,8,0,...,youtube,youtube_comment_None,hnr7-VNHJoU,Every AI Existential Risk Explained,The Paint Explainer,13949,1059,True,True,True


In [51]:
class TextCleaner:
    """Comprehensive text cleaning and preprocessing"""
    
    def __init__(self):
        self.stop_words = set(stopwords.words('english'))
        self.lemmatizer = WordNetLemmatizer()
        self.stemmer = PorterStemmer()
        
        # Custom patterns
        self.url_pattern = re.compile(r'http\S+|www\.\S+|https\S+')
        self.email_pattern = re.compile(r'\S+@\S+')
        self.mention_pattern = re.compile(r'@\w+')
        self.hashtag_pattern = re.compile(r'#\w+')
        self.number_pattern = re.compile(r'\d+')
        
    def remove_urls(self, text):
        """Remove URLs"""
        return self.url_pattern.sub('', text)
    
    def remove_emails(self, text):
        """Remove email addresses"""
        return self.email_pattern.sub('', text)
    
    def remove_mentions(self, text):
        """Remove social media mentions"""
        return self.mention_pattern.sub('', text)
    
    def remove_hashtags(self, text):
        """Remove hashtags but keep the text"""
        return self.hashtag_pattern.sub(lambda m: m.group(0)[1:], text)
    
    def remove_extra_whitespace(self, text):
        """Remove extra whitespace"""
        return ' '.join(text.split())
    
    def lowercase(self, text):
        """Convert to lowercase"""
        return text.lower()
    
    def remove_punctuation(self, text, keep_sentence_end=True):
        """Remove punctuation, optionally keep sentence endings"""
        if keep_sentence_end:
            # Keep . ! ? for sentence structure
            translator = str.maketrans('', '', string.punctuation.replace('.', '').replace('!', '').replace('?', ''))
        else:
            translator = str.maketrans('', '', string.punctuation)
        return text.translate(translator)
    
    def remove_numbers(self, text):
        """Remove numbers"""
        return self.number_pattern.sub('', text)
    
    def remove_stopwords(self, text):
        """Remove stopwords"""
        words = word_tokenize(text)
        filtered = [w for w in words if w.lower() not in self.stop_words]
        return ' '.join(filtered)
    
    def lemmatize(self, text):
        """Lemmatize text"""
        words = word_tokenize(text)
        lemmatized = [self.lemmatizer.lemmatize(w) for w in words]
        return ' '.join(lemmatized)
    
    def stem(self, text):
        """Stem text"""
        words = word_tokenize(text)
        stemmed = [self.stemmer.stem(w) for w in words]
        return ' '.join(stemmed)
    
    def clean_basic(self, text):
        """Basic cleaning: URLs, emails, whitespace, lowercase"""
        if pd.isna(text) or not isinstance(text, str):
            return ""
        
        text = self.remove_urls(text)
        text = self.remove_emails(text)
        text = self.remove_mentions(text)
        text = self.remove_hashtags(text)
        text = self.lowercase(text)
        text = self.remove_extra_whitespace(text)
        
        return text.strip()
    
    def clean_standard(self, text):
        """Standard cleaning: basic + punctuation + numbers"""
        text = self.clean_basic(text)
        text = self.remove_punctuation(text, keep_sentence_end=False)
        text = self.remove_numbers(text)
        text = self.remove_extra_whitespace(text)
        
        return text.strip()
    
    def clean_aggressive(self, text):
        """Aggressive cleaning: standard + stopwords + lemmatization"""
        text = self.clean_standard(text)
        text = self.remove_stopwords(text)
        text = self.lemmatize(text)
        text = self.remove_extra_whitespace(text)
        
        return text.strip()

In [59]:
def clean_for_tfidf(text, cleaner):
    """
    Optimized cleaning for TF-IDF:
    - Aggressive preprocessing: lowercase, no punctuation, no stopwords
    - Lemmatization to reduce vocabulary
    - Remove numbers (they're not meaningful for TF-IDF)
    - Goal: Clean bag-of-words representation
    """
    if pd.isna(text) or not isinstance(text, str):
        return ""
    
    # Basic cleaning
    text = cleaner.remove_urls(text)
    text = cleaner.remove_emails(text)
    text = cleaner.remove_mentions(text)
    text = cleaner.remove_hashtags(text)
    text = cleaner.lowercase(text)
    
    # Remove punctuation and numbers
    text = cleaner.remove_punctuation(text, keep_sentence_end=False)
    text = cleaner.remove_numbers(text)
    
    # Remove stopwords (they don't add value to TF-IDF)
    text = cleaner.remove_stopwords(text)
    
    # Lemmatize to reduce vocabulary size
    text = cleaner.lemmatize(text)
    
    # Clean whitespace
    text = cleaner.remove_extra_whitespace(text)
    
    return text.strip()

def clean_for_bert(text, cleaner):
    """
    Optimized cleaning for BERT:
    - Minimal preprocessing: preserve context and semantics
    - Keep punctuation (BERT uses it for understanding)
    - Keep stopwords (BERT learns from them)
    - Keep case variations (BERT has case-sensitive and case-insensitive versions)
    - Keep numbers (can be contextually important)
    - Goal: Natural, contextual text that BERT can understand
    """
    if pd.isna(text) or not isinstance(text, str):
        return ""
    
    # Only remove noise that doesn't add meaning
    text = cleaner.remove_urls(text)
    text = cleaner.remove_emails(text)
    
    # Keep mentions and hashtags as they might have semantic value
    # Just remove the @ and # symbols
    text = text.replace('@', '').replace('#', '')
    
    # Clean extra whitespace but preserve structure
    text = cleaner.remove_extra_whitespace(text)
    
    # Keep original case, punctuation, numbers, stopwords
    # BERT's tokenizer will handle these appropriately
    
    return text.strip()

In [60]:
def engineer_features(df):
    """Create additional features for analysis (VADER version)"""
    
    df = df.copy()
    
    # --- Temporal Features ---
    df['year'] = df['created_at'].dt.year
    df['month'] = df['created_at'].dt.month
    df['day_of_week'] = df['created_at'].dt.dayofweek
    df['hour'] = df['created_at'].dt.hour
    
    # --- Text Features (using basic clean for consistency) ---
    df['word_count'] = df['text_clean'].apply(lambda x: len(str(x).split()))
    
    # Punctuation features (use original text to preserve punctuation)
    df['exclamation_count'] = df['text'].apply(lambda x: str(x).count('!'))
    df['question_count'] = df['text'].apply(lambda x: str(x).count('?'))
    df['period_count'] = df['text'].apply(lambda x: str(x).count('.'))
    
    # --- Engagement Features ---
    df['engagement_score'] = df['likes'] + (df['replies'] * 2)  # Weight replies higher
    
    # Log transform for skewed features (handles zeros)
    df['engagement_log'] = np.log1p(df['engagement_score'])
    
    # --- VADER Sentiment Features (UPDATED) ---
    # Use VADER's compound score with adjusted thresholds
    df['is_positive'] = (df['sentiment_compound'] >= 0.05).astype(int)
    df['is_negative'] = (df['sentiment_compound'] <= -0.05).astype(int)
    df['is_neutral'] = ((df['sentiment_compound'] > -0.05) & (df['sentiment_compound'] < 0.05)).astype(int)
    
    # Sentiment magnitude (absolute value of compound)
    df['sentiment_magnitude'] = np.abs(df['sentiment_compound'])
    
    # --- Platform Features ---
    # Community popularity (number of comments from same community)
    df['community_size'] = df.groupby('platform_community')['platform_community'].transform('count')
    
    return df

In [61]:
def clean_and_engineer_features(df):
    """Complete text cleaning and feature engineering pipeline"""
    
    print("="*60)
    print("TEXT CLEANING & FEATURE ENGINEERING")
    print("="*60)
    
    # Initialize cleaner
    cleaner = TextCleaner()
    
    # 1. Text Cleaning
    print("\n1. Cleaning text for TF-IDF (aggressive preprocessing)...")
    df['text_tfidf'] = df['text'].apply(lambda x: clean_for_tfidf(x, cleaner))
    
    print("2. Cleaning text for BERT (minimal preprocessing)...")
    df['text_bert'] = df['text'].apply(lambda x: clean_for_bert(x, cleaner))
    
    print("3. Creating basic cleaned version for EDA...")
    df['text_clean'] = df['text'].apply(cleaner.clean_basic)
    
    # 2. Feature Engineering
    print("\n4. Engineering features...")
    df = engineer_features(df)
    
    # 3. Remove any rows with empty cleaned text
    initial_count = len(df)
    df = df[df['text_clean'].str.len() > 0]
    removed = initial_count - len(df)
    if removed > 0:
        print(f"\n⚠ Removed {removed} comments with empty text after cleaning")
    
    print("\n✓ Text cleaning and feature engineering complete!")
    print(f"Final dataset: {len(df)} comments")
    
    # Show new columns
    print(f"\nNew columns added: {len(df.columns) - initial_count}")
    
    return df

if __name__ == "__main__":
    # Load data
    df = merged_df
    
    # Ensure created_at is datetime
    df['created_at'] = pd.to_datetime(df['created_at'])
    
    # Clean and engineer features
    df_processed = clean_and_engineer_features(df)
    
    # Save processed data
    df_processed.to_csv('../data/final_df.csv', index=False)
    
    # Show summary
    print("\n" + "="*60)
    print("FEATURE SUMMARY")
    print("="*60)
    
    print("\n--- Temporal Features ---")
    print(f"Date range: {df_processed['created_at'].min()} to {df_processed['created_at'].max()}")
    print(f"Years covered: {sorted(df_processed['year'].unique())}")
    
    print("\n--- Text Features ---")
    print(f"Avg word count: {df_processed['word_count'].mean():.1f}")
    
    print("\n--- Sentiment Features ---")
    print(f"Positive: {df_processed['is_positive'].sum()} ({df_processed['is_positive'].mean()*100:.1f}%)")
    print(f"Negative: {df_processed['is_negative'].sum()} ({df_processed['is_negative'].mean()*100:.1f}%)")
    print(f"Neutral: {df_processed['is_neutral'].sum()} ({df_processed['is_neutral'].mean()*100:.1f}%)")
    
    print("\n--- Engagement Features ---")
    print(f"Avg likes: {df_processed['likes'].mean():.1f}")
    print(f"Avg replies: {df_processed['replies'].mean():.1f}")
    print(f"Avg engagement: {df_processed['engagement_score'].mean():.1f}")

TEXT CLEANING & FEATURE ENGINEERING

1. Cleaning text for TF-IDF (aggressive preprocessing)...
2. Cleaning text for BERT (minimal preprocessing)...
3. Creating basic cleaned version for EDA...

4. Engineering features...

✓ Text cleaning and feature engineering complete!
Final dataset: 2985 comments

New columns added: -2946

FEATURE SUMMARY

--- Temporal Features ---
Date range: 2021-10-20 04:04:18+00:00 to 2025-12-07 04:55:25+00:00
Years covered: [np.int32(2021), np.int32(2022), np.int32(2023), np.int32(2024), np.int32(2025)]

--- Text Features ---
Avg word count: 69.4

--- Sentiment Features ---
Positive: 1991 (66.7%)
Negative: 906 (30.4%)
Neutral: 88 (2.9%)

--- Engagement Features ---
Avg likes: 39.3
Avg replies: 1.7
Avg engagement: 42.7


In [None]:
df_processed

In [None]:
merged_df