In [2]:
import pandas as pd
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer

# Load the combined transcript table from the working directory; the rest of
# this cell reads its 'Transcript' and 'YouTube URL' columns.
csvFile = pd.read_csv('transcript_data_combined.csv')

# Download the VADER lexicon resource; this has to happen before any
# SentimentIntensityAnalyzer is constructed.
nltk.download('vader_lexicon')

# Set up the analyzer
# NOTE(review): nothing else in this cell reads `analyzer`;
# get_political_sentiment works with its own analyzer instance. Confirm
# whether this module-level one is still needed.
analyzer = SentimentIntensityAnalyzer()

# Valence overrides for political vocabulary, layered on top of the stock VADER
# lexicon by get_political_sentiment. Each word appears exactly once: a dict
# literal silently keeps only the last value for a repeated key.
POLITICAL_WORDS = {
    # Positive Sentiment Words
    'progress': 2.0, 'peaceful': 2.5, 'visionary': 2.3, 'hopeful': 2.2,
    'inclusive': 2.3, 'unity': 1.8, 'reform': 1.7, 'growth': 1.9,
    'prosperity': 2.0, 'strong': 2.4, 'freedom': 1.8, 'justice': 1.9,
    'opportunity': 2.5, 'equality': 2.5, 'empower': 2.4, 'resolve': 2.3,
    'bright': 2.2, 'leadership': 2.1, 'diplomacy': 2.0, 'accountability': 2.1,
    'collaboration': 2.0, 'integrity': 2.3, 'innovation': 2.0, 'resilience': 1.9,
    'solidarity': 2.2, 'compassion': 2.4, 'patriotism': 2.1,
    'courage': 2.5, 'dedication': 2.4, 'responsibility': 2.3, 'sacrifice': 2.2,

    # Negative Sentiment Words
    'divisive': -2.5, 'corruption': -2.0, 'crisis': -2.2, 'chaos': -2.3,
    'conflict': -2.5, 'threat': -2.0, 'oppression': -1.8, 'failure': -2.5,
    'unrest': -2.1, 'inequality': -2.3, 'exploitation': -2.0, 'injustice': -2.4,
    'scandal': -2.5, 'lies': -2.4, 'betrayal': -2.3, 'turmoil': -2.1,
    'fear': -2.3, 'authoritarian': -2.4, 'censorship': -2.2, 'division': -2.0,
    'repression': -2.3, 'mismanagement': -2.1, 'controversy': -1.9, 'bias': -2.2,
    'hypocrisy': -2.4, 'slander': -2.1, 'greed': -2.5, 'violence': -2.4,
    'deception': -2.5, 'propaganda': -2.3, 'polarization': -2.2, 'abuse': -2.5,

    # Neutral/Context-Dependent Words
    'leader': 1.0, 'policy': 1.2, 'campaign': 1.5, 'candidate': 0.5,
    'debate': 0.7, 'vote': 1.5, 'agenda': 1.0, 'law': 1.2,
    'economy': 0.9, 'strategy': 1.1, 'movement': 1.0, 'security': 1.0,
    'constitution': 0.8, 'foreign': -1.0, 'budget': 1.0, 'tax': -1.0,
    'legislation': 0.8, 'government': 1.0, 'executive': 0.7, 'judiciary': 0.7,
    'diplomat': 1.0, 'coalition': 1.1, 'treaty': 1.2, 'electoral': 1.0,
    'sanction': -1.5, 'lobbyist': -0.8, 'legislature': 1.0, 'bureaucracy': -1.2
}

# Analyzer shared by every get_political_sentiment call. It is reset whenever
# this cell is re-run, so edits to POLITICAL_WORDS take effect on the next call.
_politicalAnalyzer = None


def _get_political_analyzer():
    """Return the shared analyzer, creating and patching it on first use."""
    global _politicalAnalyzer
    if _politicalAnalyzer is None:
        sid = SentimentIntensityAnalyzer()
        sid.lexicon.update(POLITICAL_WORDS)  # Overrides win over stock entries
        _politicalAnalyzer = sid
    return _politicalAnalyzer


def get_political_sentiment(sentence):
    """Score ``sentence`` with VADER extended by the POLITICAL_WORDS overrides.

    The analyzer is built once and reused, so scoring many rows does not
    construct a new analyzer and rebuild the override table for each row.

    Parameters
    ----------
    sentence : str
        Text to score.

    Returns
    -------
    dict
        The analyzer's full polarity scores; callers read the 'neg', 'neu',
        'pos' and 'compound' entries.
    """
    return _get_political_analyzer().polarity_scores(sentence)

# Missing transcripts are read as NaN. Blank them before the string cast,
# otherwise astype(str) turns them into the literal word 'nan' and that word
# gets scored as if it were transcript text.
csvFile['Transcript'] = csvFile['Transcript'].fillna('').astype(str)

# Score every transcript once; each result is a dict keyed by the names below.
scoreColumns = ['neg', 'neu', 'pos', 'compound']
scoreRows = [get_political_sentiment(text) for text in csvFile["Transcript"]]

# Reuse csvFile's index so each score row stays attached to its transcript row.
scoreFrame = pd.DataFrame(scoreRows, columns=scoreColumns, index=csvFile.index)

# Add sentiment scores as new columns to the DataFrame (overwriting any
# columns of the same name).
for column in scoreColumns:
    csvFile[column] = scoreFrame[column]

# Group by 'YouTube URL' and calculate the average sentiment score per group,
# most positive compound score first.
sentiment_avg = (
    csvFile.groupby("YouTube URL")[scoreColumns]
    .mean()
    .sort_values(by="compound", ascending=False)
)
print(sentiment_avg)

[nltk_data] Downloading package vader_lexicon to /root/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


                                                         neg       neu  \
YouTube URL                                                              
https://www.youtube.com/live/XokApnr_Cak?si=7eV...  0.040576  0.786054   
https://youtu.be/Q5TIZnhXX7Q?si=agpeKU04EQ-AtdJ_    0.066934  0.756083   
https://youtu.be/-ofJu78Wpn0?si=H_BZ0KSYr_raPUac    0.069800  0.740400   
https://youtu.be/qc5NgBZXdtI?si=imCepjoyPvY2Jmcq    0.086038  0.759319   

                                                         pos  compound  
YouTube URL                                                             
https://www.youtube.com/live/XokApnr_Cak?si=7eV...  0.173315  0.543090  
https://youtu.be/Q5TIZnhXX7Q?si=agpeKU04EQ-AtdJ_    0.176945  0.452686  
https://youtu.be/-ofJu78Wpn0?si=H_BZ0KSYr_raPUac    0.189825  0.370946  
https://youtu.be/qc5NgBZXdtI?si=imCepjoyPvY2Jmcq    0.154643  0.261072  


In [3]:
import pandas as pd
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer

# Load the combined transcript table from the working directory; the rest of
# this cell reads its 'Transcript' and 'YouTube URL' columns.
csvFile = pd.read_csv('transcript_data_combined.csv')

# Download the VADER lexicon resource; this has to happen before any
# SentimentIntensityAnalyzer is constructed.
nltk.download('vader_lexicon')

# Set up the analyzer
# NOTE(review): nothing else in this cell reads `analyzer`;
# get_political_sentiment works with its own analyzer instance. Confirm
# whether this module-level one is still needed.
analyzer = SentimentIntensityAnalyzer()

# Valence overrides for political vocabulary, layered on top of the stock VADER
# lexicon by get_political_sentiment. Each word appears exactly once: a dict
# literal silently keeps only the last value for a repeated key.
POLITICAL_WORDS = {
    # Positive Sentiment Words
    'progress': 2.0, 'peaceful': 2.5, 'visionary': 2.3, 'hopeful': 2.2,
    'inclusive': 2.3, 'unity': 1.8, 'reform': 1.7, 'growth': 1.9,
    'prosperity': 2.0, 'strong': 2.4, 'freedom': 1.8, 'justice': 1.9,
    'opportunity': 2.5, 'equality': 2.5, 'empower': 2.4, 'resolve': 2.3,
    'bright': 2.2, 'leadership': 2.1, 'diplomacy': 2.0, 'accountability': 2.1,
    'collaboration': 2.0, 'integrity': 2.3, 'innovation': 2.0, 'resilience': 1.9,
    'solidarity': 2.2, 'compassion': 2.4, 'patriotism': 2.1,
    'courage': 2.5, 'dedication': 2.4, 'responsibility': 2.3, 'sacrifice': 2.2,

    # Negative Sentiment Words
    'divisive': -2.5, 'corruption': -2.0, 'crisis': -2.2, 'chaos': -2.3,
    'conflict': -2.5, 'threat': -2.0, 'oppression': -1.8, 'failure': -2.5,
    'unrest': -2.1, 'inequality': -2.3, 'exploitation': -2.0, 'injustice': -2.4,
    'scandal': -2.5, 'lies': -2.4, 'betrayal': -2.3, 'turmoil': -2.1,
    'fear': -2.3, 'authoritarian': -2.4, 'censorship': -2.2, 'division': -2.0,
    'repression': -2.3, 'mismanagement': -2.1, 'controversy': -1.9, 'bias': -2.2,
    'hypocrisy': -2.4, 'slander': -2.1, 'greed': -2.5, 'violence': -2.4,
    'deception': -2.5, 'propaganda': -2.3, 'polarization': -2.2, 'abuse': -2.5,

    # Neutral/Context-Dependent Words
    'leader': 1.0, 'policy': 1.2, 'campaign': 1.5, 'candidate': 0.5,
    'debate': 0.7, 'vote': 1.5, 'agenda': 1.0, 'law': 1.2,
    'economy': 0.9, 'strategy': 1.1, 'movement': 1.0, 'security': 1.0,
    'constitution': 0.8, 'foreign': -1.0, 'budget': 1.0, 'tax': -1.0,
    'legislation': 0.8, 'government': 1.0, 'executive': 0.7, 'judiciary': 0.7,
    'diplomat': 1.0, 'coalition': 1.1, 'treaty': 1.2, 'electoral': 1.0,
    'sanction': -1.5, 'lobbyist': -0.8, 'legislature': 1.0, 'bureaucracy': -1.2
}

# Analyzer shared by every get_political_sentiment call. It is reset whenever
# this cell is re-run, so edits to POLITICAL_WORDS take effect on the next call.
_politicalAnalyzer = None


def _get_political_analyzer():
    """Return the shared analyzer, creating and patching it on first use."""
    global _politicalAnalyzer
    if _politicalAnalyzer is None:
        sid = SentimentIntensityAnalyzer()
        sid.lexicon.update(POLITICAL_WORDS)  # Overrides win over stock entries
        _politicalAnalyzer = sid
    return _politicalAnalyzer


def get_political_sentiment(sentence):
    """Score ``sentence`` with VADER extended by the POLITICAL_WORDS overrides.

    The analyzer is built once and reused, so scoring many rows does not
    construct a new analyzer and rebuild the override table for each row.

    Parameters
    ----------
    sentence : str
        Text to score.

    Returns
    -------
    dict
        The analyzer's full polarity scores; callers read the 'neg', 'neu',
        'pos' and 'compound' entries.
    """
    return _get_political_analyzer().polarity_scores(sentence)

# Missing transcripts are read as NaN. Blank them before the string cast,
# otherwise astype(str) turns them into the literal word 'nan' and that word
# gets scored as if it were transcript text.
csvFile['Transcript'] = csvFile['Transcript'].fillna('').astype(str)

# Score every transcript once; each result is a dict keyed by the names below.
scoreColumns = ['neg', 'neu', 'pos', 'compound']
scoreRows = [get_political_sentiment(text) for text in csvFile["Transcript"]]

# Reuse csvFile's index so each score row stays attached to its transcript row.
scoreFrame = pd.DataFrame(scoreRows, columns=scoreColumns, index=csvFile.index)

# Add sentiment scores as new columns to the DataFrame (overwriting any
# columns of the same name).
for column in scoreColumns:
    csvFile[column] = scoreFrame[column]

# Group by 'YouTube URL' and calculate the average sentiment score per group,
# most positive compound score first.
sentiment_avg = (
    csvFile.groupby("YouTube URL")[scoreColumns]
    .mean()
    .sort_values(by="compound", ascending=False)
)

# Save the result as a CSV file; index=True keeps the URL group labels.
sentiment_avg.to_csv('grouped_sentiment_analysis.csv', index=True)
print("Sentiment analysis results saved to 'grouped_sentiment_analysis.csv'")


[nltk_data] Downloading package vader_lexicon to /root/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


Sentiment analysis results saved to 'grouped_sentiment_analysis.csv'


In [4]:
import pandas as pd
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer

# Load the combined transcript table from the working directory; the rest of
# this cell reads its 'Transcript' column and writes the table back out.
csvFile = pd.read_csv('transcript_data_combined.csv')

# Download the VADER lexicon resource; this has to happen before any
# SentimentIntensityAnalyzer is constructed.
nltk.download('vader_lexicon')

# Set up the analyzer
# NOTE(review): nothing else in this cell reads `analyzer`;
# get_political_sentiment works with its own analyzer instance. Confirm
# whether this module-level one is still needed.
analyzer = SentimentIntensityAnalyzer()

# Valence overrides for political vocabulary, layered on top of the stock VADER
# lexicon by get_political_sentiment. Each word appears exactly once: a dict
# literal silently keeps only the last value for a repeated key.
POLITICAL_WORDS = {
    # Positive Sentiment Words
    'progress': 2.0, 'peaceful': 2.5, 'visionary': 2.3, 'hopeful': 2.2,
    'inclusive': 2.3, 'unity': 1.8, 'reform': 1.7, 'growth': 1.9,
    'prosperity': 2.0, 'strong': 2.4, 'freedom': 1.8, 'justice': 1.9,
    'opportunity': 2.5, 'equality': 2.5, 'empower': 2.4, 'resolve': 2.3,
    'bright': 2.2, 'leadership': 2.1, 'diplomacy': 2.0, 'accountability': 2.1,
    'collaboration': 2.0, 'integrity': 2.3, 'innovation': 2.0, 'resilience': 1.9,
    'solidarity': 2.2, 'compassion': 2.4, 'patriotism': 2.1,
    'courage': 2.5, 'dedication': 2.4, 'responsibility': 2.3, 'sacrifice': 2.2,

    # Negative Sentiment Words
    'divisive': -2.5, 'corruption': -2.0, 'crisis': -2.2, 'chaos': -2.3,
    'conflict': -2.5, 'threat': -2.0, 'oppression': -1.8, 'failure': -2.5,
    'unrest': -2.1, 'inequality': -2.3, 'exploitation': -2.0, 'injustice': -2.4,
    'scandal': -2.5, 'lies': -2.4, 'betrayal': -2.3, 'turmoil': -2.1,
    'fear': -2.3, 'authoritarian': -2.4, 'censorship': -2.2, 'division': -2.0,
    'repression': -2.3, 'mismanagement': -2.1, 'controversy': -1.9, 'bias': -2.2,
    'hypocrisy': -2.4, 'slander': -2.1, 'greed': -2.5, 'violence': -2.4,
    'deception': -2.5, 'propaganda': -2.3, 'polarization': -2.2, 'abuse': -2.5,

    # Neutral/Context-Dependent Words
    'leader': 1.0, 'policy': 1.2, 'campaign': 1.5, 'candidate': 0.5,
    'debate': 0.7, 'vote': 1.5, 'agenda': 1.0, 'law': 1.2,
    'economy': 0.9, 'strategy': 1.1, 'movement': 1.0, 'security': 1.0,
    'constitution': 0.8, 'foreign': -1.0, 'budget': 1.0, 'tax': -1.0,
    'legislation': 0.8, 'government': 1.0, 'executive': 0.7, 'judiciary': 0.7,
    'diplomat': 1.0, 'coalition': 1.1, 'treaty': 1.2, 'electoral': 1.0,
    'sanction': -1.5, 'lobbyist': -0.8, 'legislature': 1.0, 'bureaucracy': -1.2
}

# Analyzer shared by every get_political_sentiment call. It is reset whenever
# this cell is re-run, so edits to POLITICAL_WORDS take effect on the next call.
_politicalAnalyzer = None


def _get_political_analyzer():
    """Return the shared analyzer, creating and patching it on first use."""
    global _politicalAnalyzer
    if _politicalAnalyzer is None:
        sid = SentimentIntensityAnalyzer()
        sid.lexicon.update(POLITICAL_WORDS)  # Overrides win over stock entries
        _politicalAnalyzer = sid
    return _politicalAnalyzer


def get_political_sentiment(sentence):
    """Score ``sentence`` with VADER extended by the POLITICAL_WORDS overrides.

    The analyzer is built once and reused, so scoring many rows does not
    construct a new analyzer and rebuild the override table for each row.

    Parameters
    ----------
    sentence : str
        Text to score.

    Returns
    -------
    dict
        The analyzer's full polarity scores; callers read the 'neg', 'neu',
        'pos' and 'compound' entries.
    """
    return _get_political_analyzer().polarity_scores(sentence)

# Missing transcripts are read as NaN. Blank them before the string cast,
# otherwise astype(str) turns them into the literal word 'nan', which would be
# scored as transcript text and written to the output file.
csvFile['Transcript'] = csvFile['Transcript'].fillna('').astype(str)

# Score every transcript once; each result is a dict keyed by the names below.
scoreColumns = ['neg', 'neu', 'pos', 'compound']
scoreRows = [get_political_sentiment(text) for text in csvFile["Transcript"]]

# Reuse csvFile's index so each score row stays attached to its transcript row.
scoreFrame = pd.DataFrame(scoreRows, columns=scoreColumns, index=csvFile.index)

# Add sentiment scores as new columns to the DataFrame (overwriting any
# columns of the same name).
for column in scoreColumns:
    csvFile[column] = scoreFrame[column]

# Save the result as a CSV file with sentiment scores for each row
csvFile.to_csv('individual_sentiment_analysis.csv', index=False)
print("Individual sentiment analysis results saved to 'individual_sentiment_analysis.csv'")


[nltk_data] Downloading package vader_lexicon to /root/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


Individual sentiment analysis results saved to 'individual_sentiment_analysis.csv'
