# Sentiment Analysis: Trump Truth Social Posts

This notebook analyzes the sentiment of Trump's Truth Social posts using multiple models (VADER, FinBERT, RoBERTa).

In [1]:
import pandas as pd
import numpy as np
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
import matplotlib.pyplot as plt
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch

ModuleNotFoundError: No module named 'vaderSentiment'

In [None]:
# Load Cleaned Data
# Read the cleaned posts and convert the 'date' column to datetimes in one chained step.
df_truth = pd.read_csv('data/truth_social_cleaned.csv').assign(
    date=lambda posts: pd.to_datetime(posts['date'])
)

print(f"Total posts: {len(df_truth)}")
df_truth.head()

In [None]:
# Single VADER analyzer instance, created once and read as a global by get_vader_sentiment.
analyzer = SentimentIntensityAnalyzer()


def get_vader_sentiment(text):
    """Return the VADER compound score for a single piece of text.

    Args:
        text: The post content; may be a string, NaN/None, or an empty string.

    Returns:
        0.0 when ``text`` is missing or empty (the analyzer is not called),
        otherwise the 'compound' entry of ``analyzer.polarity_scores(text)``.
    """
    has_text = not pd.isna(text) and text != ""
    if not has_text:
        return 0.0
    return analyzer.polarity_scores(text)['compound']


print("Calculating sentiment scores for VADER")
# Score every post once, then store the result as a new column.
vader_scores = df_truth['cleaned_content'].apply(get_vader_sentiment)
df_truth['vader_sentiment'] = vader_scores

# Summary statistics of the per-post scores.
lowest_score, highest_score = vader_scores.min(), vader_scores.max()
print(f"Sentiment score range: {lowest_score} to {highest_score}")
print(f"Average sentiment: {vader_scores.mean()}")
df_truth[['cleaned_content', 'vader_sentiment']].head(10)

In [None]:

# Average the per-post VADER scores per calendar day.
# Group on the date normalized to midnight: if 'date' carries a time-of-day
# component, grouping on the raw value would produce one group per timestamp
# instead of one per day. When 'date' is already date-only this is a no-op.
post_day = df_truth['date'].dt.normalize()
daily_sentiment = (
    df_truth.groupby(post_day)['vader_sentiment']
    .mean()
    .reset_index(name='avg_sentiment')
)

print(f"Daily sentiment calculated for {len(daily_sentiment)} days")
print(f"\nAverage daily sentiment range: {daily_sentiment['avg_sentiment'].min():.3f} to {daily_sentiment['avg_sentiment'].max()}")
daily_sentiment.head(10)

In [None]:

# Start from the combined market/post table and attach the daily VADER sentiment.
combined_df = pd.read_csv('data/combined_data.csv')
combined_df['date'] = pd.to_datetime(combined_df['date'])

# Left merge keeps every row of combined_df; days without a sentiment row get NaN.
combined_df = pd.merge(combined_df, daily_sentiment, on='date', how='left')

# Record which days actually matched a sentiment row BEFORE zero-filling.
# Counting "!= 0" after the fill would misreport days whose real average is
# exactly 0.0 as having no sentiment data.
has_sentiment = combined_df['avg_sentiment'].notna()

# NOTE(review): zero-filling treats days without sentiment as neutral; these
# filled zeros take part in the correlations computed from this column.
combined_df['avg_sentiment'] = combined_df['avg_sentiment'].fillna(0)

print(f"Combined dataset: {len(combined_df)} days")
print(f"Days with sentiment data: {has_sentiment.sum()}")
combined_df[['date', 'post_count', 'avg_sentiment', 'Returns']].head(10)

In [None]:
# Show the last rows of the merged table for the key columns.
preview_columns = ['date', 'post_count', 'avg_sentiment', 'Returns']
combined_df[preview_columns].tail(10)

In [None]:

# Correlation between the daily VADER average and the 'Returns' column.
daily_vader = combined_df['avg_sentiment']
daily_returns = combined_df['Returns']
sentiment_corr = daily_vader.corr(daily_returns)

print("Correlation between sentiment and QQQ returns:", round(sentiment_corr, 4))

In [None]:

# Line chart of the daily average VADER sentiment, drawn via the explicit Axes interface.
fig, ax = plt.subplots(figsize=(14, 5))
ax.plot(combined_df['date'], combined_df['avg_sentiment'], alpha=0.7)
# Dashed horizontal reference line at a score of zero.
ax.axhline(y=0, color='r', linestyle='--', alpha=0.5, label='Neutral')
ax.set_xlabel('Date')
ax.set_ylabel('Average Sentiment Score')
ax.set_title('Daily Average Sentiment of Trump Truth Social Posts')
ax.legend()
ax.grid(True, alpha=0.3)
# Rotate the x tick labels so they do not overlap.
plt.setp(ax.get_xticklabels(), rotation=45)
fig.tight_layout()
plt.show()

In [None]:

# Checkpoint identifier passed to both from_pretrained calls so tokenizer and model match.
model_name = "ProsusAI/finbert"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)
# Switch the model to evaluation mode for inference; as the last expression of
# the cell, the value returned by eval() is also shown as the cell output.
model.eval()

In [None]:

def get_finbert_sentiment(text):
    """Score one piece of text with the globally loaded ``tokenizer`` and ``model``.

    Args:
        text: The post content; may be a string, NaN/None, or an empty string.

    Returns:
        0.0 when ``text`` is missing or empty (the model is not called),
        otherwise the softmax probability of class index 0 minus that of
        class index 1 for the first (only) item in the batch.
    """
    text_missing = pd.isna(text)
    if text_missing or text == "":
        return 0.0

    # Inputs longer than 512 tokens are truncated by the tokenizer.
    encoded = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)

    # Inference only: no gradients are tracked.
    with torch.no_grad():
        logits = model(**encoded).logits
        class_probs = torch.nn.functional.softmax(logits, dim=-1)[0]

    # NOTE(review): index 0 is treated as "positive" and index 1 as "negative";
    # confirm this against model.config.id2label for the loaded checkpoint.
    return class_probs[0].item() - class_probs[1].item()


# Score every post with FinBERT, one at a time, reporting progress every 1000 posts.
total_posts = len(df_truth)
finbert_scores = []

for processed, post_text in enumerate(df_truth['cleaned_content'], start=1):
    finbert_scores.append(get_finbert_sentiment(post_text))
    if processed % 1000 == 0:
        print(f"Processed {processed} posts...")

# The list is in row order, so it lines up with df_truth positionally.
df_truth['finbert_sentiment'] = finbert_scores

finbert_column = df_truth['finbert_sentiment']
print(f"Sentiment score range: {finbert_column.min()} to {finbert_column.max()}")
print(f"Average sentiment: {finbert_column.mean()}")
df_truth[['cleaned_content', 'vader_sentiment', 'finbert_sentiment']].head(10)

In [None]:

# Average the per-post FinBERT scores per calendar day.
# Group on the date normalized to midnight: if 'date' carries a time-of-day
# component, grouping on the raw value would produce one group per timestamp
# instead of one per day. When 'date' is already date-only this is a no-op.
post_day = df_truth['date'].dt.normalize()
daily_finbert = (
    df_truth.groupby(post_day)['finbert_sentiment']
    .mean()
    .reset_index(name='avg_finbert_sentiment')
)

# Make the cell safe to re-run: without this, a second execution would merge the
# column again, yielding 'avg_finbert_sentiment_x'/'_y' and a KeyError below.
combined_df = combined_df.drop(columns=['avg_finbert_sentiment'], errors='ignore')
combined_df = pd.merge(combined_df, daily_finbert, on='date', how='left')
# Days without a FinBERT row are filled with 0.
combined_df['avg_finbert_sentiment'] = combined_df['avg_finbert_sentiment'].fillna(0)

print(f"Daily FinBERT sentiment calculated for {len(daily_finbert)} days")
print(f"Average daily FinBERT sentiment range: {daily_finbert['avg_finbert_sentiment'].min()} to {daily_finbert['avg_finbert_sentiment'].max()}")
combined_df[['date', 'post_count', 'avg_sentiment', 'avg_finbert_sentiment', 'Returns']].head(10)

In [None]:

# Compare how each model's daily average correlates with the 'Returns' column.
daily_returns = combined_df['Returns']
vader_daily = combined_df['avg_sentiment']
finbert_daily = combined_df['avg_finbert_sentiment']

vader_corr = vader_daily.corr(daily_returns)
finbert_corr = finbert_daily.corr(daily_returns)

print("Correlation with QQQ Returns:")
print(f"VADER: {round(vader_corr, 4)}")
print(f"FinBERT: {round(finbert_corr, 4)}")
print(f"Difference: {round(abs(finbert_corr - vader_corr), 4)}")

# Agreement between the two models' daily averages.
model_corr = vader_daily.corr(finbert_daily)
print(f"\nCorrelation between VADER and FinBERT: {round(model_corr, 4)}")

In [None]:

# Write the per-post and per-day tables (with sentiment columns) to CSV, without the index.
for frame, out_path in (
    (df_truth, 'data/truth_social_with_sentiment.csv'),
    (combined_df, 'data/combined_data_with_sentiment.csv'),
):
    frame.to_csv(out_path, index=False)