# === reddit_headlines.py ===

Searches several finance-related subreddits for recent posts that mention a given stock ticker,
classifies the sentiment of each post title using OpenAI, and summarizes the overall outlook.

In [2]:
import os
import re
import time
import openai
import pandas as pd
import praw
from dotenv import load_dotenv

load_dotenv()

True

In [24]:
import time
import datetime
import pandas as pd
import praw
import openai
import re
import os

# === Setup credentials ===
# All secrets are read from the environment (load_dotenv() above fills it from
# a local .env file) so that no credential is stored in the source itself.
# NOTE(review): a Reddit client id/secret pair used to be hard-coded at this
# spot; treat that pair as leaked and revoke/regenerate it.
openai.api_key = os.getenv("OPENAI_API_KEY")

reddit = praw.Reddit(
    client_id=os.getenv("REDDIT_CLIENT_ID"),
    client_secret=os.getenv("REDDIT_CLIENT_SECRET"),
    # The user agent is not a secret, so the previous value stays as default.
    user_agent=os.getenv("REDDIT_USER_AGENT", "financial-news-scraper"),
)

# === Subreddits to scan ===
# Every name in this list is searched for the ticker by get_reddit_sentiment().
subreddits = ["stocks", "investing", "finance", "wallstreetbets", "options"]

# === Cleaning functions ===
def clean_text(text):
    """Strip social-media noise from a post title.

    Removes, in this order: ``@mentions``, ``#`` characters, a standalone
    ``RT`` retweet marker followed by whitespace, http(s) URLs, and colons.
    Leading and trailing whitespace is trimmed at the end; whitespace left in
    the middle of the string by a removal is not collapsed.

    Args:
        text: The raw title. Any non-string value is returned unchanged.

    Returns:
        The cleaned string, or ``text`` itself when it is not a ``str``.
    """
    if not isinstance(text, str):
        return text
    text = re.sub(r'@[A-Za-z0-9_]+', '', text)
    text = re.sub(r'#', '', text)
    # The \b anchor restricts the match to a standalone "RT" token. Without
    # it, any word that merely ends in "RT" lost its tail together with the
    # following space (e.g. "ALERT MSFT" became "ALEMSFT").
    text = re.sub(r'\bRT\s+', '', text)
    text = re.sub(r'https?://\S+', '', text)
    text = re.sub(r':', '', text)
    return text.strip()

# Compiled once at import time; remove_emoji() is called for every post title.
_EMOJI_PATTERN = re.compile(
    "["
    "\U0001F600-\U0001F64F"  # emoticons
    "\U0001F300-\U0001F5FF"  # miscellaneous symbols and pictographs
    "\U0001F680-\U0001F6FF"  # transport and map symbols
    "\U0001F1E0-\U0001F1FF"  # regional indicators (flags)
    "\U00002500-\U00002BEF"  # box drawing through miscellaneous symbols/arrows
    "\U00002702-\U000027B0"  # dingbats
    # These two code points used to be written as one range
    # (U+24C2-U+1F251), which swallowed most of the Basic Multilingual Plane
    # and therefore deleted ordinary non-Latin text such as CJK or Hangul.
    "\U000024C2"             # circled latin capital letter M
    "\U0001F251"             # circled ideograph accept
    "\U00010000-\U0010FFFF"  # every supplementary-plane character
    "\u2640-\u2642"          # female / male signs
    "\u2600-\u2B55"          # miscellaneous symbols up to heavy large circle
    "\u200d"                 # zero-width joiner used in emoji sequences
    "\u23cf"                 # eject symbol
    "\u23e9"                 # fast-forward symbol
    "\u231a"                 # watch
    "\ufe0f"                 # variation selector-16 (emoji presentation)
    "\u3030"                 # wavy dash
    "]+",
    flags=re.UNICODE,
)


def remove_emoji(text):
    """Delete emoji and pictographic symbols from ``text``.

    Args:
        text: The string to filter. Any non-string value is returned
            unchanged.

    Returns:
        ``text`` with every run of characters matched by ``_EMOJI_PATTERN``
        removed. Surrounding whitespace is left as-is.
    """
    if not isinstance(text, str):
        return text
    return _EMOJI_PATTERN.sub("", text)

# === Sentiment Classification ===
def get_openai_sentiment(text, ticker):
    """Classify the sentiment of one Reddit title with an OpenAI chat model.

    The model is asked for a single word. Its reply is normalized (surrounding
    whitespace, quotes, markdown markers and trailing punctuation removed,
    capitalization fixed) so that answers such as ``"positive"`` or
    ``"Negative."`` still map onto the canonical labels that
    ``summarize_sentiment_outlook`` counts by exact match.

    Args:
        text: The (already cleaned) post title.
        ticker: The stock ticker the title is about; included in the prompt.

    Returns:
        ``"Positive"``, ``"Neutral"`` or ``"Negative"``. ``"Neutral"`` is also
        returned when the API call fails or the reply is not one of the three
        labels.
    """
    prompt = (
        f"You are a financial sentiment analysis assistant. "
        f"Classify the sentiment of the following Reddit title about '{ticker}':\n"
        f"Title: \"{text}\"\n"
        f"Respond with one word only: Positive, Neutral, or Negative."
    )
    try:
        response = openai.ChatCompletion.create(
            model="gpt-4o",
            messages=[{"role": "user", "content": prompt}],
            temperature=0
        )
        # Kept inside the try so a malformed/empty reply falls back as well.
        label = response.choices[0].message.content.strip(" \t\r\n.!\"'*`").capitalize()
    except Exception as e:
        # Deliberate best-effort: one failed call must not abort a whole run.
        print(f"OpenAI error: {e}")
        return "Neutral"

    if label in ("Positive", "Neutral", "Negative"):
        return label
    return "Neutral"

def summarize_sentiment_outlook(df, ticker):
    """Build a one-sentence verdict from the per-post sentiment labels.

    The outlook is whichever of "Positive"/"Negative" occurs more often in
    ``df["sentiment"]``; a tie (including no occurrences of either) yields
    "Neutral". The reported total counts every label, not just those two.
    """
    label_tally = df["sentiment"].value_counts()
    n_positive = label_tally.get("Positive", 0)
    n_negative = label_tally.get("Negative", 0)
    n_all = label_tally.sum()

    if n_positive == n_negative:
        verdict = "Neutral"
    else:
        verdict = "Positive" if n_positive > n_negative else "Negative"

    headline = f"Reddit sentiment for {ticker} shows a **{verdict}** outlook "
    breakdown = f"(Positive: {n_positive}, Negative: {n_negative}, Total: {n_all})."
    return headline + breakdown


def summarize_reddit_reasoning(df, ticker):
    """Ask OpenAI for a short explanation of what drives the Reddit sentiment.

    Every entry of ``df["title"]`` is sent to the model as a bullet list.
    Returns the model's reply with surrounding whitespace stripped, or a fixed
    fallback sentence when the request raises.
    """
    bullet_lines = [f"- {post_title}" for post_title in df["title"].tolist()]
    bullet_block = "\n".join(bullet_lines)

    request_text = "".join([
        "You are a financial analyst assistant. Summarize the main reasons behind the Reddit sentiment ",
        f"for the stock {ticker} based on these Reddit post titles:\n\n",
        f"{bullet_block}\n\n",
        "Keep your response short and insightful.",
    ])

    try:
        completion = openai.ChatCompletion.create(
            model="gpt-4o-mini",
            messages=[{"role": "user", "content": request_text}],
            temperature=0.5,
        )
        summary = completion.choices[0].message.content.strip()
    except Exception as err:
        # Best-effort: report the problem and hand back a placeholder text.
        print(f"OpenAI error: {err}")
        return "Unable to summarize Reddit discussions at this time."
    return summary


# === Main Reddit Sentiment Function ===
def get_reddit_sentiment(ticker, days_back=186, min_score=3):
    """Scrape Reddit for ticker mentions and summarize sentiment.

    Searches every subreddit in the module-level ``subreddits`` list for
    ``ticker``, keeps posts that are recent and upvoted enough, classifies each
    cleaned title with ``get_openai_sentiment`` and combines the counts with an
    OpenAI-written summary of the titles.

    Args:
        ticker: Search term passed to Reddit, e.g. ``"MSFT"``.
        days_back: Only posts created within this many days are kept.
        min_score: Minimum post score required for a post to be kept.
            Defaults to 3, the threshold that used to be hard-coded.

    Returns:
        A markdown-formatted string: the outlook sentence followed by the
        discussion summary, or a "no notable discussions" message when no post
        passes the filters.
    """
    print(f"\nFetching Reddit stock discussions for {ticker}...\n")
    time_threshold = time.time() - (days_back * 86400)  # 86400 s per day
    posts = []

    for subreddit_name in subreddits:
        print(f"Scraping r/{subreddit_name} for '{ticker}' mentions...")
        subreddit = reddit.subreddit(subreddit_name)
        # NOTE(review): no explicit `limit` is passed, so the number of search
        # results per subreddit is whatever PRAW defaults to -- confirm that
        # is sufficient for long look-back windows.
        results = subreddit.search(ticker, sort="new", time_filter="all")

        for post in results:
            if post.created_utc < time_threshold or post.score < min_score:
                continue
            title = clean_text(remove_emoji(post.title))
            # A title made only of emoji/links cleans down to "", which would
            # cost an API call and add a meaningless label to the totals.
            if not title:
                continue
            posts.append({"title": title})

    if not posts:
        return f"No notable Reddit discussions found for {ticker} in the last {days_back} days."

    df = pd.DataFrame(posts)
    print(f"\nClassifying {len(df)} posts using OpenAI...\n")
    df["sentiment"] = df["title"].apply(lambda x: get_openai_sentiment(x, ticker))

    sentiment_summary = summarize_sentiment_outlook(df, ticker)
    reasoning_summary = summarize_reddit_reasoning(df, ticker)

    return f"{sentiment_summary}\n\n**Summary of Reddit Discussions:**\n{reasoning_summary}"

In [25]:
# Example run. The guard keeps the network scrape and the OpenAI calls from
# firing when this file is imported as a module; when the code is executed
# directly (script or notebook cell) the call still runs.
if __name__ == "__main__":
    print(get_reddit_sentiment("MSFT", days_back=90))


Fetching Reddit stock discussions for MSFT...

Scraping r/stocks for 'MSFT' mentions...
Scraping r/investing for 'MSFT' mentions...
Scraping r/finance for 'MSFT' mentions...
Scraping r/wallstreetbets for 'MSFT' mentions...
Scraping r/options for 'MSFT' mentions...

Classifying 94 posts using OpenAI...

Reddit sentiment for MSFT shows a **Negative** outlook (Positive: 4, Negative: 20, Total: 94).

**Summary of Reddit Discussions:**
The sentiment around Microsoft (MSFT) on Reddit appears mixed, reflecting both concern and cautious optimism among investors. Key themes include:

1. **Performance Concerns**: Posts express worry about MSFT's recent flat year-over-year performance, light quarterly Azure growth, and questions about its ability to weather trade wars and competitive pressures.

2. **Market Positioning**: Discussions about whether MSFT is "too big" to be affected by broader market dynamics and comparisons with other tech giants highlight its significant market presence but also 