# Monte Carlo Simulation for Popular r/wsb Stocks
## Importing libraries

In [None]:
import praw
from collections import Counter
import re
import pandas as pd
import time
import csv
import os
from datetime import datetime, timedelta
from dotenv import load_dotenv
import requests
from praw.models import MoreComments


# Get Reddit API credentials

In [34]:
# Reddit app credentials
load_dotenv()  
client_id = os.getenv("id")
client_secret = os.getenv("secret")
user_agent = os.getenv("user_agent")
reddit = praw.Reddit(
    client_id = client_id,
    client_secret = client_secret,
    user_agent = user_agent,
    username="Hashi118",
)


# Get top year 1000 posts from r/WallStreetBets

In [None]:
# Fetch top 100 posts from r/wallstreetbets
subreddit = reddit.subreddit('wallstreetbets')
top_posts = subreddit.top(time_filter="year", limit=1000)

# Simple regex to identify stock tickers
ticker_pattern = re.compile(r'\b[A-Z]{2,5}\b')
blacklist = {"YOLO", "THE", "AND", "ALL", "BUY", "SELL", "HOLD", "FOR", "IT", "US", "TLDR", "DD", "USD", "EU", "AI", "CEO", "WSB", "UAE"}

tickers = []
# Collect tickers
for post in top_posts:
  combined_text = post.title
  matches = ticker_pattern.findall(combined_text)
  filtered = [m for m in matches if m not in blacklist]
  tickers.extend(filtered)

      # Count most discussed tickers
ticker_counts = Counter(tickers)

print("Top discussed tickers (year):")
for ticker, count in ticker_counts.most_common(15):
    print(f"{ticker}: {count}")


In [None]:
# Fetch top 100 posts from r/wallstreetbets
subreddit = reddit.subreddit('wallstreetbets')
top_posts = subreddit.top(time_filter="year", limit=1000)
for post in top_posts:
    print(f"{post.title}"  + f" {post.link_flair_text}")


In [None]:
# --- Define if post body contains meaningful text (not just image) ---
def is_text_body(body):
    return bool(body and len(body.strip()) > 20)

# Getting top posts for Nvidia (November 1, 2024 - April 30, 2025)


In [32]:
# --- Config ---
start_date = datetime(2024, 11, 1)
end_date = datetime(2025, 4, 30)
accepted_flairs = {"DD", "Discussion", "Catalyst", "News", "YOLO", "Gain", "Loss", "Technical Analysis"}
search_query = "NVDA OR Nvidia"
limit = 5000

# --- Fetch posts ---
subreddit = reddit.subreddit('wallstreetbets')
posts = []

print(f"Fetching posts from r/wallstreetbets matching: {search_query}")
results = subreddit.search(search_query, sort="top", time_filter="year", limit=limit)

for post in results:
    flair = post.link_flair_text or ""

    # Filter by flair
    if flair not in accepted_flairs:
        continue

    # Convert timestamp to datetime
    post_datetime = datetime.fromtimestamp(post.created_utc)
    if not (start_date <= post_datetime <= end_date):
        continue

    title = post.title
    body = post.selftext
    url = post.url

    # Must mention MSTR or MicroStrategy and have meaningful text
    if ("NVDA " in title.upper() or "NVIDIA" in title.upper() or 
        "NVDA" in body.upper() or "NVIDIA" in body.upper()) and is_text_body(body):
        
        # Add main post
        posts.append([
            title,
            body,
            post.score,
            post_datetime.strftime('%Y-%m-%d %H:%M:%S'),
            url,
            flair,
            "Post"  # Label to distinguish
        ])

        # --- Fetch top-level comments ---
        post.comments.replace_more(limit=50)
        for comment in post.comments:
            if isinstance(comment, MoreComments):
                continue
            if comment.body.strip() and len(comment.body) >= 10:  # Meaningful text
                comment_datetime = datetime.fromtimestamp(comment.created_utc)
                if start_date <= comment_datetime <= end_date:
                    posts.append([
                        f"[Comment on] {title}",
                        comment.body,
                        comment.score,
                        comment_datetime.strftime('%Y-%m-%d %H:%M:%S'),
                        url,
                        flair,
                        "Comment"  # Distinguish from post
                    ])

print(f"Collected {len(posts)} total rows (posts + comments)")

# --- Save to CSV ---
folder_path = "/Users/johnabuel/Desktop/stock data"
os.makedirs(folder_path, exist_ok=True)
file_path = os.path.join(folder_path, "nvda_top_posts.csv")

with open(file_path, "w", newline='', encoding="utf-8") as f:
    writer = csv.writer(f)
    writer.writerow(["Title", "Body", "Score", "Date", "URL", "Flair", "Type"])
    writer.writerows(posts)

print(f"Saved to {file_path}")

Fetching posts from r/wallstreetbets matching: NVDA OR Nvidia
Collected 5692 total rows (posts + comments)
Saved to /Users/johnabuel/Desktop/stock data/nvda_top_posts.csv


# Getting top posts for MicroStrategy (November 1, 2024 - April 30, 2024)

In [33]:
# --- Config ---
start_date = datetime(2024, 11, 1)
end_date = datetime(2025, 4, 30)
accepted_flairs = {"DD", "Discussion", "Catalyst", "News", "YOLO", "Gain", "Loss", "Technical Analysis"}
search_query = "MSTR OR Microstrategy"
limit = 5000

# --- Fetch posts ---
subreddit = reddit.subreddit('wallstreetbets')
posts = []

print(f"Fetching posts from r/wallstreetbets matching: {search_query}")
results = subreddit.search(search_query, sort="top", time_filter="year", limit=limit)

for post in results:
    flair = post.link_flair_text or ""

    # Filter by flair
    if flair not in accepted_flairs:
        continue

    # Convert timestamp to datetime
    post_datetime = datetime.fromtimestamp(post.created_utc)
    if not (start_date <= post_datetime <= end_date):
        continue

    title = post.title
    body = post.selftext
    url = post.url

    # Must mention MSTR or MicroStrategy and have meaningful text
    if ("MSTR" in title.upper() or "MICROSTRATEGY" in title.upper() or 
        "MSTR" in body.upper() or "MICROSTRATEGY" in body.upper()) and is_text_body(body):
        
        # Add main post
        posts.append([
            title,
            body,
            post.score,
            post_datetime.strftime('%Y-%m-%d %H:%M:%S'),
            url,
            flair,
            "Post"  # Label to distinguish
        ])

        # --- Fetch top-level comments ---
        post.comments.replace_more(limit=0)
        for comment in post.comments:
            if isinstance(comment, MoreComments):
                continue
            if comment.body.strip() and len(comment.body) >= 10:  # Meaningful text
                comment_datetime = datetime.fromtimestamp(comment.created_utc)
                if start_date <= comment_datetime <= end_date:
                    posts.append([
                        f"[Comment on] {title}",
                        comment.body,
                        comment.score,
                        comment_datetime.strftime('%Y-%m-%d %H:%M:%S'),
                        url,
                        flair,
                        "Comment"  # Distinguish from post
                    ])

print(f"Collected {len(posts)} total rows (posts + comments)")

# --- Save to CSV ---
folder_path = "/Users/johnabuel/Desktop/stock data"
os.makedirs(folder_path, exist_ok=True)
file_path = os.path.join(folder_path, "mstr_top_posts.csv")

with open(file_path, "w", newline='', encoding="utf-8") as f:
    writer = csv.writer(f)
    writer.writerow(["Title", "Body", "Score", "Date", "URL", "Flair", "Type"])
    writer.writerows(posts)

print(f"Saved to {file_path}")

Fetching posts from r/wallstreetbets matching: MSTR OR Microstrategy
Collected 8355 total rows (posts + comments)
Saved to /Users/johnabuel/Desktop/stock data/mstr_top_posts.csv


# TEST

In [38]:

# --- Config ---
start_date = datetime(2024, 11, 1)
end_date = datetime(2025, 4, 30)
accepted_flairs = {"DD", "Discussion", "Catalyst", "News", "YOLO", "Gain", "Loss", "Technical Analysis"}
search_query = "MSTR OR Microstrategy"
limit = 3000

# --- Fetch posts ---
subreddit = reddit.subreddit("wallstreetbets")
seen = set()  # To track duplicates
posts = []

print(f"Fetching 'top' and 'relevance' posts for: {search_query}")

for sort_type in ["top", "relevance"]:
    print(f"Fetching {sort_type} posts...")
    results = subreddit.search(search_query, sort=sort_type, time_filter="year", limit=limit)

    for post in results:
        flair = post.link_flair_text or ""
        if flair not in accepted_flairs:
            continue

        post_datetime = datetime.fromtimestamp(post.created_utc)
        if not (start_date <= post_datetime <= end_date):
            continue

        title = post.title.strip()
        body = post.selftext.strip()
        url = post.url
        identifier = (title.lower(), body.lower(), post_datetime.strftime('%Y-%m-%d'))  # Unique ID

        if identifier in seen:
            continue  # Skip duplicates
        seen.add(identifier)

        if ("MSTR" in title.upper() or "MICROSTRATEGY" in title.upper() or 
            "MSTR" in body.upper() or "MICROSTRATEGY" in body.upper()) and body and len(body) >= 10:
            
            posts.append([
                title,
                body,
                post.score,
                post_datetime.strftime('%Y-%m-%d %H:%M:%S'),
                url,
                flair,
                "Post"
            ])

print(f"Collected {len(posts)} unique posts")

# --- Save to CSV ---
folder_path = "/Users/johnabuel/Desktop/stock data"
os.makedirs(folder_path, exist_ok=True)
file_path = os.path.join(folder_path, "mstr_top_relevant_posts.csv")

with open(file_path, "w", newline='', encoding="utf-8") as f:
    writer = csv.writer(f)
    writer.writerow(["Title", "Body", "Score", "Date", "URL", "Flair", "Type"])
    writer.writerows(posts)

print(f"Saved to {file_path}")


Fetching 'top' and 'relevance' posts for: MSTR OR Microstrategy
Fetching top posts...
Fetching relevance posts...
Collected 152 unique posts
Saved to /Users/johnabuel/Desktop/stock data/mstr_top_relevant_posts.csv
