# Monte Carlo Simulation for Popular r/wsb Stocks
## First part: Importing libraries

In [1]:
import praw
from collections import Counter
import re
import pandas as pd
import csv
import os
from datetime import datetime
from dotenv import load_dotenv


# Get Reddit API credentials

In [None]:
# Read the Reddit app credentials from the environment. load_dotenv() is
# called first so that values kept in a local .env file are available too.
load_dotenv()
client_id, client_secret, user_agent = (
    os.getenv(env_name) for env_name in ("id", "secret", "user_agent")
)

# PRAW client shared by the cells below.
# NOTE(review): a username is passed without a password -- presumably the
# client is only meant for read-only access; confirm.
reddit = praw.Reddit(
    username="Hashi118",
    user_agent=user_agent,
    client_secret=client_secret,
    client_id=client_id,
)


# Get the top 1000 posts of the past year from r/WallStreetBets

In [None]:
# Request the past year's top posts from r/wallstreetbets (up to 1000)
subreddit = reddit.subreddit('wallstreetbets')
top_posts = subreddit.top(time_filter="year", limit=1000)

# A candidate ticker is any standalone run of 2-5 capital letters
ticker_pattern = re.compile(r'\b[A-Z]{2,5}\b')
# All-caps words that match the pattern but must not be counted as tickers
blacklist = {
    "YOLO", "THE", "AND", "ALL", "BUY", "SELL", "HOLD", "FOR", "IT",
    "US", "TLDR", "DD", "USD", "EU", "AI", "CEO", "WSB", "UAE",
}

tickers = []
# Scan every post title and keep each match that is not blacklisted
for post in top_posts:
    combined_text = post.title
    matches = ticker_pattern.findall(combined_text)
    filtered = [word for word in matches if word not in blacklist]
    tickers += filtered

# Tally how often each ticker was mentioned
ticker_counts = Counter(tickers)

# Report the 15 most frequently mentioned tickers
print("Top discussed tickers (year):")
for ticker, count in ticker_counts.most_common(15):
    print(f"{ticker}: {count}")


In [None]:
# Print the title and flair of each of the past year's top posts
# from r/wallstreetbets (up to 1000 requested)
subreddit = reddit.subreddit('wallstreetbets')
top_posts = subreddit.top(time_filter="year", limit=1000)
for post in top_posts:
    print(f"{post.title} {post.link_flair_text}")


# Getting Posts from WallStreetBets


In [None]:
# Search r/wallstreetbets for this month's top posts about NVDA/Nvidia
# (up to 100 results requested)
subreddit = reddit.subreddit('wallstreetbets')
stock_posts = subreddit.search("NVDA OR Nvidia", sort="top", time_filter="month", limit=100)

posts = []
# Build one CSV row per post that actually mentions NVDA/Nvidia
for post in stock_posts:
    title = post.title
    body = post.selftext
    url = post.url

    # Render the creation timestamp as a readable date string.
    # NOTE(review): fromtimestamp() without a tz argument produces the
    # machine's local time, not UTC -- confirm that this is intended.
    post_date = datetime.fromtimestamp(post.created_utc).strftime('%Y-%m-%d %H:%M:%S')

    # Check for NVDA/Nvidia in title or selftext (case-insensitive).
    # Each field is upper-cased once instead of once per comparison.
    title_upper = title.upper()
    body_upper = body.upper()
    if any(
        keyword in text
        for text in (title_upper, body_upper)
        for keyword in ("NVDA", "NVIDIA")
    ):
        posts.append([title, body, post.score, post_date, url])


folder_path = "/Users/johnabuel/Desktop/stock data"
file_path = os.path.join(folder_path, "nvda_top_posts.csv")

# Create the output folder when it is missing; otherwise open() below
# raises FileNotFoundError on a machine where it does not exist yet.
os.makedirs(folder_path, exist_ok=True)

# Write a header row followed by the collected posts.
# newline='' lets the csv module control line endings itself.
with open(file_path, "w", newline='', encoding="utf-8") as f:
    writer = csv.writer(f)
    writer.writerow(["Title", "Body", "Score", "Date", "URL"])
    writer.writerows(posts)

  