#**Purpose**

The purpose of this notebook is to provide the top ten tickers mentioned in the past 24 hours across five financial subreddits using the reddit api. There is a weighted calculation where the top ticker is then used to calculate both the news and social media sentiment over the last 365 days.


# **Initial Setup**

In [1]:
pip install asyncpraw

Collecting asyncpraw
  Downloading asyncpraw-7.8.1-py3-none-any.whl.metadata (9.0 kB)
Collecting aiofiles (from asyncpraw)
  Downloading aiofiles-24.1.0-py3-none-any.whl.metadata (10 kB)
Collecting aiosqlite<=0.17.0 (from asyncpraw)
  Downloading aiosqlite-0.17.0-py3-none-any.whl.metadata (4.1 kB)
Collecting asyncprawcore<3,>=2.4 (from asyncpraw)
  Downloading asyncprawcore-2.4.0-py3-none-any.whl.metadata (5.5 kB)
Collecting update_checker>=0.18 (from asyncpraw)
  Downloading update_checker-0.18.0-py3-none-any.whl.metadata (2.3 kB)
Downloading asyncpraw-7.8.1-py3-none-any.whl (196 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m196.4/196.4 kB[0m [31m3.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading aiosqlite-0.17.0-py3-none-any.whl (15 kB)
Downloading asyncprawcore-2.4.0-py3-none-any.whl (19 kB)
Downloading update_checker-0.18.0-py3-none-any.whl (7.0 kB)
Downloading aiofiles-24.1.0-py3-none-any.whl (15 kB)
Installing collected packages: aiosqlite, aiofiles, update

In [2]:
!pip install asyncpraw nest_asyncio



In [3]:
import asyncpraw
import nest_asyncio
import pandas as pd
from collections import Counter
from datetime import datetime, timedelta
from google.colab import drive

In [4]:
drive.mount('/content/drive')

Mounted at /content/drive


In [5]:
def get_nasdaq_symbols():
    file_path = '/content/drive/My Drive/StockDashboard_Automation/nasdaqlisted.txt'
    df = pd.read_csv(file_path, delimiter='\t')
    symbols_df = df[['Symbol', 'Security Name']]
    symbols_df.columns = ['Ticker', 'Company Name']
    return symbols_df

nasdaq_symbols = get_nasdaq_symbols()
ticker_symbols = nasdaq_symbols['Ticker'].tolist()
company_names = nasdaq_symbols['Company Name'].tolist()

print(f"✅ Loaded {len(nasdaq_symbols)} NASDAQ companies.")

✅ Loaded 4804 NASDAQ companies.


# **Connect to the Reddit API**

In [6]:
reddit = asyncpraw.Reddit(
    client_id="VEB2VxRgVPH5RD8t6CBxiw",
    client_secret="KxTkyViIMNdYW2gZpdPwhqysD4HMgw",
    username="novicestockbot",
    password="Capstone606!",
    user_agent="novicestockbot by u/novicestockbot"
)


# **Extract Tickers from Titles, Posts  & Comments**

In [7]:
nest_asyncio.apply()

subreddits = ["wallstreetbets", "stocks", "investing", "securityanalysis", "stockmarket"]
start_time = datetime.utcnow() - timedelta(days=1)

async def fetch_reddit_data():
    all_data = []

    for subreddit_name in subreddits:
        subreddit = await reddit.subreddit(subreddit_name)
        async for submission in subreddit.new(limit=500):
            post_time = datetime.utcfromtimestamp(submission.created_utc)
            if post_time >= start_time:
                all_data.append({
                    "created_utc": post_time.strftime("%Y-%m-%d"),
                    "subreddit": subreddit_name,
                    "text": submission.title[:500],
                    "source": "Title"
                })

                if submission.selftext:
                    all_data.append({
                        "created_utc": post_time.strftime("%Y-%m-%d"),
                        "subreddit": subreddit_name,
                        "text": submission.selftext[:500],
                        "source": "Post"
                    })

                await submission.load()
                await submission.comments.replace_more(limit=0)
                sorted_comments = sorted(submission.comments.list(), key=lambda c: c.score, reverse=True)[:5]
                for comment in sorted_comments:
                    comment_time = datetime.utcfromtimestamp(comment.created_utc)
                    if comment_time >= start_time:
                        all_data.append({
                            "created_utc": comment_time.strftime("%Y-%m-%d"),
                            "subreddit": subreddit_name,
                            "text": comment.body[:500],
                            "source": "Comment"
                        })

    return pd.DataFrame(all_data)


In [8]:
df = await fetch_reddit_data()
df.head()

Unnamed: 0,created_utc,subreddit,text,source
0,2025-05-05,wallstreetbets,Best play I made this year: shorting Netflix o...,Title
1,2025-05-05,wallstreetbets,Netflix was the single most overbought stock i...,Comment
2,2025-05-05,wallstreetbets,\n**User Report**| | | |\n:--|:--|:--|:--\n**T...,Comment
3,2025-05-05,wallstreetbets,What was your reasoning ?,Comment
4,2025-05-05,wallstreetbets,Am I missing the big trade or is this circus p...,Comment


In [9]:
print("Read-only mode:", reddit.read_only)


Read-only mode: False


In [10]:
if df.empty:
    print("⚠ No new Reddit posts/comments found in the last 24 hours.")
else:
    print(f"✅ Collected {len(df)} Reddit posts/comments.")

✅ Collected 557 Reddit posts/comments.


In [11]:
print(df.head())

  created_utc       subreddit  \
0  2025-05-05  wallstreetbets   
1  2025-05-05  wallstreetbets   
2  2025-05-05  wallstreetbets   
3  2025-05-05  wallstreetbets   
4  2025-05-05  wallstreetbets   

                                                text   source  
0  Best play I made this year: shorting Netflix o...    Title  
1  Netflix was the single most overbought stock i...  Comment  
2  \n**User Report**| | | |\n:--|:--|:--|:--\n**T...  Comment  
3                          What was your reasoning ?  Comment  
4  Am I missing the big trade or is this circus p...  Comment  


# **Aggregate All Mentions**

In [12]:
import re

valid_tickers = set(nasdaq_symbols['Ticker'])

def extract_tickers(text):
    potential_tickers = re.findall(r'\b[A-Z]{1,5}\b', str(text))
    return [ticker for ticker in potential_tickers if ticker in valid_tickers]

df["extracted_tickers"] = df["text"].apply(extract_tickers)

In [13]:
df["extracted_tickers"] = df["extracted_tickers"].apply(lambda x: x if isinstance(x, list) else [])

df_exploded = df.explode("extracted_tickers").rename(columns={"extracted_tickers": "ticker_source"}).dropna()

ticker_counts = df_exploded.groupby(["ticker_source", "source"]).size().unstack(fill_value=0)

ticker_counts["title_weighted"] = ticker_counts.get("Title", 0) * 3
ticker_counts["post_weighted"] = ticker_counts.get("Post", 0) * 2
ticker_counts["comment_weighted"] = ticker_counts.get("Comment", 0) * 1

ticker_counts["weighted_score"] = ticker_counts["title_weighted"] + ticker_counts["post_weighted"] + ticker_counts["comment_weighted"]

top_10_tickers = ticker_counts[["weighted_score"]].sort_values(by="weighted_score", ascending=False).head(10)

In [14]:
print(top_10_tickers)

source         weighted_score
ticker_source                
PLTR                       14
QQQ                        11
TTD                        11
AMD                         8
SSBK                        7
CME                         5
BRKR                        4
MSFT                        4
WBD                         4
OP                          4


In [15]:
top_10_tickers = top_10_tickers.reset_index()

In [16]:
display(top_10_tickers)

source,ticker_source,weighted_score
0,PLTR,14
1,QQQ,11
2,TTD,11
3,AMD,8
4,SSBK,7
5,CME,5
6,BRKR,4
7,MSFT,4
8,WBD,4
9,OP,4


In [17]:
export_path = f"/content/drive/My Drive/StockDashboard_Automation/ExportToGitHub/Top Ten Tickers.csv"

top_10_tickers.to_csv(export_path, index=False)

print(f"✅ Exported Top Ten Tickers to {export_path}")

✅ Exported Top Ten Tickers to /content/drive/My Drive/StockDashboard_Automation/ExportToGitHub/Top Ten Tickers.csv


In [18]:
print(top_10_tickers)

source ticker_source  weighted_score
0               PLTR              14
1                QQQ              11
2                TTD              11
3                AMD               8
4               SSBK               7
5                CME               5
6               BRKR               4
7               MSFT               4
8                WBD               4
9                 OP               4
