<a href="https://colab.research.google.com/github/evanjholt/sentiment-tracker/blob/main/Reddit_Scrapper.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install praw



In [None]:
import praw
import re
from datetime import datetime
import pandas as pd # Useful for data analysis and quick table views

In [None]:
# Cell 3: Load Stock Tickers from CSV (using GitHub Raw Link)

# --- IMPORTANT: Replace this URL with the actual raw link you copied from GitHub ---
csv_url = 'https://raw.githubusercontent.com/abbadata/stock-tickers/refs/heads/main/data/nasdaq.csv'

# --- IMPORTANT: Verify the column name in your chosen CSV that contains the tickers ---
ticker_column_name = 'Symbol' # Common for NASDAQ/NYSE lists. Could be 'Ticker', 'Stock', etc.

try:
    print(f"Attempting to load tickers from: {csv_url}")
    all_us_tickers_df = pd.read_csv(csv_url)

    # Basic data cleaning and validation for tickers
    all_us_tickers_df = all_us_tickers_df.dropna(subset=[ticker_column_name])
    all_us_tickers_df = all_us_tickers_df[all_us_tickers_df[ticker_column_name].apply(lambda x: isinstance(x, str))]

    raw_stock_tickers = all_us_tickers_df[ticker_column_name].unique().tolist() # Store as raw_stock_tickers temporarily

    # Further clean and normalize tickers (alphanumeric, uppercase, typical length)
    stock_tickers_processed = [t for t in raw_stock_tickers if t.isalnum() and 1 <= len(t) <= 5] # Tickers are usually 1-5 chars
    stock_tickers_processed = [t.upper() for t in stock_tickers_processed]


    # --- NEW: More robust common word exclusion ---
    # 1. Start with a more comprehensive list of English stop words
    # You can typically get these from NLTK or use a hardcoded list
    # For Colab, let's just define a fairly large list of very common short words.
    common_english_words = [
        "A", "AN", "AND", "ARE", "AS", "AT", "BE", "BECAUSE", "BUT", "BY", "FOR", "FROM", "HAS",
        "HE", "HER", "HIS", "HOW", "I", "IN", "IS", "IT", "ITS", "OF", "ON", "OR", "THAT", "THE",
        "THIS", "TO", "WAS", "WHAT", "WHEN", "WHERE", "WHO", "WHY", "WILL", "WITH", "YOU",
        "YOUR", "ME", "MY", "WE", "OUR", "YOURS", "HIM", "HIS", "HER", "HERS", "THEY", "THEM",
        "THEIR", "MINE", "DO", "DOES", "DID", "NOT", "NO", "YES", "AM", "ISN", "AREN", "WASN",
        "WEREN", "HAVEN", "HASN", "HAD", "ABOUT", "ABOVE", "AFTER", "AGAIN", "ALL", "AMONG",
        "ANY", "BELOW", "BETWEEN", "BOTH", "EACH", "FEW", "MORE", "MOST", "OTHER", "SOME",
        "SUCH", "THAN", "TOO", "VERY", "S", "T", "CAN", "JUST", "DON", "SHOULD", "NOW",
        "ONLY", "EVEN", "MUCH", "MANY", "ONE", "TWO", "THREE", "FOUR", "FIVE", "SIX", "SEVEN",
        "EIGHT", "NINE", "TEN", # Numbers as words
        # Add problematic tickers you've already observed that are common words
        'GOOD', 'HIGH', 'LOW', 'BIG', 'OLD', 'NEW', 'RUN', 'BUY', 'SELL', 'HOLD', 'READ', 'OUT',
        'SEE', 'FUN', 'HUB', 'MAP', 'PINE', 'RIDE', 'SAP', 'MOON', 'QT', 'ZAP', 'UPL', 'WOLF',
        'JOY', 'KISS', 'LUCK', 'VIEW', 'YUM', 'DOG', 'EAT', 'FROG', 'CAT', 'PUMP', 'DUMP', 'SPY', 'QQQ', 'VIX',
        'AI', 'META', 'GOOG', 'COIN', 'SHOP', 'SNOW', 'ROKU', 'PLTR', 'MRNA' # Tickers that are also highly common names/words
    ]
    # Combine with any specific short, common words you've observed as false positives
    # that might not be in a standard stop word list
    additional_problem_words = [
        'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
        'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z' # Single letters
    ]
    all_excluded_words_set = set(common_english_words + additional_problem_words)
    all_excluded_words_set = {word.upper() for word in all_excluded_words_set} # Ensure uppercase for consistent matching

    # --- Filter out the excluded tickers ---
    stock_tickers = [t for t in stock_tickers_processed if t not in all_excluded_words_set]

    print(f"Loaded {len(stock_tickers_processed)} raw tickers. Excluded {len(stock_tickers_processed) - len(stock_tickers)} common word/excluded tickers.")
    print(f"Final list has {len(stock_tickers)} unique tickers.")
    if len(stock_tickers) > 0:
        print(f"First 10 filtered tickers: {stock_tickers[:10]}")
    else:
        print("No tickers remain after filtering. Check your exclusion list or CSV.")

except Exception as e:
    print(f"ERROR: Could not load tickers from GitHub URL. Error: {e}")
    print(f"WARNING: Using a smaller, hardcoded list as a fallback for testing.")
    # Fallback list (ensure this fallback also accounts for exclusions)
    stock_tickers = [t for t in ['GME', 'AMC', 'TSLA', 'AAPL', 'MSFT', 'UNH', 'NVDA', 'AMD', 'BB', 'CLOV', 'SOFI', 'RIVN', 'AMZN', 'NFLX'] if t not in all_excluded_words_set] # Filter fallback too!

Attempting to load tickers from: https://raw.githubusercontent.com/abbadata/stock-tickers/refs/heads/main/data/nasdaq.csv
Loaded 4056 raw tickers. Excluded 16 common word/excluded tickers.
Final list has 4040 unique tickers.
First 10 filtered tickers: ['VCVC', 'VCVCU', 'VCVCW', 'TXG', 'YI', 'YQ', 'TURN', 'ATNF', 'ATNFW', 'FLWS']


In [None]:
from google.colab import userdata

reddit = praw.Reddit(
    client_id=userdata.get('REDDIT_CLIENT_ID'),
    client_secret=userdata.get('REDDIT_CLIENT_SECRET'),
    user_agent=userdata.get('REDDIT_USER_AGENT')
)

In [None]:
import time # Ensure 'time' module is imported if not already in Cell 2

def find_stock_mentions(text):
    """
    Identifies stock tickers within a given text based on the global stock_tickers list.
    """
    mentions = set()
    # The 'stock_tickers' list is populated from Cell 3 and should be globally accessible
    for ticker in stock_tickers:
        # Look for exact word matches or $ticker (case-insensitive)
        # Using a word boundary '\b' to match whole words and prevent partial matches (e.g., 'CAT' in 'CATCH')
        # Using '\$' to match the dollar sign prefix (e.g., '$AAPL')
        if re.search(r'\b' + re.escape(ticker) + r'\b', text, re.IGNORECASE) or \
           re.search(r'\$' + re.escape(ticker) + r'\b', text, re.IGNORECASE):
            mentions.add(ticker.upper())
    return list(mentions)

def scrape_reddit_mentions():
    """
    Scrapes a limited number of new submissions and their comments
    from specified subreddits for stock mentions.
    Returns a dictionary where keys are stock tickers and values are lists of mention details.
    """
    stock_data = {}
    # You can start with one or two subreddits for faster testing, then add more.
    subreddits_to_monitor = ['stocks', 'investing'] # Start with these, add 'wallstreetbets' later if desired

    print("Starting Reddit scraping job...")
    print(f"Monitoring {len(stock_tickers)} tickers across {len(subreddits_to_monitor)} subreddits.")

    # --- Configuration for Scraping Limit and Pauses ---
    # Adjust 'scrape_limit_per_subreddit' based on how much data you want and your observed speed.
    # A value of 20-50 per subreddit is a good balance for testing.
    scrape_limit_per_subreddit = 20 # Number of 'new' submissions to fetch per subreddit

    # Pause duration: higher values mean slower but safer scraping.
    pause_between_submissions = 1 # seconds to pause after a few submissions
    pause_between_subreddits = 5 # seconds to pause after processing an entire subreddit

    for subreddit_name in subreddits_to_monitor:
        try:
            subreddit = reddit.subreddit(subreddit_name)
            print(f"\n--- Scraping r/{subreddit_name} (fetching top {scrape_limit_per_subreddit} new posts)... ---")

            # Fetch new submissions
            for i, submission in enumerate(subreddit.new(limit=scrape_limit_per_subreddit)):
                # Introduce a small pause periodically to respect Reddit API limits
                if i > 0 and i % 5 == 0: # Pause every 5 submissions
                    print(f"  Pausing {pause_between_submissions}s after {i} submissions in r/{subreddit_name}...")
                    time.sleep(pause_between_submissions)

                submission_text = submission.title + " " + submission.selftext # Combine title and body
                mentions_in_submission = find_stock_mentions(submission_text)

                if mentions_in_submission:
                    # print(f"    Found mentions in submission '{submission.title[:50]}...'") # Uncomment for detailed debug
                    for mention in mentions_in_submission:
                        if mention not in stock_data:
                            stock_data[mention] = []
                        stock_data[mention].append({
                            'type': 'submission',
                            'subreddit': subreddit_name,
                            'id': submission.id,
                            'title': submission.title,
                            'url': submission.url,
                            'score': submission.score,
                            'created_utc': submission.created_utc, # Unix timestamp
                            'timestamp': datetime.fromtimestamp(submission.created_utc).isoformat() # ISO 8601 string
                        })

                # Process comments for each submission
                # replace_more(limit=0) fetches only directly available comments,
                # avoiding extra API calls for "More Comments" links which is good for testing.
                submission.comments.replace_more(limit=0)
                for comment in submission.comments.list():
                    mentions_in_comment = find_stock_mentions(comment.body)
                    if mentions_in_comment:
                        # print(f"      Found mentions in comment '{comment.body[:50]}...'") # Uncomment for detailed debug
                        for mention in mentions_in_comment:
                            if mention not in stock_data:
                                stock_data[mention] = []
                            stock_data[mention].append({
                                'type': 'comment',
                                'subreddit': subreddit_name,
                                'id': comment.id,
                                'submission_id': submission.id, # Link comment to its parent submission
                                'comment_body': comment.body,
                                'url': f"https://reddit.com{comment.permalink}",
                                'score': comment.score,
                                'created_utc': comment.created_utc,
                                'timestamp': datetime.fromtimestamp(comment.created_utc).isoformat()
                            })
        except Exception as e:
            print(f"ERROR: Failed to scrape r/{subreddit_name}. Error: {e}")

        # Pause after finishing an entire subreddit, gives a larger break before the next subreddit
        print(f"  Finished r/{subreddit_name}. Pausing for {pause_between_subreddits}s before next subreddit...")
        time.sleep(pause_between_subreddits)

    print("\nReddit scraping job finished.")
    return stock_data

# --- Execute the scraper function and store results ---
test_mentions = scrape_reddit_mentions()

print("\n--- Scrape Result Summary ---")
if test_mentions:
    total_mentions_found = sum(len(data) for data in test_mentions.values())
    print(f"Found {total_mentions_found} mentions for {len(test_mentions)} unique stocks.")
    # Show top 5 mentioned stocks by raw count
    sorted_stocks = sorted(test_mentions.items(), key=lambda item: len(item[1]), reverse=True)
    print("\nTop 5 Most Mentioned Stocks (Raw Count):")
    for stock, data in sorted_stocks[:5]:
        print(f"  - {stock}: {len(data)} mentions")
else:
    print("No stock mentions found in this scrape. Try increasing scrape_limit_per_subreddit or adding more active subreddits/tickers.")

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.



Starting Reddit scraping job...
Monitoring 4040 tickers across 2 subreddits.

--- Scraping r/stocks (fetching top 20 new posts)... ---


It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/l

  Pausing 1s after 5 submissions in r/stocks...


It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/l

  Pausing 1s after 10 submissions in r/stocks...


It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/l

  Pausing 1s after 15 submissions in r/stocks...


It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/l

  Finished r/stocks. Pausing for 5s before next subreddit...


It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.




--- Scraping r/investing (fetching top 20 new posts)... ---


It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/l

  Pausing 1s after 5 submissions in r/investing...


It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/l

  Pausing 1s after 10 submissions in r/investing...


It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/l

  Pausing 1s after 15 submissions in r/investing...


It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/l

  Finished r/investing. Pausing for 5s before next subreddit...

Reddit scraping job finished.

--- Scrape Result Summary ---
Found 534 mentions for 98 unique stocks.

Top 5 Most Mentioned Stocks (Raw Count):
  - GO: 51 mentions
  - GOLD: 23 mentions
  - NEXT: 23 mentions
  - REAL: 22 mentions
  - FUND: 22 mentions


In [None]:
# Cell 5: Analyze Scraped Data with Pandas
# This cell transforms the scraped data into a DataFrame for easier analysis and ranking.

# Flatten the dictionary of mentions into a list of dictionaries
all_mentions_list = []
for ticker, mentions in test_mentions.items():
    for mention_detail in mentions:
        mention_detail['ticker'] = ticker # Add the stock ticker to each mention record
        all_mentions_list.append(mention_detail)

if all_mentions_list:
    # Create a Pandas DataFrame from the list of mentions
    df = pd.DataFrame(all_mentions_list)

    print("\n--- DataFrame Head (First 5 rows) ---")
    print(df.head()) # Shows the first 5 rows of your data

    print("\n--- Basic DataFrame Info ---")
    df.info() # Provides a summary of the DataFrame structure and data types

    print("\n--- Stock Mention Counts (Overall Ranking) ---")
    # Count how many times each stock ticker was mentioned
    # This is your primary ranking based on raw mention count
    print(df['ticker'].value_counts().head(15)) # Show top 15 by count

    print("\n--- Mentions by Type (Submission vs. Comment) ---")
    print(df['type'].value_counts())

    print("\n--- Subreddit Mention Counts ---")
    print(df['subreddit'].value_counts())

    print("\n--- Top 10 Submissions by Score (Most Engaged Posts) ---")
    # Filter for submissions and sort by 'score' (upvotes)
    submissions_df = df[df['type'] == 'submission'].sort_values(by='score', ascending=False)
    if not submissions_df.empty:
        # Display relevant columns for top submissions
        print(submissions_df.head(10)[['ticker', 'title', 'score', 'url', 'timestamp']])
    else:
        print("No submission data found to show top posts.")

    print("\n--- Top 10 Comments by Score (Most Engaged Comments) ---")
    # Filter for comments and sort by 'score'
    comments_df = df[df['type'] == 'comment'].sort_values(by='score', ascending=False)
    if not comments_df.empty:
        # Display relevant columns for top comments
        print(comments_df.head(10)[['ticker', 'comment_body', 'score', 'url', 'timestamp']])
    else:
        print("No comment data found to show top comments.")

    # --- Advanced Analysis (Optional - uncomment and explore) ---
    # Convert timestamp column to datetime objects for time-based analysis
    # df['datetime'] = pd.to_datetime(df['timestamp'])

    # print("\n--- Mentions by Hour (Example) ---")
    # if 'datetime' in df.columns:
    #     df['hour'] = df['datetime'].dt.hour
    #     print(df.groupby('hour')['ticker'].value_counts().unstack(fill_value=0))

    # print("\n--- Mentions per Stock over Time (Example) ---")
    # if 'datetime' in df.columns:
    #     # Group by date and ticker, then count
    #     daily_mentions = df.groupby([df['datetime'].dt.date, 'ticker']).size().unstack(fill_value=0)
    #     print(daily_mentions.tail()) # Show last few days

else:
    print("\nNo mentions were found in the DataFrame to analyze.")



--- DataFrame Head (First 5 rows) ---
         type subreddit       id                                     title  \
0  submission    stocks  1ks8nyj  ETOR valuation against HOOD, IBKR & BULL   
1     comment    stocks  mti3maq                                       NaN   
2  submission    stocks  1ks8nyj  ETOR valuation against HOOD, IBKR & BULL   
3     comment    stocks  mtizi2z                                       NaN   
4     comment    stocks  mthg5aw                                       NaN   

                                                 url  score   created_utc  \
0  https://www.reddit.com/r/stocks/comments/1ks8n...      1  1.747860e+09   
1  https://reddit.com/r/stocks/comments/1ks1mx9/w...      2  1.747847e+09   
2  https://www.reddit.com/r/stocks/comments/1ks8n...      1  1.747860e+09   
3  https://reddit.com/r/stocks/comments/1ks6bjo/u...     11  1.747856e+09   
4  https://reddit.com/r/stocks/comments/1krzdkd/e...      3  1.747840e+09   

             timestamp ticker