In [13]:
import os
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = 'alex-stocks-917bf86d24f0.json'

In [14]:
from google.cloud import firestore

def fetch_firestore_data(collection_name):
    """
    Fetch all documents from a Firestore collection and store them in a Pandas DataFrame.

    :param collection_name: Name of the Firestore collection.
    :return: Pandas DataFrame containing Firestore data.
    """
    # Initialize Firestore client
    db = firestore.Client()

    # Reference the Firestore collection
    collection_ref = db.collection(collection_name)

    # Fetch all documents
    docs = collection_ref.stream()

    # Convert documents into a list of dictionaries
    data = []
    for doc in docs:
        doc_dict = doc.to_dict()
        doc_dict['id'] = doc.id  # Include document ID
        data.append(doc_dict)

    # Create DataFrame
    return data

# Example usage


In [15]:
data = fetch_firestore_data('stock_data')

In [21]:
import pandas as pd
data_df = pd.DataFrame(data)
data_df.created_hour = data_df.created_at.dt.hour

  data_df.created_hour = data_df.created_at.dt.hour


In [23]:
data_df.created_hour

0       9
1      18
2      18
3      18
4      18
       ..
593    20
594    20
595    20
596    20
597    20
Name: created_at, Length: 598, dtype: int32

In [26]:
data_df[data_df['content'] =='[deleted]']

Unnamed: 0,created_at,content,message_type,subreddit,source,url,id,title,score,selftext,timestamp,author,num_comments,parent_id,depth
10,2025-03-20 18:04:48+00:00,[deleted],reddit_comment,wallstreetbets,reddit,https://reddit.com/r/wallstreetbets/comments/1...,miu2k9w,,9,,1742494000.0,[deleted],0,t3_1jflc6z,0.0
48,2025-03-20 18:11:37+00:00,[deleted],reddit_comment,wallstreetbets,reddit,https://reddit.com/r/wallstreetbets/comments/1...,miu3zju,,7,,1742494000.0,[deleted],0,t3_1jflc6z,0.0
220,2025-03-20 18:40:34+00:00,[deleted],reddit_comment,wallstreetbets,reddit,https://reddit.com/r/wallstreetbets/comments/1...,miua0fi,,4,,1742496000.0,[deleted],0,t3_1jflc6z,0.0
223,2025-03-20 18:41:03+00:00,[deleted],reddit_comment,wallstreetbets,reddit,https://reddit.com/r/wallstreetbets/comments/1...,miua3yy,,9,,1742496000.0,[deleted],0,t3_1jflc6z,0.0


In [1]:
%load_ext autoreload
%autoreload 2

import logging

logging.basicConfig(level=logging.INFO, force=True)  # force=True resets existing handlers

import os
os.getenv('REDDIT_CLIENT_ID')
from scrapers.reddit_scraper_v2 import RedditScraper
from dotenv import load_dotenv

load_dotenv()


True

In [2]:
try:
    async with RedditScraper() as rs:
        test_post, comments = await rs.fetch_post_with_comments('1jhdzhc')
except Exception as e:
    print(e)

INFO:RedditAPI:Reddit API client initialized successfully
INFO:RedditScraper:Fetching post 1jhdzhc with comments (limit: None)
INFO:RedditScraper:Replacing 'more comments' objects for post 1jhdzhc
INFO:RedditScraper:Processing comments for post 1jhdzhc
INFO:CommentProcessor:Total processed comments: 91
INFO:RedditScraper:Successfully processed 91 comments for post 1jhdzhc
INFO:RedditAPI:Closing Reddit API client
INFO:RedditAPI:Reddit API client closed successfully


In [3]:
comments[0].submission_id

'1jhdzhc'

In [7]:
import pandas as pd
import re, datetime
sources = {
    "NASDAQ": "ftp://ftp.nasdaqtrader.com/SymbolDirectory/nasdaqlisted.txt",
    "NYSE": "ftp://ftp.nasdaqtrader.com/SymbolDirectory/otherlisted.txt"
}

all_stocks = []

for exchange, url in sources.items():
    try:
        df = pd.read_csv(url, sep='|')
        if exchange == "NASDAQ":
            symbols = df[df['Test Issue'] == 'N']['Symbol']
            names = df[df['Test Issue'] == 'N']['Security Name']
        else:
            symbols = df[df['Test Issue'] == 'N']['ACT Symbol']
            names = df[df['Test Issue'] == 'N']['Security Name']

        for ticker, name in zip(symbols, names):
            if isinstance(ticker, str) and re.match(r'^[A-Z]{1,5}$', ticker):
                all_stocks.append({
                    'ticker': ticker.strip(),
                    'exchange': exchange,
                    'company_name': name.strip(),
                    'last_updated': datetime.datetime.now().isoformat()
                })

        print(f"Downloaded {len(symbols)} {exchange} stocks")
    except Exception as e:
        print(f"Error downloading {exchange} stocks: {str(e)}")


Downloaded 4821 NASDAQ stocks
Downloaded 6453 NYSE stocks


In [8]:
all_stocks

[{'ticker': 'AACBU',
  'exchange': 'NASDAQ',
  'company_name': 'Artius II Acquisition Inc. - Units',
  'last_updated': '2025-04-05T11:24:19.635585'},
 {'ticker': 'AACG',
  'exchange': 'NASDAQ',
  'company_name': 'ATA Creativity Global - American Depositary Shares, each representing two common shares',
  'last_updated': '2025-04-05T11:24:19.635630'},
 {'ticker': 'AADR',
  'exchange': 'NASDAQ',
  'company_name': 'AdvisorShares Dorsey Wright ADR ETF',
  'last_updated': '2025-04-05T11:24:19.635635'},
 {'ticker': 'AAL',
  'exchange': 'NASDAQ',
  'company_name': 'American Airlines Group, Inc. - Common Stock',
  'last_updated': '2025-04-05T11:24:19.635638'},
 {'ticker': 'AAME',
  'exchange': 'NASDAQ',
  'company_name': 'Atlantic American Corporation - Common Stock',
  'last_updated': '2025-04-05T11:24:19.635642'},
 {'ticker': 'AAOI',
  'exchange': 'NASDAQ',
  'company_name': 'Applied Optoelectronics, Inc. - Common Stock',
  'last_updated': '2025-04-05T11:24:19.635645'},
 {'ticker': 'AAON',
  

Collecting lxml
  Using cached lxml-5.3.1-cp310-cp310-macosx_10_9_x86_64.whl.metadata (3.7 kB)
Using cached lxml-5.3.1-cp310-cp310-macosx_10_9_x86_64.whl (4.4 MB)
Installing collected packages: lxml
Successfully installed lxml-5.3.1
