In [None]:

import sys
import os
project_root = os.path.dirname(os.getcwd())
if project_root not in sys.path:
    sys.path.insert(0, project_root)

# import the crawler
from crawler import RedditCrawler, CrawlerConfig


In [None]:
# Create configuration
config = CrawlerConfig(
    subreddit="IndianStockMarket",
    max_posts=10,
    max_comments_per_post=100,
    sort_by="new",  # Options: "new", "hot", "top", "rising"
    request_delay=2.0,
    mongo_uri="mongodb://localhost:27017",  # Your MongoDB URI
    database_name="reddit_crawler"
)

ðŸ“Œ Target: r/IndianStockMarket
ðŸ“Š Max posts: 10
ðŸ’¬ Max comments per post: 100


In [None]:
crawler = RedditCrawler(config)

# Connect to MongoDB (make sure MongoDB is running!)
if crawler.connect_database():
    print("Connected to MongoDB")


2025-12-29 12:48:47,641 - crawler.database - INFO - Database indexes created
2025-12-29 12:48:47,641 - crawler.database - INFO - Connected to MongoDB: reddit_crawler


Connected to MongoDB


In [None]:
# Run the crawl
result = crawler.crawl(fetch_comments=True)

print(f"Duration: {result.duration_seconds:.2f}s")
print(f"Posts fetched: {result.posts_fetched}")
print(f"--New: {result.posts_inserted}")
print(f"--Updated: {result.posts_updated}")
print(f"Comments fetched: {result.comments_fetched}")
print(f"--New: {result.comments_inserted}")
print(f"--Updated: {result.comments_updated}")


Duration: 22.92s
Posts fetched: 10
--New: 10
--Updated: 0
Comments fetched: 41
--New: 41
--Updated: 0


In [7]:
# View database statistics
stats = crawler.database.get_stats()
print("ðŸ“ˆ DATABASE STATS")
print(f"   Total posts: {stats['total_posts']}")
print(f"   Total comments: {stats['total_comments']}")
print(f"   Changes tracked: {stats['total_changes']}")


ðŸ“ˆ DATABASE STATS
   Total posts: 10
   Total comments: 41
   Changes tracked: 0


In [12]:
# View some sample posts
posts = crawler.database.get_posts(limit=5)

print("POSTS:\n")
for i, post in enumerate(posts, 1):
    print(f"{i}. [{post.get('score', 0):+d}] {post.get('title', '')}...")
    print(f"   Author: u/{post.get('author', 'unknown')}")
    print(f"   Comments: {post.get('num_comments', 0)}")
    print()


POSTS:

1. [+1] Should you Sell stocks before New Year?...
   Author: u/PretentiousFlower
   Comments: 3

2. [+1] All the Silver/Gold investors - BE VERY CAREFUL TODAY 29 DEC 2025. CME, the top exchange for forex, is going to increase Silver /Gold Futures Maintenance Margin from today after market close....
   Author: u/Cress-Used
   Comments: 1

3. [+1] Why you should not buy silver ETFs...
   Author: u/raghurame1991
   Comments: 5

4. [+2] Trading in delta exchange...
   Author: u/Additional-Ask-2775
   Comments: 7

5. [+22] Sold my Silver ETF with 100% gains...
   Author: u/babaispartan
   Comments: 17



In [13]:
# View comments for a specific post
if posts:
    first_post = posts[0]
    post_id = first_post.get('post_id')
    comments = crawler.database.get_comments_for_post(post_id, limit=5)
    
    print(f"ðŸ’¬ COMMENTS FOR: {first_post.get('title', '')[:50]}...\n")
    for comment in comments:
        body = comment.get('body', '')[:100].replace('\n', ' ')
        print(f"  [{comment.get('score', 0):+d}] {body}...")
        print(f"      - u/{comment.get('author', 'unknown')} (depth: {comment.get('depth', 0)})")
        print()


ðŸ’¬ COMMENTS FOR: Should you Sell stocks before New Year?...

  [+1]  General Guidelines - Buy/Sell, one-liner and Portfolio review posts will be removed.  Please refer ...
      - u/AutoModerator (depth: 0)

  [+1] Depends upon your sectors.  If sectors have no growth, sell.. ask AI to understand this aspect...
      - u/Only_bliss_ (depth: 0)

  [+1] What randomness is this! What has year end to do with profit booking??...
      - u/ApprehensiveBat8558 (depth: 0)



In [None]:
# Cleanup - disconnect from database
crawler.disconnect_database()
print("âœ… Disconnected from MongoDB")
