In [None]:

import getpass
import os
import sys
# Put the parent of the current working directory at the front of the import
# path (only once), so the `crawler` import below can be resolved from there.
project_root = os.path.split(os.getcwd())[0]
if sys.path.count(project_root) == 0:
    sys.path[:0] = [project_root]

# import the crawler
from crawler import RedditCrawler, CrawlerConfig


In [None]:
# Create configuration
#
# Database credentials are no longer committed with the notebook:
#   * POSTGRES_USER     - optional, falls back to the previous placeholder value
#   * POSTGRES_PASSWORD - if unset or empty, the password is asked for
#                         interactively (input is not echoed)
config = CrawlerConfig(
    subreddit="IndianStockMarket",
    max_posts=10,
    max_comments_per_post=100,
    sort_by="new",  # Options: "new", "hot", "top", "rising"
    request_delay=2.0,
    # PostgreSQL settings
    postgres_host="localhost",
    postgres_port=5432,
    postgres_user=os.environ.get("POSTGRES_USER", "xxx"),
    postgres_password=(
        os.environ.get("POSTGRES_PASSWORD")
        or getpass.getpass("PostgreSQL password: ")
    ),
    postgres_database="reddit_crawler",
)

In [None]:
# Build the crawler from `config` and try to open its database connection.
# The truthiness of connect_database()'s return value decides which message
# is shown; a failed connection is reported but does not raise here.
crawler = RedditCrawler(config)

# Connect to PostgreSQL (make sure PostgreSQL is running!)
if crawler.connect_database():
    print("✅ Connected to PostgreSQL!")
else:
    print("❌ Failed to connect. Make sure PostgreSQL is running and the database exists.")
    print("   Create database with: createdb reddit_crawler")


In [None]:
# Crawl the configured subreddit (comments included) and keep the outcome
# in `result` for the summary below.
result = crawler.crawl(fetch_comments=True)

# One print call, one summary line per argument.
print(
    f"Duration: {result.duration_seconds:.2f}s",
    f"Posts fetched: {result.posts_fetched}",
    f"--New: {result.posts_inserted}",
    f"--Updated: {result.posts_updated}",
    f"Comments fetched: {result.comments_fetched}",
    f"--New: {result.comments_inserted}",
    f"--Updated: {result.comments_updated}",
    sep="\n",
)


In [None]:
# View database statistics
# get_stats() is indexed with the keys 'total_posts', 'total_comments' and
# 'total_changes'; a missing key raises KeyError.
stats = crawler.database.get_stats()
print("📈 DATABASE STATS")
print(f"   Total posts: {stats['total_posts']}")
print(f"   Total comments: {stats['total_comments']}")
print(f"   Changes tracked: {stats['total_changes']}")


In [None]:
# Fetch up to five stored posts and list them, numbered from 1.
# `posts` is reused by the comments cell further down.
posts = crawler.database.get_posts(limit=5)

print("POSTS:\n")
for i, post in enumerate(posts, start=1):
    # The trailing empty argument yields the blank separator line after each post.
    print(
        f"{i}. [{post.get('score', 0):+d}] {post.get('title', '')}...",
        f"   Author: u/{post.get('author', 'unknown')}",
        f"   Comments: {post.get('num_comments', 0)}",
        "",
        sep="\n",
    )


In [None]:
# View comments for a specific post
# Uses the first entry of `posts` (skipped entirely when `posts` is empty)
# and shows up to five of its comments.
if posts:
    first_post = posts[0]
    post_id = first_post.get('post_id')
    comments = crawler.database.get_comments_for_post(post_id, limit=5)

    # `or ''` covers both a missing key and a key stored as None, which
    # `.get(key, '')` alone would pass through to the slice and crash on.
    print(f"💬 COMMENTS FOR: {(first_post.get('title') or '')[:50]}...\n")
    for comment in comments:
        # First 100 characters only, flattened onto a single line.
        body = (comment.get('body') or '')[:100].replace('\n', ' ')
        print(f"  [{comment.get('score', 0):+d}] {body}...")
        print(f"      - u/{comment.get('author', 'unknown')} (depth: {comment.get('depth', 0)})")
        print()


In [None]:
# Cleanup - disconnect from database
# The backend used throughout this notebook is PostgreSQL, so the
# confirmation message names it.
crawler.disconnect_database()
print("✅ Disconnected from PostgreSQL")
