# Reddit Product Review Scraper
This notebook uses the Reddit API to search for product reviews across various subreddits.

In [1]:
import json
import praw
import pandas as pd
import os
from pathlib import Path

ModuleNotFoundError: No module named 'praw'

## Setup Reddit API

In [None]:
# Reddit API credentials.
# Secrets are read from the environment so they never live in the notebook or
# its version history. Set REDDIT_CLIENT_ID and REDDIT_CLIENT_SECRET before
# starting the kernel; REDDIT_USER_AGENT is optional.
# NOTE(review): credentials that were previously hardcoded in this cell should
# be treated as leaked and regenerated in the Reddit app settings.
_missing_credentials = [
    name
    for name in ("REDDIT_CLIENT_ID", "REDDIT_CLIENT_SECRET")
    if not os.environ.get(name)
]
if _missing_credentials:
    raise RuntimeError(
        "Missing Reddit API credentials; set environment variable(s): "
        + ", ".join(_missing_credentials)
    )

reddit = praw.Reddit(
    client_id=os.environ["REDDIT_CLIENT_ID"],
    client_secret=os.environ["REDDIT_CLIENT_SECRET"],
    # Falls back to the user agent this notebook has always used.
    user_agent=os.environ.get("REDDIT_USER_AGENT", "Izowk"),
)

## Load Product Data

In [None]:
# PRODUCT is not defined anywhere in this notebook; it has to be present in
# the kernel namespace already (e.g. injected as a parameter or set by hand).
# Fail with an actionable message instead of a bare NameError when it is not.
if "PRODUCT" not in globals():
    raise NameError(
        "PRODUCT is not defined. Set PRODUCT to the name of the product's "
        "folder under 'session/' before running this cell."
    )

# Metadata file expected at session/<PRODUCT>/stage_1.json.
DATA_DIR = Path("session") / PRODUCT
META_PATH = DATA_DIR / "stage_1.json"
assert META_PATH.exists(), "Run 1_describe_product.ipynb first!"

# Read explicitly as UTF-8 so non-ASCII product names load the same way on
# every platform instead of depending on the locale's default encoding.
with open(META_PATH, "r", encoding="utf-8") as f:
    metadata = json.load(f)

# Names of the competing products listed in the metadata.
competitors = [o["name"] for o in metadata["competition_products"]]
display(competitors)


## Define Subreddits to Search

In [None]:
# Broad, general-interest subreddits queried for every product.
# The list order is the order in which they are searched.
subreddits_to_search = [
    "all",
    "popular",
    "AskReddit",
    "NoStupidQuestions",
    "OutOfTheLoop",
    "TooAfraidToAsk",
    "CasualConversation",
    "TrueAskReddit",
    "Advice",
    "LifeProTips",
    "IWantToLearn",
    "AskMen",
    "AskWomen",
    "ProductAdvice",
    "ShouldIbuythis",
    "BuyItForLife",
]

## Search Reddit and Save Data

In [None]:
# Scrape settings shared by every product search.
post_limit = 10     # max posts collected per product, summed over all subreddits
search_limit = 50   # max search results requested from each subreddit
folder = "Data"     # directory that receives one CSV per product

# Column order of the output CSV. Passed to the DataFrame explicitly so that a
# product with zero hits still yields a CSV with a header row.
record_columns = [
    'subreddit',
    'post_title',
    'post_url',
    'comment_body',
    'comment_author',
    'comment_score',
    'comment_created_utc',
]


def _submission_records(submission, subreddit_name):
    """Return the rows for one submission: the post itself, then its comments.

    The post is stored in the same shape as a comment; its body is the
    selftext, or the title when the selftext is empty.

    Args:
        submission: a submission object yielded by ``subreddit.search``.
        subreddit_name: name of the subreddit that was searched (recorded
            as-is in the ``subreddit`` column).

    Returns:
        list[dict]: one dict per row, keyed by ``record_columns``.
    """
    # limit=0 drops the "load more comments" placeholders rather than fetching
    # them (see PRAW replace_more docs), so .list() below yields only comments.
    submission.comments.replace_more(limit=0)

    shared = {
        'subreddit': subreddit_name,
        'post_title': submission.title,
        'post_url': submission.url,
    }

    # Add the post itself as a "comment"
    rows = [{
        **shared,
        'comment_body': submission.selftext if submission.selftext else submission.title,
        'comment_author': str(submission.author),
        'comment_score': submission.score,
        'comment_created_utc': submission.created_utc,
    }]

    # Add comments
    rows.extend(
        {
            **shared,
            'comment_body': comment.body,
            'comment_author': str(comment.author),
            'comment_score': comment.score,
            'comment_created_utc': comment.created_utc,
        }
        for comment in submission.comments.list()
    )
    return rows


def _collect_reviews(search_query, subreddit_names, post_limit, search_limit):
    """Search each subreddit in order until ``post_limit`` posts are collected.

    Stickied posts are skipped, and so are posts already collected from an
    earlier subreddit in the same run (matched by submission id), so one post
    cannot consume the quota more than once.

    Args:
        search_query: text passed to the subreddit search.
        subreddit_names: subreddit names, searched in the given order.
        post_limit: stop as soon as this many posts have been collected.
        search_limit: maximum number of results requested per subreddit.

    Returns:
        tuple[list[dict], int]: the collected rows and the number of posts
        they came from.
    """
    rows = []
    seen_ids = set()
    valid_posts = 0

    for subreddit_name in subreddit_names:
        print(f"--- Searching r/{subreddit_name} ---")
        subreddit = reddit.subreddit(subreddit_name)

        try:
            for submission in subreddit.search(search_query, limit=search_limit):
                if submission.stickied or submission.id in seen_ids:
                    continue

                print(f"{submission.title}")

                # Build the post's rows first and commit them in one step, so a
                # failure while reading comments never leaves a half-recorded
                # post and valid_posts always matches the saved rows.
                rows.extend(_submission_records(submission, subreddit_name))
                seen_ids.add(submission.id)

                valid_posts += 1
                if valid_posts >= post_limit:
                    return rows, valid_posts
        except Exception as e:
            # Deliberately best-effort: report the failure and move on to the
            # next subreddit, keeping everything collected so far.
            print(f"Error searching r/{subreddit_name}: {e}")

    return rows, valid_posts


os.makedirs(folder, exist_ok=True)

# Run for each product
for search_topic in competitors:
    search_query = f"{search_topic} review"

    # Spaces become underscores as before; path separators are replaced too,
    # because a name such as "A/B" would otherwise point into a directory that
    # does not exist and make the save fail after the whole scrape.
    safe_topic = search_topic.replace(' ', '_')
    for separator in filter(None, (os.sep, os.altsep)):
        safe_topic = safe_topic.replace(separator, '_')
    filename = f"{safe_topic}_reddit_review"

    print(f"\nSearching Reddit for: '{search_query}' across general-purpose subreddits...\n")

    data, valid_posts = _collect_reviews(
        search_query, subreddits_to_search, post_limit, search_limit
    )

    # Save as CSV
    df = pd.DataFrame(data, columns=record_columns)
    csv_filename = os.path.join(folder, f"{filename}.csv")
    df.to_csv(csv_filename, index=False, encoding='utf-8')

    print(f"\nDone! {len(df)} total entries (including posts) saved from {valid_posts} posts into {csv_filename}")