In [None]:
import os
from dotenv import load_dotenv

# Load variables from a local .env file (python-dotenv) into the process
# environment so the API-key lookup in the next cell can find them.
load_dotenv()

In [None]:
# Bright Data API token, read from the process environment.
BRIGHT_DATA_REDDIT_SCRAPER_API_KEY = os.environ.get("BRIGHT_DATA_REDDIT_SCRAPER_API_KEY")
# Fail fast with an actionable message. A truthiness check (rather than
# `is not None`) also rejects an empty value such as `KEY=` in a .env file,
# which would otherwise produce a blank bearer token in the request headers.
assert BRIGHT_DATA_REDDIT_SCRAPER_API_KEY, (
    "BRIGHT_DATA_REDDIT_SCRAPER_API_KEY is not set; "
    "define it in the environment or in a .env file."
)

In [None]:
def get_crawl_headers():
    """Build the HTTP headers used by the Bright Data API calls in this notebook.

    Returns:
        dict: An ``Authorization`` bearer header built from
        ``BRIGHT_DATA_REDDIT_SCRAPER_API_KEY`` and a JSON ``Content-Type``.
    """
    bearer_token = f"Bearer {BRIGHT_DATA_REDDIT_SCRAPER_API_KEY}"
    headers = {"Authorization": bearer_token}
    headers["Content-Type"] = "application/json"
    return headers

In [None]:
import requests


def perform_scrape_snapshot(subreddit_url, num_of_posts: int = 20, timeout: float = 30.0):
    """Trigger a Bright Data dataset collection for a subreddit.

    Args:
        subreddit_url: URL of the subreddit to scrape,
            e.g. ``"https://www.reddit.com/r/Django"``.
        num_of_posts: Value sent as ``num_of_posts`` in the request input.
        timeout: Seconds ``requests`` waits on the connection before giving up.
            Without it a stalled connection would block the kernel indefinitely.

    Returns:
        The ``snapshot_id`` field of the JSON response, or ``None`` when the
        response does not contain one.

    Raises:
        requests.exceptions.HTTPError: If the API answers with a 4xx/5xx status.
        requests.exceptions.Timeout: If the API does not respond within ``timeout``.
    """
    trigger_url = "https://api.brightdata.com/datasets/v3/trigger"
    query = {
        "dataset_id": "gd_lvz8ah06191smkebj4",
        "include_errors": "true",
        "type": "discover_new",
        "discover_by": "subreddit_url",
        # NOTE(review): fixed at "100" regardless of num_of_posts — confirm intended.
        "limit_per_input": "100",
    }
    payload = [
        {
            "url": str(subreddit_url),
            "sort_by": "Top",
            "sort_by_time": "Today",
            "num_of_posts": num_of_posts,
        },
    ]

    response = requests.post(
        trigger_url,
        headers=get_crawl_headers(),
        params=query,
        json=payload,
        timeout=timeout,
    )
    response.raise_for_status()
    # Kept separate from `payload` so the request body and the response body
    # never share a name.
    result = response.json()
    return result.get("snapshot_id")


# Trigger a scrape of r/Django; the returned snapshot id is shown as the
# cell output (it is not stored in a variable here).
perform_scrape_snapshot(
    "https://www.reddit.com/r/Django",
    num_of_posts=25,
)

In [None]:
# Snapshot id consumed by the progress/download cells below.
# NOTE(review): hard-coded — presumably copied from the output of an earlier
# trigger call; replace it with the id of the snapshot you want to inspect.
snapshot_id = "s_mf61dpsb2i1queb29"

In [None]:
def get_snapshot_progress(snapshot_id: str, timeout: float = 30.0) -> bool:
    """Check whether the progress endpoint reports a snapshot as ready.

    Args:
        snapshot_id: Identifier of the snapshot to query.
        timeout: Seconds ``requests`` waits on the connection before giving up.
            Without it a stalled connection would block the kernel indefinitely.

    Returns:
        ``True`` when the JSON response's ``status`` field equals ``"ready"``,
        ``False`` for any other (or missing) status.

    Raises:
        requests.exceptions.HTTPError: If the API answers with a 4xx/5xx status.
        requests.exceptions.Timeout: If the API does not respond within ``timeout``.
    """
    progress_url = f"https://api.brightdata.com/datasets/v3/progress/{snapshot_id}"
    response = requests.get(progress_url, headers=get_crawl_headers(), timeout=timeout)
    response.raise_for_status()
    return response.json().get("status") == "ready"


# Displays True once the snapshot's status is "ready"; re-run this cell until it does.
get_snapshot_progress(snapshot_id)

In [None]:
def download_snapshot(snapshot_id: str, timeout: float = 60.0) -> list:
    """Download the contents of a snapshot as parsed JSON.

    Args:
        snapshot_id: Identifier of the snapshot to fetch.
        timeout: Seconds ``requests`` waits on the connection before giving up.
            Without it a stalled connection would block the kernel indefinitely.

    Returns:
        The decoded JSON body. The rest of this notebook iterates over it and
        calls ``.get`` on each element, i.e. it is treated as a list of
        per-thread dicts (hence ``list`` rather than ``dict``).

    Raises:
        requests.exceptions.HTTPError: If the API answers with a 4xx/5xx status.
        requests.exceptions.Timeout: If the API does not respond within ``timeout``.
    """
    snapshot_url = f"https://api.brightdata.com/datasets/v3/snapshot/{snapshot_id}"
    response = requests.get(
        snapshot_url,
        headers=get_crawl_headers(),
        params={"format": "json"},
        timeout=timeout,
    )
    response.raise_for_status()
    return response.json()

In [None]:
# Fetch the snapshot's JSON payload once and keep it for the inspection cells below.
reddit_results = download_snapshot(snapshot_id)

In [None]:
# Show the raw payload as the cell output (this renders the entire result).
reddit_results

In [None]:
# Print "<title> <num_upvotes>" for every scraped thread.
for thread in reddit_results:
    print(*(thread.get(field) for field in ("title", "num_upvotes")))
    # print() appends its own newline, so this leaves two blank lines between threads.
    print("\n")