# Review Scraper
This notebook contains code to scrape the reviews list from each game (app) scraped from the steam-applist-scraper.

It uses the steamreviews pip project.

Since steamreviews breaks down the processed ids into multiple files, use the function `load_id_processed_files` passing the filenames of each file of processed ids to join them, and then pass then to the `steamreviews.download_reviews_for_app_id_batch` call, like the example:

```python
processed_ids = load_id_processed_files(["./idprocessed_on_20250918.txt", "./idprocessed_on_20250919.txt"])

steamreviews.download_reviews_for_app_id_batch(
    filtered_app_ids, previously_processed_app_ids=processed_ids)
```

In [2]:
import steamreviews

In [3]:
def load_id_processed_files(file_paths: list[str]) -> list[int]:
    processed_ids = set()
    for file_path in file_paths:
        try:
            with open(file_path, 'r') as f:
              for line in f:
                try:
                  app_id = int(line.strip())
                  processed_ids.add(app_id)
                except ValueError:
                  print(f"Skipping invalid line: {line.strip()} in {file_path}")
        except FileNotFoundError:
            print(f"File not found: {file_path}, skipping.")
            continue
    return list(processed_ids)

In [19]:
from datetime import date, timedelta


def idprocessed_filenames_since_start(start_date: date) -> str:
    today = date.today()

    num_days = (today - start_date).days

    date_strings = [(start_date + timedelta(days=i)).strftime("%Y%m%d")
                    for i in range(num_days + 1)]
    return [f"./idprocessed_on_{d}.txt" for d in date_strings]


idprocessed_filenames_since_start(date(2025, 9, 18))

['./idprocessed_on_20250918.txt',
 './idprocessed_on_20250919.txt',
 './idprocessed_on_20250920.txt',
 './idprocessed_on_20250921.txt']

In [4]:
import json
with open('../steam-applist-scraper/filtered_apps_ids.json', 'r') as f:
  filtered_app_ids = json.load(f)
  filtered_app_ids = [int(appid) for appid in filtered_app_ids]

filtered_app_ids

[1048580,
 10,
 3145740,
 20,
 2097180,
 30,
 40,
 2097200,
 50,
 60,
 1048640,
 70,
 3145800,
 2097230,
 80,
 3145810,
 1048660,
 2097240,
 100,
 1048690,
 130,
 3145860,
 1048710,
 1048740,
 1048780,
 1048790,
 220,
 240,
 2097410,
 1048850,
 280,
 3146010,
 1048860,
 3146020,
 300,
 1048890,
 320,
 3146050,
 1048900,
 2097490,
 340,
 1048920,
 360,
 1048950,
 380,
 1048960,
 2097540,
 2097550,
 400,
 2097570,
 420,
 1049010,
 1049030,
 2097620,
 3146200,
 2097630,
 1049070,
 500,
 1049080,
 1049090,
 2097670,
 3146250,
 1049100,
 3146270,
 550,
 1049130,
 1049140,
 2097720,
 2097730,
 2097740,
 3146320,
 2097750,
 2097770,
 620,
 3146360,
 2097790,
 1049220,
 2097810,
 659,
 3146400,
 2097830,
 1049270,
 1049280,
 2097860,
 2097880,
 1049310,
 2097890,
 1049320,
 2097900,
 2097910,
 1049340,
 2097920,
 3146500,
 2097930,
 3146510,
 3146520,
 1049370,
 1049380,
 2097960,
 1049410,
 2098010,
 2098040,
 3146680,
 1049560,
 1002,
 3146730,
 2098160,
 3146740,
 2098170,
 2098180,
 104963

In [20]:
processed_ids = load_id_processed_files(
    idprocessed_filenames_since_start(date(2025, 9, 18)))
processed_ids

[3153920,
 1064960,
 1048580,
 2113540,
 10,
 3145740,
 20,
 2105370,
 2097180,
 3162140,
 30,
 1064990,
 40,
 2113580,
 2097200,
 50,
 2113590,
 2105400,
 3162170,
 60,
 1065020,
 1048640,
 16450,
 70,
 1065030,
 3145800,
 1056840,
 2097230,
 80,
 3145810,
 1048660,
 2097240,
 1065050,
 100,
 2105450,
 1065070,
 1048690,
 2113650,
 2105460,
 16500,
 8310,
 2113660,
 8320,
 130,
 3145860,
 1048710,
 2105480,
 8330,
 1065100,
 2113680,
 2113690,
 3162270,
 1048740,
 2105520,
 3162290,
 1056960,
 1056970,
 1048780,
 8400,
 3154130,
 1048790,
 16600,
 220,
 1065180,
 3162340,
 2113770,
 16620,
 240,
 3154160,
 1065200,
 3154170,
 1065210,
 2113790,
 2105600,
 2097410,
 1057030,
 2105610,
 1065230,
 1048850,
 2113810,
 280,
 3146010,
 1048860,
 1065250,
 3146020,
 300,
 1065260,
 1048890,
 2113850,
 2105660,
 320,
 3146050,
 1057090,
 1048900,
 2105670,
 16710,
 2105680,
 16720,
 2097490,
 340,
 1048920,
 16730,
 3154270,
 1065310,
 2105700,
 360,
 1065320,
 2113900,
 1048950,
 380,
 10571

In [1]:
import steamreviews

app_ids = [329070, 573170]
steamreviews.download_reviews_for_app_id_batch(app_ids)

Loading idprocessed_on_20250918.txt
Creating idprocessed_on_20250918.txt
Downloading reviews for appID = 329070
[appID = 329070] expected #reviews = 1550
[appID = 329070] num_reviews = 1550 (expected: 1550)
Downloading reviews for appID = 573170
[appID = 573170] expected #reviews = 393
[appID = 573170] num_reviews = 393 (expected: 393)
Game records written: 2


True

In [3]:
review_dict = steamreviews.load_review_dict(329070)
review_dict

{'reviews': {'204197122': {'recommendationid': '204197122',
   'author': {'steamid': '76561199031859149',
    'num_games_owned': 221,
    'num_reviews': 119,
    'playtime_forever': 94,
    'playtime_last_two_weeks': 1,
    'playtime_at_review': 94,
    'last_played': 1757484907},
   'language': 'english',
   'review': 'abandoned',
   'timestamp_created': 1757722658,
   'timestamp_updated': 1757722658,
   'voted_up': False,
   'votes_up': 0,
   'votes_funny': 0,
   'weighted_vote_score': '0.476190477609634399',
   'comment_count': 0,
   'steam_purchase': True,
   'received_for_free': False,
   'written_during_early_access': True,
   'primarily_steam_deck': False},
  '203980726': {'recommendationid': '203980726',
   'author': {'steamid': '76561199106401100',
    'num_games_owned': 0,
    'num_reviews': 2,
    'playtime_forever': 11290,
    'playtime_last_two_weeks': 330,
    'playtime_at_review': 10973,
    'last_played': 1758159444},
   'language': 'english',
   'review': "Incredible g

In [8]:
list(review_dict['reviews'].items())[:3]

[('204197122',
  {'recommendationid': '204197122',
   'author': {'steamid': '76561199031859149',
    'num_games_owned': 221,
    'num_reviews': 119,
    'playtime_forever': 94,
    'playtime_last_two_weeks': 1,
    'playtime_at_review': 94,
    'last_played': 1757484907},
   'language': 'english',
   'review': 'abandoned',
   'timestamp_created': 1757722658,
   'timestamp_updated': 1757722658,
   'voted_up': False,
   'votes_up': 0,
   'votes_funny': 0,
   'weighted_vote_score': '0.476190477609634399',
   'comment_count': 0,
   'steam_purchase': True,
   'received_for_free': False,
   'written_during_early_access': True,
   'primarily_steam_deck': False}),
 ('203980726',
  {'recommendationid': '203980726',
   'author': {'steamid': '76561199106401100',
    'num_games_owned': 0,
    'num_reviews': 2,
    'playtime_forever': 11290,
    'playtime_last_two_weeks': 330,
    'playtime_at_review': 10973,
    'last_played': 1758159444},
   'language': 'english',
   'review': "Incredible game. T

In [10]:
steamreviews.download_reviews_for_app_id_batch(filtered_app_ids)

Loading idprocessed_on_20250918.txt
Downloading reviews for appID = 1048580
[appID = 1048580] expected #reviews = 72
[appID = 1048580] num_reviews = 72 (expected: 72)
Downloading reviews for appID = 10
[appID = 10] expected #reviews = 252434
Number of queries 150 reached. Cooldown: 310 seconds
Number of queries 150 reached. Cooldown: 310 seconds
Number of queries 150 reached. Cooldown: 310 seconds
Number of queries 150 reached. Cooldown: 310 seconds
Number of queries 150 reached. Cooldown: 310 seconds
Number of queries 150 reached. Cooldown: 310 seconds
Number of queries 150 reached. Cooldown: 310 seconds
Number of queries 150 reached. Cooldown: 310 seconds
Number of queries 150 reached. Cooldown: 310 seconds
Number of queries 150 reached. Cooldown: 310 seconds
Number of queries 150 reached. Cooldown: 310 seconds
Number of queries 150 reached. Cooldown: 310 seconds
Number of queries 150 reached. Cooldown: 310 seconds
Number of queries 150 reached. Cooldown: 310 seconds
Number of querie

ConnectionError: HTTPSConnectionPool(host='store.steampowered.com', port=443): Max retries exceeded with url: /appreviews/240?json=1&language=all&filter=recent&review_type=all&purchase_type=all&num_per_page=100&appids=240&cursor=AoJwj9yPyowDeqSQzAQ%3D (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x7a9b795d8680>: Failed to establish a new connection: [Errno 111] Connection refused'))

In [8]:
processed_ids = load_id_processed_files(
    ["./idprocessed_on_20250918.txt", "./idprocessed_on_20250919.txt", "./idprocessed_on_20250920.txt"])
processed_ids

[2101250,
 1048580,
 3147780,
 4100,
 10,
 3145740,
 2101260,
 3149840,
 20,
 2099220,
 2101270,
 3149850,
 2097180,
 30,
 2101280,
 1050660,
 40,
 2101290,
 3149870,
 2097200,
 50,
 2100,
 3147830,
 3149880,
 60,
 1048640,
 1050690,
 70,
 2099270,
 3145800,
 1050700,
 2097230,
 1052750,
 80,
 3145810,
 2130,
 1048660,
 2101330,
 2097240,
 1052760,
 2101340,
 3147870,
 1050720,
 100,
 2101350,
 1050730,
 3149820,
 2101360,
 1048690,
 3147890,
 3149940,
 3147900,
 130,
 3145860,
 1048710,
 4230,
 3149960,
 3147920,
 4240,
 3149970,
 2099350,
 2200,
 3149980,
 2210,
 2101410,
 1048740,
 3147940,
 2099370,
 3147950,
 1052850,
 3147960,
 1050810,
 2099390,
 2101440,
 4290,
 3150020,
 1052870,
 2099400,
 1048780,
 4300,
 3150030,
 1048790,
 1052890,
 220,
 2099420,
 2270,
 3148000,
 1050850,
 3150050,
 1052900,
 2099430,
 2280,
 2101480,
 3148010,
 240,
 2101490,
 1050880,
 2097410,
 2310,
 3148040,
 2320,
 1048850,
 1050900,
 280,
 3146010,
 1048860,
 1050910,
 3146020,
 3150120,
 1052970,

In [16]:
steamreviews.download_reviews_for_app_id_batch(
    filtered_app_ids, previously_processed_app_ids=processed_ids)

Skipping previously found appID = 1048580
Skipping previously found appID = 10
Skipping previously found appID = 3145740
Skipping previously found appID = 20
Skipping previously found appID = 2097180
Skipping previously found appID = 30
Skipping previously found appID = 40
Skipping previously found appID = 2097200
Skipping previously found appID = 50
Skipping previously found appID = 60
Skipping previously found appID = 1048640
Skipping previously found appID = 70
Skipping previously found appID = 3145800
Skipping previously found appID = 2097230
Skipping previously found appID = 80
Skipping previously found appID = 3145810
Skipping previously found appID = 1048660
Skipping previously found appID = 2097240
Skipping previously found appID = 100
Skipping previously found appID = 1048690
Skipping previously found appID = 130
Skipping previously found appID = 3145860
Skipping previously found appID = 1048710
Skipping previously found appID = 1048740
Skipping previously found appID = 104878

KeyboardInterrupt: 

In [7]:
steamreviews.download_reviews_for_app_id_batch(
    filtered_app_ids, previously_processed_app_ids=processed_ids)

Skipping previously found appID = 1048580
Skipping previously found appID = 10
Skipping previously found appID = 3145740
Skipping previously found appID = 20
Skipping previously found appID = 2097180
Skipping previously found appID = 30
Skipping previously found appID = 40
Skipping previously found appID = 2097200
Skipping previously found appID = 50
Skipping previously found appID = 60
Skipping previously found appID = 1048640
Skipping previously found appID = 70
Skipping previously found appID = 3145800
Skipping previously found appID = 2097230
Skipping previously found appID = 80
Skipping previously found appID = 3145810
Skipping previously found appID = 1048660
Skipping previously found appID = 2097240
Skipping previously found appID = 100
Skipping previously found appID = 1048690
Skipping previously found appID = 130
Skipping previously found appID = 3145860
Skipping previously found appID = 1048710
Skipping previously found appID = 1048740
Skipping previously found appID = 104878

ConnectionError: HTTPSConnectionPool(host='store.steampowered.com', port=443): Max retries exceeded with url: /appreviews/6060?json=1&language=all&filter=recent&review_type=all&purchase_type=all&num_per_page=100&appids=6060&cursor=AoJwnPqbkN8Cd86viAE%3D (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x776d75bbddc0>: Failed to establish a new connection: [Errno 111] Connection refused'))

In [9]:
steamreviews.download_reviews_for_app_id_batch(
    filtered_app_ids, previously_processed_app_ids=processed_ids)

Skipping previously found appID = 1048580
Skipping previously found appID = 10
Skipping previously found appID = 3145740
Skipping previously found appID = 20
Skipping previously found appID = 2097180
Skipping previously found appID = 30
Skipping previously found appID = 40
Skipping previously found appID = 2097200
Skipping previously found appID = 50
Skipping previously found appID = 60
Skipping previously found appID = 1048640
Skipping previously found appID = 70
Skipping previously found appID = 3145800
Skipping previously found appID = 2097230
Skipping previously found appID = 80
Skipping previously found appID = 3145810
Skipping previously found appID = 1048660
Skipping previously found appID = 2097240
Skipping previously found appID = 100
Skipping previously found appID = 1048690
Skipping previously found appID = 130
Skipping previously found appID = 3145860
Skipping previously found appID = 1048710
Skipping previously found appID = 1048740
Skipping previously found appID = 104878

ConnectionError: HTTPSConnectionPool(host='store.steampowered.com', port=443): Max retries exceeded with url: /appreviews/10500?json=1&language=all&filter=recent&review_type=all&purchase_type=all&num_per_page=100&appids=10500&cursor=AoJ4kNC5lvsCc%2F2A8wI%3D (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x776d88463500>: Failed to establish a new connection: [Errno 111] Connection refused'))

In [12]:
steamreviews.download_reviews_for_app_id_batch(
    filtered_app_ids, previously_processed_app_ids=processed_ids)

Skipping previously found appID = 1048580
Skipping previously found appID = 10
Skipping previously found appID = 3145740
Skipping previously found appID = 20
Skipping previously found appID = 2097180
Skipping previously found appID = 30
Skipping previously found appID = 40
Skipping previously found appID = 2097200
Skipping previously found appID = 50
Skipping previously found appID = 60
Skipping previously found appID = 1048640
Skipping previously found appID = 70
Skipping previously found appID = 3145800
Skipping previously found appID = 2097230
Skipping previously found appID = 80
Skipping previously found appID = 3145810
Skipping previously found appID = 1048660
Skipping previously found appID = 2097240
Skipping previously found appID = 100
Skipping previously found appID = 1048690
Skipping previously found appID = 130
Skipping previously found appID = 3145860
Skipping previously found appID = 1048710
Skipping previously found appID = 1048740
Skipping previously found appID = 104878

ConnectionError: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))

In [16]:
steamreviews.download_reviews_for_app_id_batch(
    filtered_app_ids, previously_processed_app_ids=processed_ids)

Skipping previously found appID = 1048580
Skipping previously found appID = 10
Skipping previously found appID = 3145740
Skipping previously found appID = 20
Skipping previously found appID = 2097180
Skipping previously found appID = 30
Skipping previously found appID = 40
Skipping previously found appID = 2097200
Skipping previously found appID = 50
Skipping previously found appID = 60
Skipping previously found appID = 1048640
Skipping previously found appID = 70
Skipping previously found appID = 3145800
Skipping previously found appID = 2097230
Skipping previously found appID = 80
Skipping previously found appID = 3145810
Skipping previously found appID = 1048660
Skipping previously found appID = 2097240
Skipping previously found appID = 100
Skipping previously found appID = 1048690
Skipping previously found appID = 130
Skipping previously found appID = 3145860
Skipping previously found appID = 1048710
Skipping previously found appID = 1048740
Skipping previously found appID = 104878

ConnectionError: HTTPSConnectionPool(host='store.steampowered.com', port=443): Max retries exceeded with url: /appreviews/22380?json=1&language=all&filter=recent&review_type=all&purchase_type=all&num_per_page=100&appids=22380&cursor=AoJ42ODT4IMDdIHO1wM%3D (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x776d75144920>: Failed to establish a new connection: [Errno 111] Connection refused'))

In [None]:
from time import sleep
from requests import ConnectionError

while True:
  try:
    processed_ids = load_id_processed_files(
      idprocessed_filenames_since_start(date(2025, 9, 18)))
    steamreviews.download_reviews_for_app_id_batch(
        filtered_app_ids, previously_processed_app_ids=processed_ids)
  except ConnectionError as e:
    print(f"Connection error occurred: {e}")
    sleep(15)
    print("\n\n\n\n\n\n\n")
    print("Retrying the download...")

Skipping previously found appID = 1048580
Skipping previously found appID = 10
Skipping previously found appID = 3145740
Skipping previously found appID = 20
Skipping previously found appID = 2097180
Skipping previously found appID = 30
Skipping previously found appID = 40
Skipping previously found appID = 2097200
Skipping previously found appID = 50
Skipping previously found appID = 60
Skipping previously found appID = 1048640
Skipping previously found appID = 70
Skipping previously found appID = 3145800
Skipping previously found appID = 2097230
Skipping previously found appID = 80
Skipping previously found appID = 3145810
Skipping previously found appID = 1048660
Skipping previously found appID = 2097240
Skipping previously found appID = 100
Skipping previously found appID = 1048690
Skipping previously found appID = 130
Skipping previously found appID = 3145860
Skipping previously found appID = 1048710
Skipping previously found appID = 1048740
Skipping previously found appID = 104878