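This notebook collects all game IDs from the Steam Web API (https://steamcommunity.com/dev) and additional information about each game from the Storefront API (https://wiki.teamfortress.com/wiki/User:RJackson/StorefrontAPI).
Then it appends the collected data to `raw_data.csv` and saves a random sample of it to `raw_data_small.csv`.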

In [15]:
import json
from urllib.request import urlopen
import pandas as pd
import os
from tqdm.notebook import tqdm
import numpy as np
import time

# Configuration
# Number of apps to scrape in one run; 0 means all apps that are not yet in raw_data.csv.
num_apps = 2000
# Rate limit noted for the API calls: 200 requests per 5-minute bucket, 100000 per day.

# Steam Web API endpoint that returns the list of all apps (read under "applist" -> "apps")
all_games_url = "https://api.steampowered.com/ISteamApps/GetAppList/v2/"
# Review endpoint; the scraper appends "<appid>?json=1" and reads "query_summary"
review_url = "https://store.steampowered.com/appreviews/"
# Storefront detail endpoint; the scraper appends the appid
app_details_url = "https://store.steampowered.com/api/appdetails?appids="

In [16]:
# download the complete app list and flatten it into one row per app
with urlopen(all_games_url) as response:
    app_list = json.load(response)["applist"]
apps = pd.json_normalize(app_list, record_path=['apps'])

# keep only the entries that actually have a name
apps = apps[apps.name != ""]

# a num_apps of 0 stands for "scrape every app"
if num_apps == 0:
    num_apps = len(apps)

# resume from an earlier run when its output file is present
if os.path.exists("raw_data.csv"):
    df = pd.read_csv('raw_data.csv', index_col=0, low_memory=False)

    # leave out every app whose id is already stored
    already_scraped = apps.appid.isin(df.appid)
    apps = apps[~already_scraped]
else:
    # first run: nothing collected yet
    df = pd.DataFrame()

# shuffle the remaining apps and keep the first num_apps of them
shuffled_apps = apps.sample(frac=1)
apps = shuffled_apps.head(num_apps)

# Collect the per-app results in plain lists and build the frames once after the
# loop; concatenating inside the loop re-copies all earlier rows on every iteration.
review_rows = []
detail_rows = []
# Positions (within apps) of the apps for which both calls succeeded. Filtering by
# position instead of by appid value keeps apps aligned with the two result tables
# even if an appid occurs more than once.
scraped_positions = []

# iterate through the appids
for i, appid in enumerate(tqdm(apps.appid)):

    # every 200 API calls wait 5 minutes to reset the bucket
    if i % 200 == 0 and i != 0:
        time.sleep(300)

    try:
        # fetch the store details; the response is closed before the next request starts
        with urlopen(app_details_url + str(appid)) as response:
            app_details = json.load(response)[str(appid)]

        # unsuccessful detail call: skip the app, it is left out of apps below
        if not app_details["success"]:
            continue

        # fetch the review summary for the same app
        with urlopen(review_url + str(appid) + "?json=1") as response:
            query_summary = json.load(response)['query_summary']

    except OSError as err:
        # urllib's URLError/HTTPError and socket timeouts are OSError subclasses.
        # Stop here instead of crashing so the rows collected so far are still
        # saved; apps that were not reached stay out of the data file and are
        # picked up by a later run.
        print(f"Stopped at app {appid} after {len(scraped_positions)} scraped apps: {err}")
        break

    # record the app only once both calls succeeded so the tables stay aligned row by row
    scraped_positions.append(i)
    detail_rows.append(app_details["data"])
    review_rows.append(query_summary)

# keep only the successfully scraped apps and build the result frames in one go
apps = apps.iloc[scraped_positions]
reviews = pd.DataFrame(review_rows)
details = pd.DataFrame(detail_rows)

# Put the three per-app tables side by side. They are matched by row position, not
# by index label, which is why each one gets a fresh index first.
new_rows = pd.concat(
    [
        apps.reset_index(drop=True),
        reviews.reset_index(drop=True),
        # steam_appid/name are presumably redundant with the appid/name columns of apps.
        # errors="ignore" keeps this working when nothing was scraped and details
        # therefore has no columns at all (a plain drop would raise KeyError).
        details.drop(columns=["steam_appid", "name"], errors="ignore").reset_index(drop=True),
    ],
    axis=1,
)

# append the new rows to the data of earlier runs
df = pd.concat([df, new_rows], ignore_index=True)

# overwrite csv
df.to_csv('raw_data.csv')
# DataFrame.sample raises ValueError when asked for more rows than exist,
# so cap the sample size at the number of available rows
df.sample(min(1000, len(df))).to_csv('raw_data_small.csv')


  0%|          | 0/2000 [00:00<?, ?it/s]