In [8]:
import requests
import pandas as pd
import time
import os
from tqdm import tqdm
import matplotlib.pyplot as plt

# Create output directory if not exists
output_dir = "steamspy_pages"
os.makedirs(output_dir, exist_ok=True)

page = 0

# Download the SteamSpy "all" listing one page at a time and store every page
# as its own CSV file. The context manager closes the progress bar even when
# the loop is interrupted (e.g. by stopping the kernel).
with tqdm(desc="Downloading SteamSpy Pages", unit="page") as pbar:
    while True:
        url = f"https://steamspy.com/api.php?request=all&page={page}"
        try:
            # A timeout keeps a stalled connection from hanging the cell forever;
            # raise_for_status turns HTTP error codes into exceptions.
            response = requests.get(url, timeout=30)
            response.raise_for_status()
        except requests.RequestException as e:
            print(f"Error on page {page}: {e}")
            break

        try:
            data = response.json()
        except ValueError:
            # A body that is not JSON was observed when requesting a page past
            # the last one, so it is treated as the end of the listing rather
            # than as a failure.
            print(f"Page {page} did not return JSON; assuming no more data. Exiting loop.")
            break

        if not data:
            print("No more data. Exiting loop.")
            break

        # Convert JSON dict to DataFrame (one row per top-level key)
        df = pd.DataFrame.from_dict(data, orient='index')

        # Save to CSV
        filename = os.path.join(output_dir, f"steamspy_page_{page}.csv")
        df.to_csv(filename, index=False)

        # Update progress bar
        pbar.update(1)
        pbar.set_postfix_str(f"Page {page} saved")

        # Sleep to avoid hitting the request limit
        time.sleep(3)
        page += 1


Downloading SteamSpy Pages: 36page [01:56,  3.25s/page, Page 35 saved]

Error on page 36: Expecting value: line 1 column 1 (char 0)





In [None]:
import os
import time

import pandas as pd
import requests

# Steam app id whose reviews are fetched (alternate app id: 2767030).
appid = 2246340
# Upper bound on the number of review pages requested.
num_pages = 100

def fetch_reviews(appid, num_pages=10, delay=1, timeout=30):
    """Download recent English reviews for a Steam app, page by page.

    Pagination is cursor based: the first request sends ``'*'`` and every
    following request sends the cursor returned by the previous response.
    Fetching stops early on a request failure, a non-200 status, an
    unreadable or unsuccessful response, an empty page, a missing cursor or
    a cursor that was already used.

    Args:
        appid: Steam application id whose reviews are requested.
        num_pages: Maximum number of pages to request.
        delay: Seconds to sleep between two consecutive requests.
        timeout: Seconds to wait for each HTTP response.

    Returns:
        A list with one flat dict per review (author statistics, review text,
        creation timestamp and vote counts). Reviews collected before an
        early stop are still returned.
    """
    all_reviews = []
    cursor = '*'
    # Cursors already sent or received; seeing one again means the next
    # request would only return reviews that were already collected.
    seen_cursors = {cursor}
    url = f'https://store.steampowered.com/appreviews/{appid}'

    for page in range(num_pages):
        params = {
            'json': 1,
            'filter': 'recent',
            'language': 'english',
            'purchase_type': 'all',
            'cursor': cursor
        }
        try:
            response = requests.get(url, params=params, timeout=timeout)
        except requests.RequestException as exc:
            # Keep what was collected so far instead of losing it to a
            # transient network problem.
            print(f"Request failed on page {page + 1}: {exc}")
            break

        if response.status_code != 200:
            print(f"Failed to fetch data: {response.status_code}")
            break

        try:
            data = response.json()
        except ValueError:
            print(f"Page {page + 1} did not contain valid JSON.")
            break

        # The payload carries its own success flag; anything but 1 is treated
        # as a failed query. A missing flag is tolerated.
        if data.get('success', 1) != 1:
            print(f"Steam reported an unsuccessful query on page {page + 1}.")
            break

        reviews = data.get('reviews', [])
        if not reviews:
            print("No more reviews available.")
            break

        for review in reviews:
            author = review.get('author', {})
            all_reviews.append({
                'steamid': author.get('steamid'),
                'num_games_owned': author.get('num_games_owned'),
                'num_reviews': author.get('num_reviews'),
                'playtime_forever': author.get('playtime_forever'),
                'playtime_last_two_weeks': author.get('playtime_last_two_weeks'),
                'review_text': review.get('review'),
                'timestamp_created': review.get('timestamp_created'),
                'voted_up': review.get('voted_up'),
                'votes_up': review.get('votes_up'),
                'votes_funny': review.get('votes_funny')
            })

        cursor = data.get('cursor', '')
        if not cursor:
            print("No further cursor found, ending pagination.")
            break
        if cursor in seen_cursors:
            print("Cursor repeated, ending pagination.")
            break
        seen_cursors.add(cursor)

        print(f"Page {page + 1} fetched successfully.")
        # No need to wait after the final page.
        if page + 1 < num_pages:
            time.sleep(delay)

    return all_reviews

# Fetch reviews for the app configured in `appid`
reviews = fetch_reviews(appid, num_pages)

# Convert to DataFrame
df = pd.DataFrame(reviews)

# Save to CSV; create the target directory first so the write does not fail
# when ./reviews does not exist yet
os.makedirs("./reviews", exist_ok=True)
df.to_csv(f"./reviews/{appid}.csv")

# Display the first few rows (only the last expression of a cell is rendered)
df.head()

Page 1 fetched successfully.
Page 2 fetched successfully.
Page 3 fetched successfully.
Page 4 fetched successfully.
Page 5 fetched successfully.
Page 6 fetched successfully.
Page 7 fetched successfully.
Page 8 fetched successfully.
Page 9 fetched successfully.
Page 10 fetched successfully.


In [3]:
!pip install dotenv

Collecting dotenv
  Using cached dotenv-0.9.9-py2.py3-none-any.whl.metadata (279 bytes)
Collecting python-dotenv (from dotenv)
  Using cached python_dotenv-1.1.0-py3-none-any.whl.metadata (24 kB)
Using cached dotenv-0.9.9-py2.py3-none-any.whl (1.9 kB)
Using cached python_dotenv-1.1.0-py3-none-any.whl (20 kB)
Installing collected packages: python-dotenv, dotenv
Successfully installed dotenv-0.9.9 python-dotenv-1.1.0


In [5]:
import json
from dotenv import load_dotenv
import json
import os

# Load variables from .env file
load_dotenv()

# Access the API key; stop here with a clear message when it is missing,
# since every later request would be sent without a usable key.
API_KEY = os.getenv("STEAM_API_KEY")
if not API_KEY:
    raise RuntimeError(
        "STEAM_API_KEY is not set; define it in the environment or in a .env file."
    )

# One lookup per reviewer: drop missing ids and duplicates (order is kept)
steam_ids = df["steamid"].dropna().drop_duplicates().to_list()


def collect_user_data(steam_ids, api_key, output_path=None, delay=1.1, timeout=30):
    """Collect game libraries and playtime for multiple users

    Calls the Steam Web API ``GetOwnedGames`` endpoint once per id and keeps
    the ``response`` object of every user whose answer contains a ``games``
    list. Users without that list are skipped. The collected records are
    written to a JSON file, also when the loop is interrupted part-way.

    Args:
        steam_ids: Iterable of Steam ids to look up.
        api_key: Steam Web API key sent with every request.
        output_path: Destination JSON file. Defaults to
            ``users/user_data_{appid}.json`` using the notebook-level
            ``appid`` variable.
        delay: Seconds to sleep after every request.
        timeout: Seconds to wait for each HTTP response.

    Returns:
        A list of ``{'steam_id': ..., 'response': ...}`` dicts.
    """
    if output_path is None:
        output_path = f'users/user_data_{appid}.json'

    # Create the target directory up front so a long crawl cannot end with a
    # failed write.
    output_dir = os.path.dirname(output_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    url = "https://api.steampowered.com/IPlayerService/GetOwnedGames/v0001/"
    user_data = []

    try:
        for steam_id in steam_ids:
            params = {
                'key': api_key,
                'steamid': steam_id,
                'format': 'json',
                'include_appinfo': 1,
            }
            try:
                response = requests.get(url, params=params, timeout=timeout)

                if response.status_code == 200:
                    data = response.json()
                    if 'response' in data and 'games' in data['response']:
                        user_data.append({
                            'steam_id': steam_id,
                            'response': data['response']
                        })
                        print(f"Collected data for user {steam_id}: {len(data['response']['games'])} games")
                else:
                    # Surface rejected requests instead of skipping them silently.
                    print(f"Request for user {steam_id} failed with status {response.status_code}")

            except Exception as e:
                # The key is part of the request URL, which error messages may
                # quote; mask it before printing.
                message = str(e)
                if api_key:
                    message = message.replace(str(api_key), '***')
                print(f"Error collecting data for user {steam_id}: {message}")

            # Respect API rate limits, also after a failed request
            time.sleep(delay)
    finally:
        # Save the collected data, including partial results on interruption
        with open(output_path, 'w') as f:
            json.dump(user_data, f)

    return user_data

# Execute data collection
user_data = collect_user_data(steam_ids, API_KEY)
print(f"Users: {len(user_data)}")

Collected data for user 76561198964977133: 15 games
Collected data for user 76561199060737113: 81 games
Collected data for user 76561198813372460: 27 games
Collected data for user 76561198110916760: 195 games
Collected data for user 76561198162654103: 150 games
Collected data for user 76561198313656273: 98 games
Collected data for user 76561198097716218: 290 games
Collected data for user 76561198038000288: 533 games
Collected data for user 76561198125663460: 168 games
Collected data for user 76561198050176275: 16 games
Collected data for user 76561198296903020: 97 games
Collected data for user 76561198262228881: 158 games
Collected data for user 76561198158674272: 331 games
Collected data for user 76561199018149056: 187 games
Collected data for user 76561198171459065: 326 games
Collected data for user 76561199068780229: 51 games
Collected data for user 76561199143930225: 210 games
Collected data for user 76561198141387936: 261 games
Collected data for user 76561198414324449: 52 games
C