In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

def get_top_life_simulation_games(page=1):
    """Scrape one page of Steam's top-seller search results.

    NOTE: despite the function name, the request URL carries no tag filter;
    it asks for top sellers with English support across the whole store, so
    the results are not restricted to Life Simulation games.

    Args:
        page: 1-based page number of the search results to fetch.

    Returns:
        A list of ``{'title': str, 'app_id': str}`` dicts, one per result row
        that exposes both a title and a ``data-ds-appid`` attribute.

    Raises:
        requests.HTTPError: if Steam answers with a 4xx/5xx status.
        requests.RequestException: on connection problems or timeout.
    """
    url = f"https://store.steampowered.com/search/?supportedlang=english&filter=topsellers&page={page}"
    # A timeout keeps a stalled connection from hanging the notebook forever.
    response = requests.get(url, timeout=30)
    # Fail loudly on error pages instead of silently parsing them as "no results".
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    games = []

    for result in soup.find_all('a', class_='search_result_row'):
        title_tag = result.find('span', class_='title')
        app_id = result.get('data-ds-appid')
        # Skip rows missing a title or an app id rather than crashing the whole
        # scrape (presumably bundle-style entries -- verify against live markup).
        if title_tag is None or not app_id:
            continue
        games.append({'title': title_tag.get_text(strip=True), 'app_id': app_id})

    return games

# Example: Get games from multiple pages
def scrape_and_save_to_csv(pages=5, output_file='seller_games.csv'):
    """Scrape several search-result pages and write the games to a CSV file.

    Args:
        pages: Maximum number of result pages to request, starting at page 1.
        output_file: Path of the CSV file to write (columns: title, app_id).

    Scraping stops early as soon as a page yields no games, so asking for more
    pages than exist does not trigger needless requests.
    """
    all_games = []

    for page in range(1, pages + 1):
        print(f"Scraping page {page}...")
        games = get_top_life_simulation_games(page)
        if not games:
            # An empty page means the result list is exhausted.
            print(f"No results on page {page}; stopping early.")
            break
        all_games.extend(games)

    # Explicit columns guarantee a header row even when nothing was scraped,
    # so the CSV can still be read back with pd.read_csv.
    df = pd.DataFrame(all_games, columns=['title', 'app_id'])

    # The same game can show up on more than one page; keep the first
    # occurrence so downstream steps do not process a game twice.
    df = df.drop_duplicates(subset='app_id')

    df.to_csv(output_file, index=False)
    print(f"Data saved to {output_file}")

# Run the scraper: request up to 100 result pages and store the collected
# titles and app ids in 'top_seller_games.csv'.
scrape_and_save_to_csv(
    output_file='top_seller_games.csv',
    pages=100,
)

Scraping page 1...
Scraping page 2...
Scraping page 3...
Scraping page 4...
Scraping page 5...
Scraping page 6...
Scraping page 7...
Scraping page 8...
Scraping page 9...
Scraping page 10...
Scraping page 11...
Scraping page 12...
Scraping page 13...
Scraping page 14...
Scraping page 15...
Scraping page 16...
Scraping page 17...
Scraping page 18...
Scraping page 19...
Scraping page 20...
Scraping page 21...
Scraping page 22...
Scraping page 23...
Scraping page 24...
Scraping page 25...
Scraping page 26...
Scraping page 27...
Scraping page 28...
Scraping page 29...
Scraping page 30...
Scraping page 31...
Scraping page 32...
Scraping page 33...
Scraping page 34...
Scraping page 35...
Scraping page 36...
Scraping page 37...
Scraping page 38...
Scraping page 39...
Scraping page 40...
Scraping page 41...
Scraping page 42...
Scraping page 43...
Data saved to top_seller_games.csv


In [4]:
import requests
import pandas as pd
import time

def get_reviews(app_id, num_reviews=3000):
    """Download up to ``num_reviews`` reviews for one Steam app.

    Pages through the ``appreviews`` endpoint 100 reviews at a time, following
    the cursor returned by each response.

    Args:
        app_id: Steam application id whose reviews are fetched.
        num_reviews: Maximum number of reviews to return.

    Returns:
        A list of at most ``num_reviews`` dicts with the keys ``author``,
        ``review``, ``rating`` ('positive'/'negative'), ``timestamp`` and
        ``helpful_count``.

    Raises:
        requests.HTTPError: if Steam answers with a 4xx/5xx status.
        requests.RequestException: on connection problems or timeout.
    """
    endpoint = f"https://store.steampowered.com/appreviews/{app_id}"
    reviews = []
    cursor = '*'
    seen_cursors = set()

    while len(reviews) < num_reviews:
        seen_cursors.add(cursor)

        # Send the query through `params` so requests URL-encodes the cursor.
        # Steam cursors may contain characters such as '+', '/' and '=' that
        # corrupt pagination when pasted into the URL unescaped.
        response = requests.get(
            endpoint,
            params={'json': 1, 'num_per_page': 100, 'cursor': cursor},
            timeout=30,
        )
        response.raise_for_status()
        data = response.json()

        # Stop when the response carries no reviews.
        page_reviews = data.get('reviews')
        if not page_reviews:
            break

        # Keep only the fields of interest from each review.
        for review in page_reviews:
            reviews.append({
                'author': review['author']['steamid'],
                'review': review['review'],
                'rating': 'positive' if review['voted_up'] else 'negative',
                'timestamp': review['timestamp_created'],
                'helpful_count': review['votes_up'],
            })

        # Advance to the next page.
        cursor = data.get('cursor')

        # A missing/empty cursor means there is nothing more to fetch; a cursor
        # we have already used would only re-download pages we already have.
        if not cursor or cursor in seen_cursors:
            break

        # Short pause so we do not hammer the endpoint.
        time.sleep(2)

    # Trim any overshoot from the final page.
    return reviews[:num_reviews]

# Read back the game list written by the scraping step
df_games = pd.read_csv('top_seller_games.csv')

# One dict per review, accumulated across every game in the list
all_reviews = []

for app_id, game_title in df_games[['app_id', 'title']].itertuples(index=False, name=None):
    print(f"Fetching reviews for {game_title} (AppID: {app_id})")

    # Label each review with the game it belongs to before collecting it
    for review in get_reviews(app_id):
        review['game_title'] = game_title
        all_reviews.append(review)

    # Pause between games to avoid hitting rate limits
    time.sleep(2)

# Write every collected review to a single CSV file
df_reviews = pd.DataFrame(all_reviews)
df_reviews.to_csv('top_seller_game_reviews.csv', index=False)

Fetching reviews for Counter-Strike 2 (AppID: 730)
