In [2]:
# File: steam_crawler_test.py
import requests
import pandas as pd
import time
from tqdm import tqdm

def fetch_recent_reviews(appid, language, max_reviews, max_retries=5):
    """Fetch the most recent Steam reviews of an app in a single language.

    Pages through the ``appreviews`` endpoint with ``filter=recent``, following
    the cursor returned by each response and de-duplicating reviews on their
    ``recommendationid``.

    Args:
        appid: Steam application id inserted into the request URL.
        language: Value sent as the ``language`` query parameter.
        max_reviews: Upper bound on the number of reviews returned.
        max_retries: Number of consecutive failed requests tolerated before
            the crawl stops and returns whatever has been collected so far.

    Returns:
        A ``pandas.DataFrame`` with one row per review and at most
        ``max_reviews`` rows; empty when ``max_reviews`` is not positive.
    """
    if max_reviews <= 0:
        # Nothing was requested: skip the progress bar and the network.
        return pd.DataFrame([])

    url = f"https://store.steampowered.com/appreviews/{appid}"
    reviews = []
    seen_ids = set()  # ids already collected; kept incrementally for O(1) lookups
    cursor = '*'
    failures = 0  # consecutive request/parse failures

    # Progress bar configuration
    pbar = tqdm(total=max_reviews, desc=f"爬取进度（{language}）", unit="条")

    try:
        while len(reviews) < max_reviews:
            params = {
                'json': 1,
                'filter': 'recent',
                'language': language,
                'num_per_page': 100,
                'cursor': cursor,
                'day_range': 9223372036854775807,  # largest possible time range
            }

            # Only the network call and JSON decoding are retried; a bounded
            # retry count keeps a persistent failure from looping forever.
            try:
                response = requests.get(url, params=params, timeout=15)
                response.raise_for_status()
                data = response.json()
            except (requests.RequestException, ValueError) as e:
                failures += 1
                if failures > max_retries:
                    print(f"连续失败 {failures} 次，停止爬取: {str(e)}")
                    break
                print(f"发生错误: {str(e)}，重试中...")
                time.sleep(5)
                continue
            failures = 0

            if not data.get('success', False):
                print(f"请求失败：{data.get('error', '未知错误')}")
                break

            batch = data.get('reviews', [])
            if not batch:
                break

            # De-duplicate against everything collected so far (and within
            # the page itself) without rebuilding the id set for each item.
            new_reviews = []
            for review in batch:
                review_id = review['recommendationid']
                if review_id not in seen_ids:
                    seen_ids.add(review_id)
                    new_reviews.append(review)

            if not new_reviews:
                # A page made only of known reviews means the cursor is no
                # longer advancing; stop instead of requesting it again.
                break

            # Never advance the bar past its configured total.
            pbar.update(min(len(new_reviews), max_reviews - len(reviews)))
            reviews.extend(new_reviews)

            cursor = data.get('cursor')
            if not cursor or len(reviews) >= max_reviews:
                break

            time.sleep(1.5)  # pause between pages to respect rate limiting
    finally:
        # Close the bar even when an unexpected error or Ctrl-C ends the loop.
        pbar.close()

    return pd.DataFrame(reviews[:max_reviews])

# Run a test crawl when this file is executed as a script
if __name__ == "__main__":
    appid = 2358720  # Black Myth: Wukong
    df = fetch_recent_reviews(appid, 'english', 140000)

    # Save every field of the crawled reviews. quoting=1 is csv.QUOTE_ALL, so
    # review text containing commas stays inside a single CSV field.
    csv_options = {
        'index': False,
        'encoding': 'utf-8-sig',
        'quoting': 1,
    }
    df.to_csv('wukong_english_filterRecent_V2.csv', **csv_options)

    print(f"已保存 {len(df)} 条测试评论")


爬取进度（english）:   0%|          | 0/140000 [00:00<?, ?条/s][A
爬取进度（english）:   0%|          | 100/140000 [00:01<32:53, 70.90条/s][A
爬取进度（english）:   0%|          | 200/140000 [00:04<50:57, 45.73条/s][A
爬取进度（english）:   0%|          | 300/140000 [00:06<55:19, 42.09条/s][A
爬取进度（english）:   0%|          | 400/140000 [00:09<1:01:02, 38.12条/s][A
爬取进度（english）:   0%|          | 500/140000 [00:12<1:01:50, 37.59条/s][A
爬取进度（english）:   0%|          | 600/140000 [00:14<1:00:37, 38.32条/s][A
爬取进度（english）:   0%|          | 700/140000 [00:17<56:59, 40.74条/s]  [A
爬取进度（english）:   1%|          | 800/140000 [00:19<55:44, 41.61条/s][A
爬取进度（english）:   1%|          | 899/140000 [00:21<54:59, 42.16条/s][A
爬取进度（english）:   1%|          | 999/140000 [00:24<55:25, 41.80条/s][A
爬取进度（english）:   1%|          | 1098/140000 [00:26<58:00, 39.91条/s][A
爬取进度（english）:   1%|          | 1198/140000 [00:29<56:18, 41.09条/s][A
爬取进度（english）:   1%|          | 1298/140000 [00:31<59:16, 39.00条/s][A
爬取进度（english）:   1

爬取进度（english）:   8%|▊         | 11398/140000 [04:58<1:00:44, 35.29条/s][A
爬取进度（english）:   8%|▊         | 11498/140000 [05:01<58:18, 36.73条/s]  [A
爬取进度（english）:   8%|▊         | 11598/140000 [05:03<55:47, 38.35条/s][A
爬取进度（english）:   8%|▊         | 11698/140000 [05:05<53:49, 39.72条/s][A
爬取进度（english）:   8%|▊         | 11798/140000 [05:08<55:05, 38.78条/s][A
爬取进度（english）:   8%|▊         | 11898/140000 [05:11<58:43, 36.36条/s][A
爬取进度（english）:   9%|▊         | 11998/140000 [05:14<1:00:14, 35.41条/s][A
爬取进度（english）:   9%|▊         | 12098/140000 [05:17<1:01:14, 34.81条/s][A
爬取进度（english）:   9%|▊         | 12198/140000 [05:19<58:00, 36.72条/s]  [A
爬取进度（english）:   9%|▉         | 12298/140000 [05:22<55:03, 38.66条/s][A
爬取进度（english）:   9%|▉         | 12398/140000 [05:24<53:53, 39.46条/s][A
爬取进度（english）:   9%|▉         | 12498/140000 [05:26<52:24, 40.55条/s][A
爬取进度（english）:   9%|▉         | 12598/140000 [05:29<54:14, 39.15条/s][A
爬取进度（english）:   9%|▉         | 12698/140000 [05:32<56

爬取进度（english）:  16%|█▌        | 22498/140000 [10:34<1:03:43, 30.73条/s][A
爬取进度（english）:  16%|█▌        | 22598/140000 [10:37<1:01:01, 32.06条/s][A
爬取进度（english）:  16%|█▌        | 22698/140000 [10:40<1:03:31, 30.78条/s][A
爬取进度（english）:  16%|█▋        | 22798/140000 [10:43<1:02:27, 31.27条/s][A
爬取进度（english）:  16%|█▋        | 22898/140000 [10:47<1:02:39, 31.15条/s][A
爬取进度（english）:  16%|█▋        | 22998/140000 [10:50<1:01:40, 31.62条/s][A
爬取进度（english）:  16%|█▋        | 23098/140000 [10:53<1:02:00, 31.42条/s][A
爬取进度（english）:  17%|█▋        | 23198/140000 [10:56<1:01:43, 31.54条/s][A
爬取进度（english）:  17%|█▋        | 23298/140000 [11:00<1:04:10, 30.31条/s][A
爬取进度（english）:  17%|█▋        | 23398/140000 [11:03<1:01:22, 31.67条/s][A
爬取进度（english）:  17%|█▋        | 23498/140000 [11:06<1:01:39, 31.49条/s][A
爬取进度（english）:  17%|█▋        | 23598/140000 [11:09<1:00:03, 32.30条/s][A
爬取进度（english）:  17%|█▋        | 23698/140000 [11:12<1:00:21, 32.11条/s][A
爬取进度（english）:  17%|█▋        | 23798/

爬取进度（english）:  24%|██▍       | 33598/140000 [16:30<59:06, 30.00条/s][A
爬取进度（english）:  24%|██▍       | 33698/140000 [16:33<58:48, 30.12条/s][A
爬取进度（english）:  24%|██▍       | 33798/140000 [16:36<54:48, 32.30条/s][A
爬取进度（english）:  24%|██▍       | 33898/140000 [16:38<53:50, 32.84条/s][A
爬取进度（english）:  24%|██▍       | 33998/140000 [16:42<54:57, 32.15条/s][A
爬取进度（english）:  24%|██▍       | 34098/140000 [16:46<58:47, 30.02条/s][A
爬取进度（english）:  24%|██▍       | 34198/140000 [16:48<56:30, 31.20条/s][A
爬取进度（english）:  24%|██▍       | 34298/140000 [16:51<54:57, 32.06条/s][A
爬取进度（english）:  25%|██▍       | 34398/140000 [16:55<58:29, 30.09条/s][A
爬取进度（english）:  25%|██▍       | 34498/140000 [16:58<55:53, 31.46条/s][A
爬取进度（english）:  25%|██▍       | 34598/140000 [17:01<53:55, 32.58条/s][A
爬取进度（english）:  25%|██▍       | 34698/140000 [17:05<57:59, 30.27条/s][A
爬取进度（english）:  25%|██▍       | 34798/140000 [17:07<54:39, 32.08条/s][A
爬取进度（english）:  25%|██▍       | 34898/140000 [17:11<54:57, 31.88

爬取进度（english）:  32%|███▏      | 44898/140000 [22:47<53:52, 29.42条/s][A
爬取进度（english）:  32%|███▏      | 44998/140000 [22:51<54:39, 28.97条/s][A
爬取进度（english）:  32%|███▏      | 45098/140000 [22:54<52:52, 29.91条/s][A
爬取进度（english）:  32%|███▏      | 45198/140000 [22:57<52:56, 29.85条/s][A
爬取进度（english）:  32%|███▏      | 45298/140000 [23:00<52:47, 29.90条/s][A
爬取进度（english）:  32%|███▏      | 45398/140000 [23:05<58:41, 26.87条/s][A
爬取进度（english）:  32%|███▏      | 45498/140000 [23:09<1:00:22, 26.08条/s][A
爬取进度（english）:  33%|███▎      | 45598/140000 [23:13<59:24, 26.48条/s]  [A
爬取进度（english）:  33%|███▎      | 45698/140000 [23:16<55:36, 28.26条/s][A
爬取进度（english）:  33%|███▎      | 45798/140000 [23:19<54:11, 28.97条/s][A
爬取进度（english）:  33%|███▎      | 45898/140000 [23:23<55:27, 28.28条/s][A
爬取进度（english）:  33%|███▎      | 45998/140000 [23:26<54:22, 28.82条/s][A
爬取进度（english）:  33%|███▎      | 46098/140000 [23:29<53:10, 29.43条/s][A
爬取进度（english）:  33%|███▎      | 46198/140000 [23:32<51:48, 3

爬取进度（english）:  40%|████      | 56194/140000 [29:31<53:05, 26.31条/s][A
爬取进度（english）:  40%|████      | 56294/140000 [29:36<55:24, 25.18条/s][A
爬取进度（english）:  40%|████      | 56394/140000 [29:40<56:41, 24.58条/s][A
爬取进度（english）:  40%|████      | 56494/140000 [29:45<59:35, 23.36条/s][A
爬取进度（english）:  40%|████      | 56594/140000 [29:48<55:34, 25.01条/s][A
爬取进度（english）:  40%|████      | 56694/140000 [29:52<56:29, 24.58条/s][A
爬取进度（english）:  41%|████      | 56794/140000 [29:56<54:38, 25.38条/s][A
爬取进度（english）:  41%|████      | 56894/140000 [30:00<55:09, 25.11条/s][A
爬取进度（english）:  41%|████      | 56994/140000 [30:04<55:19, 25.00条/s][A
爬取进度（english）:  41%|████      | 57094/140000 [30:08<56:12, 24.58条/s][A
爬取进度（english）:  41%|████      | 57194/140000 [30:12<53:58, 25.57条/s][A
爬取进度（english）:  41%|████      | 57294/140000 [30:16<54:25, 25.32条/s][A
爬取进度（english）:  41%|████      | 57394/140000 [30:19<52:02, 26.45条/s][A
爬取进度（english）:  41%|████      | 57494/140000 [30:24<53:41, 25.61

已保存 60734 条测试评论
