导入所使用的包

In [21]:
import pandas as pd
import json
import os

读取数据

In [22]:
# Directory that holds the JSON files to load.
directory = '../../data/processed/json_datas/'

# Read every JSON file first and concatenate once at the end. Calling
# pd.concat inside the loop re-copies all rows accumulated so far on each
# iteration, which is quadratic in the number of files.
frames = []
for file_name in os.listdir(directory):
    if file_name.endswith('.json'):
        # Build the full path of the file.
        file_path = os.path.join(directory, file_name)
        # Parse the JSON file into a DataFrame.
        data = pd.read_json(file_path)
        frames.append(data)

# pd.concat raises ValueError on an empty list, so fall back to an empty
# DataFrame when the directory contains no JSON files.
df_combined = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()

# Flatten the combined table into a single Series, one cell value per element.
df = pd.DataFrame(df_combined)
# dropna() returns a new Series instead of modifying in place, so it must be
# chained into the assignment. Missing cells are dropped before reset_index
# so that the final index is a gap-free 0..n-1 range.
se = df.stack().dropna().reset_index(drop=True)
# Last expression of the cell: displays the Series in the notebook.
se

0       {'730': {'success': True, 'data': {'type': 'ga...
1       {'252950': {'success': True, 'data': {'type': ...
2       {'782330': {'success': True, 'data': {'type': ...
3       {'252490': {'success': True, 'data': {'type': ...
4       {'10': {'success': True, 'data': {'type': 'gam...
                              ...                        
3053    {'1548850': {'success': True, 'data': {'type':...
3054    {'13210': {'success': True, 'data': {'type': '...
3055    {'2607060': {'success': True, 'data': {'type':...
3056    {'1307890': {'success': True, 'data': {'type':...
3057    {'2708450': {'success': True, 'data': {'type':...
Length: 3058, dtype: object

In [26]:
# Define the function that extracts the per-game fields.

def extract_game_info(se):
    """Flatten raw per-game response dicts into one DataFrame row per game.

    Each element of ``se`` is expected to be a dict shaped like
    ``{appid: {'success': ..., 'data': {...}}}``. Only the first key of the
    dict is read, and the fields listed below are pulled out of its
    ``'data'`` mapping, with a default substituted for any missing field.

    Args:
        se: Iterable of response dicts, typically a pandas Series. Elements
            are visited by value, so the index labels of a Series do not
            matter (a filtered or re-indexed Series works as well).

    Returns:
        pandas.DataFrame with one row per entry that could be parsed, in
        input order, with a fresh 0..n-1 index. Columns follow the order of
        the field table. Entries that cannot be parsed are skipped.
    """
    def _descriptions(items):
        # Keep only the human-readable label of each category/genre record.
        return [item['description'] for item in items if 'description' in item]

    # (field name, default when the field is absent, converter) triples.
    # Built once: the table does not depend on the entry being processed.
    fields = [
        ('name', 'Unknown', str),
        ('steam_appid', 'Unknown', str),
        ('required_age', 'Unknown', str),
        ('is_free', False, bool),
        ('dlc', [], len),  # stored as the number of DLC ids
        ('website', 'No website', str),
        ('pc_requirements', {}, lambda x: x),
        ('developers', ['Unknown'], lambda x: x),
        ('publishers', ['Unknown'], lambda x: x),
        ('categories', [], _descriptions),
        ('genres', [], _descriptions),
        ('ratings', {}, lambda x: x),
        ('price_overview', {}, lambda x: x),
        ('metacritic', {'score': 'No score'}, lambda x: x.get('score', 'No score')),
        ('recommendations', {'total': 0}, lambda x: x.get('total', 0)),
        # Stored as a flag: True when the game reports at least one achievement.
        ('achievements', {'total': 0}, lambda x: x.get('total', 0) != 0),
        ('release_date', {'date': 'Unknown date'}, lambda x: x.get('date', 'Unknown date')),
    ]

    results = []
    # Iterate over the values themselves: looking entries up with se[i] is a
    # label lookup on a Series and fails for any index other than 0..n-1.
    for entry in se:
        try:
            key = list(entry.keys())[0]
            game_data = entry[key]['data']
            game_series = pd.Series({
                field: convert(game_data.get(field, default))
                for field, default, convert in fields
            })
        except (KeyError, IndexError, TypeError, AttributeError):
            # Entries without usable data (e.g. no 'data' key) are skipped on
            # purpose: per the original author's note, checking the affected
            # appids showed the upstream source simply provides no
            # information for them.
            continue
        results.append(game_series)

    return pd.DataFrame(results)



提取数据

In [27]:
# Flatten every raw response held in `se` into one table row per game.
games_flat_data = extract_game_info(se)
# Bare expression as the last line of the cell so the notebook renders the table.
games_flat_data

Unnamed: 0,name,steam_appid,required_age,is_free,dlc,website,pc_requirements,developers,publishers,categories,genres,ratings,price_overview,metacritic,recommendations,achievements,release_date
0,Counter-Strike2,730,0,True,1,http://counter-strike.net/,{'minimum': '<strong>Minimum:</strong><br><ulc...,[Valve],[Valve],"[Multi-player, Cross-PlatformMultiplayer, Stea...","[Action, FreetoPlay]","{'usk': {'rating': '16', 'descriptors': 'Gewal...",{},No score,4112956,True,"Aug21,2012"
1,RocketLeague®,252950,0,False,0,http://www.rocketleague.com/,{'minimum': '<strong>Minimum:</strong><br><ulc...,[PsyonixLLC],[PsyonixLLC],"[Single-player, Multi-player, PvP, OnlinePvP, ...","[Action, Indie, Racing, Sports]","{'esrb': {'rating': 'e', 'descriptors': 'MildL...",{},86,431062,True,"Jul6,2015"
2,DOOMEternal,782330,17,False,13,https://slayersclub.bethesda.net/en,{'minimum': '<strong>Minimum:</strong><br><ulc...,[idSoftware],[BethesdaSoftworks],"[Single-player, Multi-player, PvP, OnlinePvP, ...",[Action],"{'esrb': {'rating': 'm', 'descriptors': 'Blood...","{'currency': 'CAD', 'initial': 5349, 'final': ...",88,164485,True,"19Mar,2020"
3,Rust,252490,17,False,4,http://rust.facepunch.com/,{'minimum': '<strong>Minimum:</strong><br><ulc...,[FacepunchStudios],[FacepunchStudios],"[Multi-player, MMO, PvP, OnlinePvP, Co-op, Onl...","[Action, Adventure, Indie, MassivelyMultiplaye...","{'esrb': {'use_age_gate': 'true', 'required_ag...","{'currency': 'USD', 'initial': 3999, 'final': ...",69,872683,True,"Feb8,2018"
4,Counter-Strike,10,0,False,0,,{'minimum': '  <p><strong>Minimum:</strong>...,[Valve],[Valve],"[Multi-player, PvP, OnlinePvP, Shared/SplitScr...",[Action],{'usk': {'rating': '16'}},"{'currency': 'USD', 'initial': 999, 'final': 9...",88,149469,False,"Nov1,2000"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2893,SixDaysinFallujah,1548850,0,False,0,https://sixdays.com,{'minimum': '<strong>Minimum:</strong><br><ulc...,[HighwireGames],[Victura],"[Single-player, Multi-player, Co-op, OnlineCo-...","[Action, EarlyAccess]","{'dejus': {'rating': '14', 'descriptors': 'Vio...","{'currency': 'SGD', 'initial': 3400, 'final': ...",No score,8595,True,"22Jun,2023"
2894,UnrealTournament3,13210,0,False,0,,"{'minimum': '<ulclass=""bb_ul""><li>OS-Windows7o...","[EpicGames,Inc.]","[EpicGames,Inc.]","[Single-player, Multi-player, PvP, OnlinePvP, ...",[Action],"{'esrb': {'use_age_gate': '1', 'required_age':...",{},83,2542,True,"5Mar,2009"
2895,FromGloryToGoo,2607060,0,False,0,,{'minimum': '<strong>Minimum:</strong><br><ulc...,[StratagemBlue],[StratagemBlue],"[Single-player, SteamAchievements, SteamCloud,...","[Strategy, EarlyAccess]","{'dejus': {'rating_generated': '1', 'rating': ...","{'currency': 'SGD', 'initial': 1200, 'final': ...",No score,244,True,"2Apr,2024"
2896,KingdomsReborn,1307890,0,False,0,https://kingdomsreborn.com/,{'minimum': '<strong>Minimum:</strong><br><ulc...,[Earthshine],[Earthshine],"[Single-player, Multi-player, PvP, OnlinePvP, ...","[Indie, Simulation, Strategy, EarlyAccess]",{'dejus': {'rating': 'l'}},"{'currency': 'SGD', 'initial': 1850, 'final': ...",No score,7542,True,Nov2020


保存数据

In [28]:
# Persist the flattened table as CSV. The DataFrame index is written as well
# (to_csv default), so it comes back as an unnamed first column when re-read.
games_flat_data.to_csv('../../data/processed/all_game_data.csv')