### 导入必要包

In [59]:
import pandas as pd
import json 

### 读取数据

In [224]:
# Location, base name and number of the raw JSON shards.
data_location = '../../data/raw/'
base_name = 'all_steam_data'
file_count = 7  # number of numbered shard files on disk; adjust to match your data

# Standalone sample file. The context manager guarantees the handle is closed
# (the previous bare open() leaked it). JSON text is read as UTF-8 explicitly
# so decoding does not depend on the platform's default encoding.
with open('data.json', 'r', encoding='utf-8') as file:
    test_data = json.load(file)

# Collect one DataFrame per shard and concatenate once at the end; calling
# pd.concat inside the loop re-copies all previously loaded rows each time.
raw_frames = []
for i in range(1, file_count + 1):
    file_name = f"{data_location}{base_name}{i}.json"  # e.g. ../../data/raw/all_steam_data1.json
    with open(file_name, 'r', encoding='utf-8') as file:
        data = json.load(file)
    # NOTE(review): assumes each shard's JSON can be passed straight to
    # pd.DataFrame — adjust here if the raw structure differs.
    df = pd.DataFrame(data)
    raw_frames.append(df)

# pd.concat raises on an empty list, so fall back to an empty frame when
# file_count is 0.
all_data = pd.concat(raw_frames, ignore_index=True) if raw_frames else pd.DataFrame()


In [248]:
# Lay every column of all_data end to end as one long Series (with a fresh
# 0..n-1 index) and wrap the result in a single-column DataFrame.
df = pd.DataFrame(
    pd.concat(
        [all_data[column] for column in all_data.columns],
        ignore_index=True,
    )
)

### 提取数据函数定义

In [282]:

def extract_player_info(player_data):
    """Extract a player's basic profile fields as a pandas Series.

    Parameters
    ----------
    player_data : dict
        Mapping whose first key is used as the player's id; the profile is
        read from ``player_data[id]['player_info']['response']['players'][0]``.

    Returns
    -------
    pandas.Series
        One entry per name in ``info_keys``. A field missing from the profile
        is ``None``; if the profile cannot be reached at all (any exception),
        every entry is ``None``.
    """
    # Profile fields to look up, as found in the JSON files.
    info_keys = ['steamid', 'communityvisibilitystate', 'profilestate', 'personaname', 'profileurl', 'avatar', 'avatarmedium', 'avatarfull', 'avatarhash', 'personastate', 'primaryclanid', 'timecreated', 'personastateflags', 'loccountrycode']

    try:
        first_key = list(player_data.keys())[0]
        profile = player_data[first_key]['player_info']['response']['players'][0]
        collected = {}
        for field in info_keys:
            collected[field] = profile.get(field, None)
        return pd.Series(collected)
    except Exception:
        # Best effort: a malformed or missing record yields an all-None row
        # instead of aborting the whole extraction.
        return pd.Series(dict.fromkeys(info_keys))

def extract_player_games(player_data, top_n=5):
    """Flatten a player's most-played games into a single pandas Series.

    The games list is read from
    ``player_data[id]['player_games']['response']['games']`` (``id`` being the
    first key of ``player_data``), sorted by ``playtime_forever`` in descending
    order, and the first ``top_n`` rows are kept.

    Parameters
    ----------
    player_data : dict
        Nested record for one player.
    top_n : int, default 5
        Maximum number of games to keep.

    Returns
    -------
    pandas.Series
        Object-dtype Series labelled ``appid_game1``, ``name_game1``,
        ``playtime_forever_game1``, ``appid_game2``, ... with one triple per
        kept game. If the record cannot be processed (missing keys, empty
        games list, non-dict input, ...) an empty Series is returned, so such
        rows come out as missing values when the results are expanded with
        ``Series.apply``.
    """
    wanted_fields = ['appid', 'name', 'playtime_forever']
    try:
        steam_id = list(player_data.keys())[0]
        games = player_data[steam_id]['player_games']['response']['games']
        top_games = (
            pd.DataFrame(games)
            .sort_values(by='playtime_forever', ascending=False, ignore_index=True)
            .head(top_n)[wanted_fields]
        )
        # Build all labels in one pass instead of growing a Series with
        # pd.concat inside a loop.
        labelled = {
            f'{field}_game{rank}': value
            for rank, record in enumerate(top_games.to_dict(orient='records'), start=1)
            for field, value in record.items()
        }
        return pd.Series(labelled, dtype=object)
    except Exception:
        # Best effort, as before. The previous fallback derived its keys from a
        # frame that was still empty at this point, so it always produced an
        # empty Series; that outcome is now explicit, with an explicit dtype.
        return pd.Series(dtype=object)

def extract_recently_played_games(player_data):
    """Flatten a player's recently played games into a single pandas Series.

    The games list is read from
    ``player_data[id]['recently_played_games']['response']['games']`` (``id``
    being the first key of ``player_data``) and kept in its original order.

    Parameters
    ----------
    player_data : dict
        Nested record for one player.

    Returns
    -------
    pandas.Series
        Object-dtype Series labelled ``appid_game1``, ``name_game1``,
        ``playtime_2weeks_game1``, ``playtime_forever_game1``,
        ``appid_game2``, ... with one group of four per game. If the record
        cannot be processed (missing keys, empty games list, non-dict input,
        ...) an empty Series is returned, so such rows come out as missing
        values when the results are expanded with ``Series.apply``.
    """
    wanted_fields = ['appid', 'name', 'playtime_2weeks', 'playtime_forever']
    try:
        player_id = list(player_data.keys())[0]
        games = player_data[player_id]['recently_played_games']['response']['games']
        recent_games = pd.DataFrame(games)[wanted_fields]
        # Build all labels in one pass instead of growing a Series with
        # pd.concat inside a loop.
        labelled = {
            f'{field}_game{position}': value
            for position, record in enumerate(recent_games.to_dict(orient='records'), start=1)
            for field, value in record.items()
        }
        return pd.Series(labelled, dtype=object)
    except Exception:
        # Was a bare `except:`, which also swallowed KeyboardInterrupt and
        # SystemExit and made a long-running apply impossible to interrupt.
        # Ordinary data errors are still tolerated and give an empty Series,
        # which is what the previous fallback effectively returned.
        return pd.Series(dtype=object)


### 查询并整合

In [284]:
# Run each extractor over the stacked player records in column 0; because the
# extractors return a Series, every apply() expands into one DataFrame.
player_info_df, player_games_df, player_recently_played_games_df = (
    df[0].apply(extractor)
    for extractor in (
        extract_player_info,
        extract_player_games,
        extract_recently_played_games,
    )
)

In [287]:
# Put the three extracted frames side by side, aligned on the shared row index.
# NOTE(review): extract_player_games and extract_recently_played_games both
# emit labels such as appid_game1 / name_game1 / playtime_forever_game1, so the
# combined frame can carry duplicate column names — confirm that downstream
# consumers of the saved CSV expect this before renaming anything.
new_df = pd.concat(
    [player_info_df, player_games_df, player_recently_played_games_df],
    axis="columns",
    ignore_index=False,
)
new_df

Unnamed: 0,steamid,communityvisibilitystate,profilestate,personaname,profileurl,avatar,avatarmedium,avatarfull,avatarhash,personastate,...,playtime_2weeks_game1,playtime_forever_game1,appid_game2,name_game2,playtime_2weeks_game2,playtime_forever_game2,appid_game3,name_game3,playtime_2weeks_game3,playtime_forever_game3
0,76561197960269909,3.0,1.0,ツxxツ,https://steamcommunity.com/id/xcari/,https://avatars.steamstatic.com/c8499ee4d5ebde...,https://avatars.steamstatic.com/c8499ee4d5ebde...,https://avatars.steamstatic.com/c8499ee4d5ebde...,c8499ee4d5ebdebd78f07fc3fa19ce5370da82be,0.0,...,,,,,,,,,,
1,76561197960280440,3.0,1.0,recon,https://steamcommunity.com/id/pzrecon/,https://avatars.steamstatic.com/628974cb0fcec1...,https://avatars.steamstatic.com/628974cb0fcec1...,https://avatars.steamstatic.com/628974cb0fcec1...,628974cb0fcec15a07cd1601fdadc7aa44ac245d,0.0,...,,,,,,,,,,
2,76561197960290472,3.0,1.0,JKBe,https://steamcommunity.com/profiles/7656119796...,https://avatars.steamstatic.com/c698ae39dd85c1...,https://avatars.steamstatic.com/c698ae39dd85c1...,https://avatars.steamstatic.com/c698ae39dd85c1...,c698ae39dd85c1a1567e184a4b1735e9077a475f,0.0,...,,,,,,,,,,
3,76561197960315957,3.0,1.0,chuNami5000,https://steamcommunity.com/id/chuNk--/,https://avatars.steamstatic.com/b16314c3aff86b...,https://avatars.steamstatic.com/b16314c3aff86b...,https://avatars.steamstatic.com/b16314c3aff86b...,b16314c3aff86bf70b0bb5e54570fe8aa3efdd5d,3.0,...,1189.0,371092.0,1877600.0,MidnightGuns,82.0,82.0,952060.0,ResidentEvil3,67.0,272.0
4,76561197960331205,3.0,1.0,Mikki,https://steamcommunity.com/profiles/7656119796...,https://avatars.steamstatic.com/fef49e7fa7e199...,https://avatars.steamstatic.com/fef49e7fa7e199...,https://avatars.steamstatic.com/fef49e7fa7e199...,fef49e7fa7e1997310d705b2a6158ff8dc1cdfeb,0.0,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20735,76561198130132230,3.0,1.0,Fishvit,https://steamcommunity.com/id/hackeraim007/,https://avatars.steamstatic.com/18cf9e93edbe79...,https://avatars.steamstatic.com/18cf9e93edbe79...,https://avatars.steamstatic.com/18cf9e93edbe79...,18cf9e93edbe79213f24fd277a0c8fc05e9ec68f,1.0,...,3644.0,26382.0,1172470.0,ApexLegends,215.0,56888.0,,,,
20736,76561198981187936,3.0,1.0,juhobossu,https://steamcommunity.com/id/zeihou16/,https://avatars.steamstatic.com/3fe6469f4eb139...,https://avatars.steamstatic.com/3fe6469f4eb139...,https://avatars.steamstatic.com/3fe6469f4eb139...,3fe6469f4eb13922612c7bf2a66d7df43ac47c5e,0.0,...,,,,,,,,,,
20737,76561198286511601,3.0,1.0,OhNo|Zigludo,https://steamcommunity.com/profiles/7656119828...,https://avatars.steamstatic.com/d472abf1cd758e...,https://avatars.steamstatic.com/d472abf1cd758e...,https://avatars.steamstatic.com/d472abf1cd758e...,d472abf1cd758e4e47c0b03ed915300ad5ca5fa4,0.0,...,48.0,23865.0,,,,,,,,
20738,76561198106511894,3.0,1.0,MUNK,https://steamcommunity.com/profiles/7656119810...,https://avatars.steamstatic.com/3604ac34b47c87...,https://avatars.steamstatic.com/3604ac34b47c87...,https://avatars.steamstatic.com/3604ac34b47c87...,3604ac34b47c87e187d151f22aa17e107253ce34,1.0,...,1037.0,6228.0,553850.0,HELLDIVERS™2,51.0,7962.0,1938090.0,CallofDuty®,5.0,6860.0


### 保存数据用于后续分析

In [288]:
# Persist the combined table (row index included) for the later analysis steps.
new_df.to_csv(path_or_buf='../../data/processed/all_steam_data.csv')