### 第一步
首先请运行[basic_analyze_process.sql](basic_analyze_process.sql)，进行基础查询,包括：<br>
1.  统计不同社区可见性状态的用户数量
2.  统计国籍分布情况
3.  计算不同国家玩家的平均游戏数量

### 第二步
使用pandas对[STEAM数据文件](../../data/processed/all_steam_and_game_data_after_cleaned.csv)进行更为细致的查询

In [31]:
import pandas as pd
import re
data = pd.read_csv('../../data/processed/all_steam_and_game_data_after_cleaned.csv')

#### 统计一直以来的热门游戏

In [32]:
# 使用正则表达式提取playtime列以及对应的游戏时长和价格
selected_columns = []

for idx, col in enumerate(data.columns):
    if re.search(r'playtime', col):
        if idx <= 66:
            # 提取总时长前五的所有游戏信息
            selected_columns.extend([data.columns[idx-1], data.columns[idx], data.columns[idx+1],data.columns[idx+2],data.columns[idx+3],data.columns[idx+4], data.columns[idx+5], data.columns[idx+6]]) 
        else:
            pass


# 创建包含这些列的新DataFrame
new_df = data[selected_columns]


In [33]:
games_df = pd.DataFrame()
# 在进行concate时，要保证列名一致，因此此处清空列名
new_df.rename(columns={col:''  for col in new_df.columns}, inplace=True) 

for i in range(0, len(new_df.columns), 8):   

    subset = new_df.iloc[:, i:i+8] # 选择当前组的8列
    
    # 如果games_df为空，直接赋值，否则在行方向上进行合并
    if games_df.empty:
        games_df = subset
    else:
        games_df = pd.concat([games_df, subset], axis=0,ignore_index=True ) 

games_df.columns=['name', 'playtime_forever', 'price', 'genres','developers', 'publishers', 'categories', 'release_date',]    

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_df.rename(columns={col:''  for col in new_df.columns}, inplace=True)


In [34]:
# 删除未给出名称的游戏
games_df = games_df[games_df['name']!=('Unknown')]

关键数据整合

In [35]:
grouped = games_df.groupby('name').agg({'playtime_forever':['count','mean','median','max','min','std']})
# 将多级列名展平成单级列名
display(grouped.columns)
grouped['playtime_forever', 'cv'] = grouped[('playtime_forever', 'std')] / grouped[('playtime_forever', 'mean')]
grouped.columns = ['_'.join(col) for col in grouped.columns]
# 重置索引，使其变为平面格式
grouped.reset_index(inplace=True)
grouped

MultiIndex([('playtime_forever',  'count'),
            ('playtime_forever',   'mean'),
            ('playtime_forever', 'median'),
            ('playtime_forever',    'max'),
            ('playtime_forever',    'min'),
            ('playtime_forever',    'std')],
           )

Unnamed: 0,name,playtime_forever_count,playtime_forever_mean,playtime_forever_median,playtime_forever_max,playtime_forever_min,playtime_forever_std,playtime_forever_cv
0,100%OrangeJuice,6,31540.666667,8249.5,126727,0,49695.752638,1.575609
1,11-11MemoriesRetold,2,0.000000,0.0,0,0,0.000000,
2,12LaboursofHercules,1,0.000000,0.0,0,0,,
3,1v1.LOL,2,342.500000,342.5,685,0,484.368145,1.414214
4,3DCoatModdingTool,1,14872.000000,14872.0,14872,14872,,
...,...,...,...,...,...,...,...,...
2262,太吾绘卷TheScrollOfTaiwu,1,11926.000000,11926.0,11926,11926,,
2263,我来自江湖,1,1000.000000,1000.0,1000,1000,,
2264,暖雪WarmSnow,1,5762.000000,5762.0,5762,5762,,
2265,雀魂麻将(MahjongSoul),5,3341.800000,3052.0,6161,906,1901.407821,0.568977


In [36]:
# 将游戏数据与玩家相关的统计数据融合并保存
grouped = pd.merge(left= grouped, right= games_df,how='left', left_on='name', right_on='name',suffixes=['','_suffixes'],).drop_duplicates(subset='name')
grouped.to_csv('./query_results/most_frequent_play_game_all_timelist.csv')

进行sort,filter等操作

In [37]:
# 按照count降序，变异系数升序排列，筛选最热门的游戏
display(grouped.sort_values(by=['playtime_forever_count','playtime_forever_cv'], ascending=[False,True]).head(10))

Unnamed: 0,name,playtime_forever_count,playtime_forever_mean,playtime_forever_median,playtime_forever_max,playtime_forever_min,playtime_forever_std,playtime_forever_cv,playtime_forever,price,genres,developers,publishers,categories,release_date
4296,Counter-Strike2,3246,128244.762785,91152.0,1309301,0,126947.482097,0.989884,371092,0,"['Action', 'FreetoPlay']",['Valve'],['Valve'],"['Multi-player', 'Cross-PlatformMultiplayer', ...","Aug21,2012"
16020,PUBG:BATTLEGROUNDS,1274,36613.631868,22659.0,774706,0,49841.577018,1.361285,157729,0,"['Action', 'Adventure', 'MassivelyMultiplayer'...","['KRAFTON,Inc.']","['KRAFTON,Inc.']","['Multi-player', 'PvP', 'OnlinePvP', 'Stats', ...","Dec21,2017"
12337,GrandTheftAutoV,702,25185.519943,16382.5,804789,0,42272.565165,1.678447,194418,0,"['Action', 'Adventure']",['RockstarNorth'],['RockstarGames'],"['Single-player', 'Multi-player', 'PvP', 'Onli...","Apr13,2015"
19328,Rust,564,79996.143617,33796.5,1053610,0,120276.000444,1.503522,104399,39,"['Action', 'Adventure', 'Indie', 'MassivelyMul...",['FacepunchStudios'],['FacepunchStudios'],"['Multi-player', 'MMO', 'PvP', 'OnlinePvP', 'C...","Feb8,2018"
7615,Counter-Strike:Source,550,51445.161818,28837.0,580534,0,67530.55705,1.312671,107691,9,['Action'],['Valve'],['Valve'],"['Multi-player', 'Cross-PlatformMultiplayer', ...","Nov1,2004"
3759,Counter-Strike,537,37940.722533,10232.0,336526,0,57830.649202,1.524237,104494,9,['Action'],['Valve'],['Valve'],"['Multi-player', 'PvP', 'OnlinePvP', 'Shared/S...","Nov1,2000"
11577,Garry'sMod,536,74914.964552,30030.0,1200030,0,132901.862214,1.774036,38897,4,"['Casual', 'Indie', 'Simulation']",['FacepunchStudios'],['Valve'],"['Single-player', 'Multi-player', 'PvP', 'Onli...","29Nov,2006"
22637,TomClancy'sRainbowSixSiege,456,35053.427632,24738.5,238566,0,36078.889233,1.029254,94641,19,['Action'],['UbisoftMontreal'],['Ubisoft'],"['Single-player', 'Multi-player', 'PvP', 'Onli...","Dec1,2015"
18796,RocketLeague,452,65517.969027,42574.5,575738,0,75319.208662,1.149596,37642,0,"['Action', 'Indie', 'Racing', 'Sports']",['PsyonixLLC'],['PsyonixLLC'],"['Single-player', 'Multi-player', 'PvP', 'Onli...","Jul6,2015"
783,ApexLegends,380,26657.486842,15927.0,255822,0,33099.738236,1.241668,9239,0,"['Action', 'Adventure', 'FreetoPlay']",['Respawn'],['ElectronicArts'],"['Multi-player', 'PvP', 'OnlinePvP', 'Co-op', ...","Nov4,2020"


In [38]:
# 筛选范围，查找游戏
display(grouped[(grouped['playtime_forever_count']>=100) & (grouped['playtime_forever_count']<300) & (grouped['playtime_forever_max']>0) & (grouped['playtime_forever_min']>0)].sort_values(by=['playtime_forever_count','playtime_forever_cv'], ascending=[False,True]).head(10))

Unnamed: 0,name,playtime_forever_count,playtime_forever_mean,playtime_forever_median,playtime_forever_max,playtime_forever_min,playtime_forever_std,playtime_forever_cv,playtime_forever,price,genres,developers,publishers,categories,release_date
2891,CallofDuty:ModernWarfare2(2009)-Multiplayer,184,28095.798913,19860.5,302328,258,34505.068901,1.228122,264890,0,Unknown,Unknown,Unknown,Unknown,Unknown
22063,TheElderScrollsV:Skyrim,160,20927.15625,15170.5,78381,373,17451.717404,0.833927,56368,19,['RPG'],['BethesdaGameStudios'],['BethesdaSoftworks'],"['Single-player', 'SteamAchievements', 'SteamT...","Nov10,2011"
21834,TheBindingofIsaac:Rebirth,134,31478.932836,27428.5,179713,208,23281.684351,0.739596,44525,5,['Action'],"['Nicalis,Inc.', 'EdmundMcMillen']","['Nicalis,Inc.']","['Single-player', 'Multi-player', 'Shared/Spli...","4Nov,2014"
20320,SidMeier'sCivilizationV,126,30168.555556,18995.5,249161,1832,36970.80353,1.225475,38592,7,['Strategy'],"['FiraxisGames', 'Aspyr(Mac)', 'Aspyr(Linux)']","['2K', 'Aspyr(Mac)', 'Aspyr(Linux)']","['Single-player', 'Multi-player', 'SteamAchiev...","Sep21,2010"
