In [2]:
import requests, json, os, sys, time, re, math,csv
from datetime import datetime
import pandas as pd

In [3]:
def show_work_status(singleCount, totalCount, currentCount=0):
    currentCount += singleCount
    percentage = 100.0 * currentCount / totalCount
    status =  '>' * int(percentage)  + ' ' * (100 - int(percentage))
    sys.stdout.write('\r[{0}] {1:.2f}% '.format(status, percentage))
    sys.stdout.flush()
    if percentage >= 100:
        print('\n')


def split_list(lst_long,n):
    lst_splitted = []
    if len(lst_long) % n == 0:
        totalBatches = len(lst_long) / n
    else:
        totalBatches = len(lst_long) / n + 1
    for i in range(int(totalBatches)):
        lst_short = lst_long[i*n:(i+1)*n]
        lst_splitted.append(lst_short)
    return lst_splitted

## Get  game data and detail

In [4]:
url = 'https://api.steampowered.com/ISteamApps/GetAppList/v2/'
r = requests.get(url)
dic_app_list = r.json()
lst_app_id = [i.get('appid') for i in dic_app_list.get('applist').get('apps')]
print('Total apps:', len(lst_app_id)) 

Total apps: 94599


## Using a portion of game data to demonstrate how to do web scrapping (due to the large size of data)

In [5]:
#using 50 data to demonstrate the results as an example
## write a new text file
def get_steam_app_info():
    url = 'https://api.steampowered.com/ISteamApps/GetAppList/v2/'
    r = requests.get(url)
    dic_app_list = r.json()
    lst_app_id = [i.get('appid') for i in dic_app_list.get('applist').get('apps')]
    print('Total apps:', len(lst_app_id[:50]))

    total_count = len(lst_app_id[:50])
    current_count = 0
    show_work_status(0, total_count, current_count)

    path_app_detail_sample = 'test_api.txt' 
    with open(path_app_detail_sample, 'w') as f:
        for app_id in lst_app_id[:50]:
            url_app_detail = ('http://store.steampowered.com/api/appdetails?appids=%s') % (app_id)
            for i in range(3):
                try:
                    r = requests.get(url_app_detail)
                    result = r.json()
                    break
                except:
                    time.sleep(5)
                    pass
            f.write(json.dumps(result))
            f.write('\n')
            show_work_status(1, total_count, current_count)
            current_count += 1
            if current_count % 200 == 0:
                time.sleep(300)
            else:
                time.sleep(.5)



get_steam_app_info()








Total apps: 50
[>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] 100.00% 



In [5]:
path_app_info = 'test_api.txt'

## Parse data into dataframe 

In [6]:
def parse_steam_app_info(steam_app_info):
    if steam_app_info:
        for app_id, app_info in steam_app_info.items():
            success = app_info.get('success')
            if success:
                success = 1
                app_data = app_info.get('data')
                developers = ', '.join(app_data.get('developers', []))
                if not developers:
                    developers = None
                publishers = ', '.join(app_data.get('publishers', []))
                if not publishers:
                    publishers = None
                name = app_data.get('name')
                required_age = app_data.get('required_age')
                short_description = app_data.get('short_description')
                critic_score = app_data.get('metacritic', {}).get('score')
                app_type = app_data.get('type')
                recommendation = app_data.get('recommendations',{}).get('total')
                header_image = app_data.get('header_image')
                fullgame = app_data.get('fullgame',{}).get('appid')
                supported_languages = app_data.get('supported_languages')
                if supported_languages:
                    supported_languages = supported_languages.replace('<strong>*</strong>', '').replace('<br>languages with full audio support','')
                if app_data.get('is_free') == True:
                    initial_price = 0
                    currency = 'USD'
                else:
                    if app_data.get('price_overview',{}):
                        initial_price = app_data.get('price_overview',{}).get('initial', 0) / 100
                        currency = app_data.get('price_overview',{}).get('currency')
                    else:
                        initial_price = None
                        currency = None
                if app_data.get('platforms',{}).get('linux'):
                    linux = 1
                else:
                    linux = 0
                if app_data.get('platforms',{}).get('mac'):
                    mac = 1
                else:
                    mac = 0
                if app_data.get('platforms',{}).get('windows'):
                    windows = 1
                else:
                    windows = 0
                if app_data.get('release_date',{}).get('coming_soon') == False:
                    release_date = app_data.get('release_date',{}).get('date')
                    if release_date:
                        try:
                            release_date = datetime.strptime(release_date, '%b %d, %Y').date()
                        except Exception as e:
                            try:
                                release_date = datetime.strptime(release_date, '%d %b, %Y').date()
                            except:
                                try:
                                    release_date = datetime.strptime(release_date, '%b %Y').date()
                                except:
                                    release_date = None
                    else:
                        release_date = None
                else:
                    release_date = None
                dic_steam_app = {
                    app_id : {
                        'app_id' : app_id,
                        'currency' : currency,
                        'developers' : developers,
                        'publishers' : publishers,
                        'name' : name,
                        'required_age' : required_age,
                        'short_description' : short_description,
                        'critic_score' : critic_score,
                        'type' : app_type,
                        'recommendation' : recommendation,
                        'header_image' : header_image,
                        'initial_price' : initial_price,
                        'linux' : linux,
                        'mac' : mac,
                        'windows' : windows,
                        'fullgame' : fullgame,
                        'release_date' : release_date,
                        'supported_languages' : supported_languages,
                        'success' : success
                    }
                }
            else:
                dic_steam_app = {app_id : {'app_id' : app_id, 'success' : 0}}
    else:
        dic_steam_app = {}
    return dic_steam_app

In [7]:
def update_steam_game_info():
    print('Parse app info and dump to database')
    dic_steam_app = {}
    with open(path_app_info, 'rb') as f:
        lst_raw_string = f.readlines()
        total_count = len(lst_raw_string)
        current_count = 0
        for i in lst_raw_string:
            app_info = json.loads(i)
            dic_steam_app.update(parse_steam_app_info(app_info))
            show_work_status(1, total_count, current_count)
            current_count += 1
            
    df_steam_app = pd.DataFrame.from_dict(dic_steam_app, 'index')
    df_steam_app = df_steam_app.loc[:,['app_id','name', 'release_date', 'type', 'currency', 'initial_price', 'developers', 'publishers', 'required_age', 'linux', 'mac', 'windows', 'fullgame', 'supported_languages', 'header_image', 'short_description']]
    df_steam_app.to_csv('app_test.csv')
    data = pd.read_csv('app_test.csv')
    return data.head()
update_steam_game_info() 

Parse app info and dump to database
[>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] 100.00% 



Unnamed: 0.1,Unnamed: 0,app_id,name,release_date,type,currency,initial_price,developers,publishers,required_age,linux,mac,windows,fullgame,supported_languages,header_image,short_description
0,216938,216938,,,,,,,,,,,,,,,
1,660010,660010,,,,,,,,,,,,,,,
2,660130,660130,,,,,,,,,,,,,,,
3,1294730,1294730,KILL YOUR FRIENDS Demo,,demo,USD,0.0,ROBINJAM GAMES,ROBINJAM GAMES,0.0,1.0,1.0,1.0,675110.0,,https://steamcdn-a.akamaihd.net/steam/apps/129...,
4,1294850,1294850,Into A Dream Demo,,demo,USD,0.0,Filipe F. Thomaz,Filipe F. Thomaz,0.0,0.0,0.0,1.0,1238360.0,"English, Portuguese - Brazil, Russian",https://steamcdn-a.akamaihd.net/steam/apps/129...,You are his last hope. The only one who can re...


## Using a portion of timing data as a demo code

In [6]:
path_user_id = 'steam_user_id.txt'
with open(path_user_id, 'rb') as f:
    lst_user_id = f.readlines()[:50]

In [7]:
# get your key from steam
def worker(lst_user_id_temp):
    dic_temp = {}
    for user_id in lst_user_id_temp:
        base_url = 'http://api.steampowered.com/IPlayerService/GetOwnedGames/v0001/'
        params = {
            'key' : '6A5C427F975DE2307958C931D1E24752',
            'steamid' : user_id.strip(),
            'format' : 'json' }
        r = requests.get(base_url, params = params)
        user_inventory = r.json().get('response').get('games')
        dic_temp.update({user_id.strip():user_inventory})
        time.sleep(.5)
    return dic_temp
    

In [8]:
# It is a demonstration how to use multiprocessing in python 
from multiprocessing import Pool
p = Pool(2)

total_count = len(lst_user_id)
current_count = 0
show_work_status(0, total_count, current_count)

dic_master = {}
for i in split_list(lst_user_id,10):
    lst_temp_dic = p.map(worker, split_list(i,5))
    for j in lst_temp_dic:
        dic_master.update(j)
    show_work_status(len(i), total_count, current_count)
    current_count += len(i)
    time.sleep(5)

[>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] 100.00% 



In [9]:
## write a new inventory data
with open('crawled_user_inventory.txt', 'w') as f:
    for user_id, user_inventory in list(dic_master.items()):
        f.write(json.dumps({str(user_id.decode('utf8')):user_inventory}))
        f.write('\n')

In [11]:
path_user_inventory = 'crawled_user_inventory.txt'

In [12]:
lst_player_game_playtime = []        
with open(path_user_inventory, 'rb') as f:
    for raw_string in f.readlines():
        for user_id, lst_inventory in list(json.loads(raw_string).items()):
            if lst_inventory:
                for i in lst_inventory:
                    lst_player_game_playtime.append((int(user_id),i['appid'],i['playtime_forever']))
                    
                    #show_work_status(1, total_count, current_count)
                    #current_count += 1

df_player_time = pd.DataFrame(lst_player_game_playtime,columns = ["steamid","appid","playtime_forever"])
df_player_time.to_csv("test_steam_playertime.csv")
print(df_player_time.shape)
df_player_time.head()

    

(3024, 3)


Unnamed: 0,steamid,appid,playtime_forever
0,76561198074188133,4000,3415
1,76561198074188133,34030,16526
2,76561198074188133,42680,4631
3,76561198074188133,42690,11055
4,76561198074188133,207610,126
