### Steam API requests to collect users' playtime metadata.

In [None]:
# import libraries
import numpy as np
import pandas as pd
import os
import requests as re
import time
from tqdm import tqdm
import random
from src.utils.utils import get_userids_from_steamid_uk



In [None]:
# Where the playtime table is cached on disk, plus the Steam Web API key and
# the seed Steam ID. Both environment variables are required: a missing one
# raises KeyError here.
USER_PLAYTIME_PATH = "../data/steam_playtime.csv"
steam_api_key = os.environ["STEAM_API"]
steam_id = os.environ["STEAM_ID"]

In [None]:
# Start from the cached playtime table when one exists on disk; otherwise
# begin with an empty table that has the four expected columns.
if not os.path.exists(USER_PLAYTIME_PATH):
    playtime_df = pd.DataFrame(
        columns=['appid', 'steam_id', 'playtime_2weeks', 'playtime_forever'],
    )
else:
    playtime_df = pd.read_csv(USER_PLAYTIME_PATH)


In [None]:
# peek at the first five rows of the playtime table
playtime_df.head(n=5)

In [None]:
# Endpoint and query parameters for the GetOwnedGames call.
# HTTPS is used because the API key is sent as a query parameter and must not
# travel in clear text.
owned_url = 'https://api.steampowered.com/IPlayerService/GetOwnedGames/v0001/'
# Parameters for the seed user; user_playtimes() builds its own per-user dict.
ownedparams = {'key': steam_api_key, 'steamid': steam_id, 'format': 'json'}

In [None]:
# function to get steam users playtime data
def user_playtimes(steamid):
    """Fetch a user's owned games from the Steam API and record the played ones.

    One row per game with a non-zero ``playtime_forever`` is added to the
    module-level ``playtime_df`` (columns ``appid``, ``steam_id``,
    ``playtime_2weeks``, ``playtime_forever``). Nothing is returned.

    The call does nothing when ``steamid`` is already present in
    ``playtime_df['steam_id']``. Network errors, non-200 responses, non-JSON
    bodies and responses without a ``games`` list are skipped (a short marker
    is printed) so that one bad user does not abort a long crawl.

    Args:
        steamid: Steam ID of the user to look up (string or integer).
    """
    global playtime_df

    # Compare IDs by their string form: IDs parsed from JSON and IDs read back
    # from the CSV can differ in type (str vs. int) while naming the same user.
    wanted = str(steamid)
    if any(str(known) == wanted for known in playtime_df['steam_id'].unique()):
        return

    urlparams = {'key': steam_api_key, 'steamid': steamid, 'format': 'json'}
    try:
        # NOTE: `re` is the `requests` package in this file, not the regex module.
        # The timeout keeps a stalled connection from hanging the crawl forever.
        url_request = re.get(owned_url, params=urlparams, timeout=30)
    except re.exceptions.RequestException as err:
        print(type(err).__name__, end=" ")
        return
    finally:
        # throttle: pause after every request attempt, successful or not
        time.sleep(1)

    # if status_code is not 200, report it and give up on this user
    if url_request.status_code != 200:
        print(url_request.status_code, end=" ")
        return

    try:
        req_json = url_request.json()
    except ValueError:
        print('j', end='')  # j for a body that is not valid JSON
        return

    games = req_json.get('response', {}).get('games')
    if games is None:
        return

    print('p', end='')  # p for parsing
    # Build all rows first and extend the table once: DataFrame.append was
    # removed in pandas 2.0 and copied the whole table for every game.
    rows = [
        {
            'appid': game['appid'],
            'steam_id': steamid,
            # the key is absent when the game was not played in the last 2 weeks
            'playtime_2weeks': game.get('playtime_2weeks', 0),
            'playtime_forever': game['playtime_forever'],
        }
        for game in games
        if game.get('playtime_forever', 0) > 0
    ]
    if not rows:
        return

    new_rows = pd.DataFrame(
        rows,
        columns=['appid', 'steam_id', 'playtime_2weeks', 'playtime_forever'],
    )
    if playtime_df.empty:
        playtime_df = new_rows
    else:
        playtime_df = pd.concat([playtime_df, new_rows], ignore_index=True)

In [None]:
# Endpoint and query parameters for the GetFriendList call.
# HTTPS is used because the API key is sent as a query parameter and must not
# travel in clear text.
getfriendlist_url = 'https://api.steampowered.com/ISteamUser/GetFriendList/v0001/'
# Parameters for the seed user; friends_games() builds its own per-user dict.
getfriendparams = {'key': steam_api_key, 'steamid': steam_id, 'relationship': 'all', 'format': 'json'}

In [None]:
# function to get friends steam id's recursively, recursive depth defaults to 6
def friends_games(steamid, depth=0, max_depth=6, _visited=None):
    """Walk the Steam friend graph from ``steamid`` and collect playtimes.

    For every friend of ``steamid`` this calls ``user_playtimes`` and then
    recurses into that friend's own friend list, one level deeper. Nothing is
    returned; results accumulate through ``user_playtimes``.

    Friend graphs contain cycles (A lists B, B lists A). A user whose friend
    list was already requested at the same or a shallower depth during this
    crawl is therefore not requested again.

    Network errors, non-200 responses and non-JSON bodies are skipped after
    printing a short marker.

    Args:
        steamid: Steam ID whose friend list is fetched.
        depth: Current recursion level; callers normally leave it at 0.
        max_depth: Recursion stops once ``depth`` reaches this value.
        _visited: Internal bookkeeping shared across the recursion
            (ID -> shallowest depth requested). Callers should not pass it.
    """
    if depth >= max_depth:
        return
    if _visited is None:
        _visited = {}

    # Skip users already expanded from an equal or shallower level: a repeat
    # request could only reach a subset of what that earlier visit covered.
    visit_key = str(steamid)
    seen_at = _visited.get(visit_key)
    if seen_at is not None and seen_at <= depth:
        return
    _visited[visit_key] = depth

    params = {'key': steam_api_key, 'steamid': steamid, 'relationship': 'all', 'format': 'json'}
    try:
        # NOTE: `re` is the `requests` package in this file, not the regex module.
        # The timeout keeps a stalled connection from hanging the crawl forever.
        url_request = re.get(getfriendlist_url, params=params, timeout=30)
    except re.exceptions.RequestException as err:
        print(type(err).__name__, end=" ")
        return
    finally:
        # throttle: pause after every request attempt, successful or not
        time.sleep(1)

    # if status_code is not 200, report it and stop descending here
    if url_request.status_code != 200:
        print(url_request.status_code, end=" ")
        return

    try:
        req_json = url_request.json()
    except ValueError:
        print('j', end='')  # j for a body that is not valid JSON
        return

    for friend in req_json.get('friendslist', {}).get('friends', []):
        friend_id = friend['steamid']
        # get playtime data for this friend
        user_playtimes(friend_id)
        # then continue the crawl from this friend, one level deeper
        friends_games(friend_id, depth + 1, max_depth, _visited)
        time.sleep(2)

In [None]:
# Trial crawl: start from my own Steam ID at recursion level 0.
friends_games(steam_id, depth=0)

In [None]:
# Load user IDs through the project helper. The next cell iterates over
# history['someid'], so the result is expected to expose that key/column.
# NOTE(review): the helper's data source and return type are not visible here.
history = get_userids_from_steamid_uk()

In [None]:
# Fetch playtimes for every id in history['someid'], with a tqdm progress bar.
# Per the original note, a full pass takes less than a day :)
progress = tqdm(history['someid'])
for someid in progress:
    user_playtimes(someid)

In [None]:
# number of rows collected so far
playtime_df.shape[0]

In [None]:
# de-duplicate the steam_id column of the playtime table into a plain list
steamids = [*set(playtime_df['steam_id'])]

In [None]:
# Pick 10 random steam_id's from the `steamids` list and crawl each one's
# friend graph, pausing 5 seconds between crawls.
# random.choice raises IndexError on an empty list, so skip when nothing has
# been collected yet.
if steamids:
    for i in range(10):
        random_steamid = random.choice(steamids)
        friends_games(random_steamid)
        time.sleep(5)
else:
    print('no steam ids collected yet, skipping random crawl')

In [None]:
# save data to csv
# Create the target directory first: to_csv does not create missing parent
# directories, and failing here would lose the whole crawl.
output_dir = os.path.dirname(USER_PLAYTIME_PATH)
if output_dir:
    os.makedirs(output_dir, exist_ok=True)
playtime_df.to_csv(USER_PLAYTIME_PATH, index=False)