### Steam API requests to collect users metadata.

In [28]:
# import libraries
import pandas as pd
import os
import requests as re
import time
from tqdm import tqdm
import random

In [29]:
# import path, api key and steam id
USER_PLAYTIME_PATH = '../data/steam_playtime.csv'
STEAM_API_KEY = os.environ['STEAM_API']
MY_STEAM_ID = os.environ['STEAM_ID']

In [33]:
# urls for api calls
owned_url = 'http://api.steampowered.com/IPlayerService/GetOwnedGames/v0001/'
getfriendlist_url = 'http://api.steampowered.com/ISteamUser/GetFriendList/v0001/'

In [30]:
# get playtime data. if it doesn't exist create it with columns appid, MY_STEAM_ID, playtime_2weeks, playtime_forever
if os.path.exists(USER_PLAYTIME_PATH):
    playtime_df = pd.read_csv(USER_PLAYTIME_PATH)
else:
    playtime_df = pd.DataFrame(columns = ['appid', 'steam_id', 'playtime_2weeks', 'playtime_forever'])


In [31]:
len(playtime_df)

2226459

In [None]:
def parse_playtime_json(steamid, json_data) -> list[dict]:
    """ Get game playtimes from the json data and return a list of dicts """
    
    user_playtimes = [] 
    for game in json_data['response']['games']:
        if game['playtime_forever'] > 0:
            appid = game['appid']
            playtime_forever = game['playtime_forever']
            if 'playtime_2weeks' in game:
                playtime_2weeks = game['playtime_2weeks']
            else:
                playtime_2weeks = 0
            user_playtimes.append({'appid': appid, 
                                    'steam_id': steamid, 
                                    'playtime_2weeks': playtime_2weeks, 
                                    'playtime_forever': playtime_forever})
    return user_playtimes

In [34]:
def get_user_playtimes(steamid):
    """ function to get steam users playtime data """
    
    global playtime_df
    
    # check if we parsed this one, i.e. steam_id is not in our dataframe steam_id column
    if steamid not in playtime_df['steam_id'].values:
        urlparams = {'key': STEAM_API_KEY, 'steamid': steamid, 'format': 'json'}
        url_request = re.get(owned_url, params=urlparams)
        time.sleep(1)
        # if status_code is not 200, return
        if url_request.status_code != 200:
            print(url_request.status_code, end='')
            return
        else:
            req_json = url_request.json()
            if 'games' in req_json['response']:
                print('p', end='') # p for parsing
                # parse req_json fields for games user has played

                for game in req_json['response']['games']:
                    if game['playtime_forever'] > 0:
                        appid = game['appid']
                        playtime_forever = game['playtime_forever']
                        # if key playtime_2weeks exists
                        if 'playtime_2weeks' in game:
                            playtime_2weeks = game['playtime_2weeks']
                        else:
                            playtime_2weeks = 0
                        # append to dataframe
                        playtime_df = playtime_df.append({'appid': appid, 
                                                          'steam_id': steamid, 
                                                          'playtime_2weeks': playtime_2weeks, 
                                                          'playtime_forever': playtime_forever},
                                                         ignore_index=True)
            else:
                return

In [9]:
def friends_games(steamid, depth = 0):
    """ function to get friends steam id's recursively, recursive depth set to 6 """
    if depth == 6:
        return
    getfriendparams = {
        'key': STEAM_API_KEY,
        'steamid': steamid, 
        'relationship': 'all', 
        'format': 'json'}
    url_request = re.get(getfriendlist_url, params=getfriendparams)
    time.sleep(1)
    if url_request.status_code != 200:
        print(url_request.status_code, end=" ")
        return
    else:
        req_json = url_request.json()
        if 'friendslist' in req_json:
            for friend in req_json['friendslist']['friends']:
                steamid = friend['steamid']
                # get playtime data for this friend
                get_user_playtimes(steamid)
                # run friends_games on this friend
                friends_games(steamid, depth+1)
                time.sleep(2)
        else:
            return

In [None]:
# test run, parse through my friends
friends_games(MY_STEAM_ID)

### GET PLAYTIME DATA FROM STEAMID.UK ID's

In [21]:
def get_userids_from_steamid_uk() -> pd.DataFrame:
    """ 
    parse the csv files from https://steamid.uk/downloads/
    and return a dataframe with unique userids. 
    Download files from the site above and place them in your data folder.
    :return: dataframe with unique userids from steamid.uk
    """

    sources = ['../data/3digitURLhistory04_06_2019.csv',
    '../data/3digitURLhistory16_04_2019.csv',
    '../data/3digitURLhistory18_01_2020.csv',
    '../data/3digitURLhistory18_03_2019.csv']
    
    source_col0 = '../data/3digitURLhistory26_02_2019.csv'

    # create DataFrame with feature 'someid'
    history = pd.DataFrame(columns=['someid'])

    # read in the csv files
    for source in sources:
        history = history.append(pd.read_csv(source, header=None, usecols=[1], names=['someid']))

    history = history.append(pd.read_csv(source_col0, header=None, usecols=[0], names=['someid']))

    # return unique ids
    return history.drop_duplicates()

In [22]:
# create DataFrame with feature 'someid'
history = get_userids_from_steamid_uk()

In [26]:
history.to_csv('../data/steamid_uk_userids.csv', index=False)

In [None]:
# loop through data and get user playtimes, takes less than a day :)
for someid in tqdm(history['someid']):
    user_playtimes(someid)

In [12]:
len(playtime_df)

2226459

### GET PLAYTIME DATA RECURSIVELY

In [10]:
# create a list of unique steam_id's from playtime
steamids = list(set(playtime_df['steam_id']))

In [11]:
# loop that picks random steam_id's from steamids and runs friends_games on it
for i in range(10):
    random_steamid = random.choice(steamids)
    friends_games(random_steamid)
    time.sleep(5)

401 

  if steamid not in playtime_df['steam_id'].values:


pp401 401 401 401 pppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppp401 401 p401 pppppppppppppppp401 pppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppp401 ppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppp

ConnectionError: HTTPConnectionPool(host='api.steampowered.com', port=80): Max retries exceeded with url: /IPlayerService/GetOwnedGames/v0001/?key=8CC43A4BB140A28ED82F4A00A7A006EF&steamid=76561198351070848&format=json (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x7f5e6418ae80>: Failed to establish a new connection: [Errno -2] Name or service not known'))

In [13]:
# save data to csv
playtime_df.to_csv(USER_PLAYTIME_PATH, index=False)