In [6]:
import os
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import requests

In [2]:
user_df = pd.read_csv('data/steam_new_users.csv')
user_ids = user_df.loc[:, ["user_id", "public"]].to_dict(orient='list')

In [3]:
def if_have_games(user_id):
    req = f'https://steamcommunity.com/profiles/{user_id}/games?tab=all&xml=1'
    # print(req)
    num_tries = 3
    while num_tries >= 0:
        resp = requests.get(req)
        if resp.status_code != 200:
            print(f"Status code: {resp.status_code}")
            print(f"Left retries: {num_tries}")
            if resp.status_code == 429:
                print("MAX RETRIES")
                raise KeyError(f"{user_id}")
            num_tries -= 1 
            continue
        from xml.etree import ElementTree
        from xml.etree.ElementTree import ParseError
        try:
            xml_tree = ElementTree.fromstring(resp.content)
            break
        except ParseError:
            num_tries -= 1 
    
    if num_tries < 0:
        return False
    
    for elem in xml_tree.iter('*'):
        if elem.tag == "game":
            return True
    return False

In [30]:
for u in user_ids['user_id'][3100:5688]:
    public = if_have_games(u)
    print(f"{u}: {public}")
    user_df.loc[user_df['user_id'] == u, 'public'] = public

Status code: 429
Left retries: 3
MAX RETRIES


KeyError: '76561198204183475'

In [29]:
user_df.to_csv('data/steam_new_users.csv', index=False)

In [24]:
user_df[user_df['public'] == True].head(100)

Unnamed: 0,user_id,public,used
5,76561198029605279,True,True
8,76561198052143086,True,False
9,76561198052692877,True,False
19,76561198079942325,True,False
29,76561198441341534,True,False
...,...,...,...
496,76561198121159962,True,False
502,76561198165392406,True,False
505,76561198204559824,True,False
508,76561198267660563,True,False


In [31]:
from IPython.display import display
pd.options.display.max_columns = None

In [26]:
print(user_df[user_df['public'] == True].to_string())

                 user_id  public   used
5      76561198029605279    True   True
8      76561198052143086    True  False
9      76561198052692877    True  False
19     76561198079942325    True  False
29     76561198441341534    True  False
49     76561198003369491    True  False
67     76561198029074467    True  False
73     76561198034273011    True  False
82     76561198039516804    True  False
92     76561198050103023    True  False
100    76561198058050734    True  False
103    76561198068390989    True  False
118    76561198075972829    True  False
120    76561198076779541    True  False
127    76561198078186377    True  False
149    76561198083548453    True  False
161    76561198108308571    True  False
165    76561198111114137    True  False
168    76561198117450646    True  False
174    76561198128050757    True  False
176    76561198135528609    True  False
177    76561198137104694    True  False
184    76561198155984367    True  False
203    76561198197563899    True  False


In [28]:
user_df[user_df['public'] == True].shape

(1608, 3)