In [1]:
import json
import os
import time

import pandas as pd
import requests
from bs4 import BeautifulSoup

In [2]:
# Steam Web API key, read from the environment so the secret is never stored in
# the notebook. Export STEAM_API_KEY before starting the kernel.
# NOTE(review): any key that was ever committed in this notebook should be revoked.
key = os.environ.get("STEAM_API_KEY")
if not key:
    print("STEAM_API_KEY is not set; Steam Web API calls need it.")

# SteamID64 of the account whose friend list seeds the crawl.
steam_id = "76561198123999673"

### Get Friend List

In [3]:
def get_friend_list(steam_id):
    """Fetch the friend IDs of one Steam account via the Steam Web API.

    Parameters
    ----------
    steam_id : str or int
        SteamID64 of the account whose friend list is requested.

    Returns
    -------
    pandas.Series or None
        The ``steamid`` field of every entry in the returned friend list, or
        ``None`` (after printing ``'No response!'``) when the request fails,
        the body is not valid JSON, or the payload contains no friends.

    Notes
    -----
    Authenticates with the module-level ``key``.
    """
    # Pass the query as `params` instead of a backslash-continued f-string:
    # the continuation kept the next line's indentation inside the URL.
    try:
        response_friends = requests.get(
            "https://api.steampowered.com/ISteamUser/GetFriendList/v0001/",
            params={"key": key, "steamid": steam_id, "relationship": "friend"},
            timeout=30,
        )
        # Parse once; JSON decode errors are ValueError subclasses.
        payload = json.loads(response_friends.text)
    except (requests.RequestException, ValueError):
        payload = {}

    friends_list = payload.get('friendslist') if isinstance(payload, dict) else None
    data_friends = friends_list.get('friends') if isinstance(friends_list, dict) else None
    if not data_friends:
        print('No response!')
        return None

    return pd.json_normalize(data_friends).steamid

def concat_friend_lists(df1, df2):
    """Append ``df2`` to ``df1`` and keep only the first occurrence of each value.

    The concatenation renumbers the index from zero; rows removed as
    duplicates afterwards leave gaps in that numbering.
    """
    merged = pd.concat((df1, df2), ignore_index=True)
    return merged.drop_duplicates()

In [4]:
# Seed the crawl with the friend list of the account configured in `steam_id`.
df_friends = get_friend_list(steam_id)

In [5]:
# Set to True to re-crawl friend lists from the API; False reloads the cached CSV.
download = False
if download:
    # Expand the seed list with the friends of its first five members.
    # Series.items() replaces iteritems(), which was removed in pandas 2.0.
    for i, item in df_friends.iloc[:5].items():
        df_friends = concat_friend_lists(df_friends, get_friend_list(item))
        time.sleep(.5)  # pause between API calls
        print(f'{i, item} Done...')

    # The crawled series is named 'steamid', but the reload branch below reads
    # column '0'; write it under that header so a fresh download can be reloaded.
    df_friends.rename('0').to_csv('../data/raw/steamAPI_friendsList.csv')
else:
    df_friends = pd.read_csv('../data/raw/steamAPI_friendsList.csv'
                             , index_col=False).loc[:, '0']

In [6]:
# Summarise the size and dtype of the friend-ID series.
df_friends.info()

<class 'pandas.core.series.Series'>
RangeIndex: 346158 entries, 0 to 346157
Series name: 0
Non-Null Count   Dtype
--------------   -----
346158 non-null  int64
dtypes: int64(1)
memory usage: 2.6 MB


In [7]:
# Preview the friend IDs.
df_friends

0         76561197966638457
1         76561198034918747
2         76561198067832559
3         76561198074247705
4         76561198077671905
                ...        
346153    76561198818184609
346154    76561198855934566
346155    76561198967413465
346156    76561199038016644
346157    76561199227050728
Name: 0, Length: 346158, dtype: int64

### Get Owned Games

In [8]:
def get_owned_games(steam_id):
    """Fetch the games owned by one Steam account via the Steam Web API.

    Parameters
    ----------
    steam_id : str or int
        SteamID64 of the account whose library is requested.

    Returns
    -------
    pandas.DataFrame or None
        One row per owned game with columns ``steamid``, ``appid`` and
        ``playtime_forever``, or ``None`` (after printing ``'No response!'``)
        when the request fails, the body is not valid JSON, or the payload
        contains no games.

    Notes
    -----
    Authenticates with the module-level ``key``.
    """
    try:
        # HTTPS keeps the API key out of clear text on the wire.
        response_games = requests.get(
            "https://api.steampowered.com/IPlayerService/GetOwnedGames/v0001/",
            params={"key": key, "steamid": steam_id, "format": "json"},
            timeout=30,
        )
        # Parse once; JSON decode errors are ValueError subclasses.
        payload = json.loads(response_games.text)
    except (requests.RequestException, ValueError):
        payload = {}

    # Walk the payload defensively: the previous `|` test evaluated both sides,
    # so an empty payload raised KeyError on ['response'], and a response
    # without a 'games' entry raised KeyError as well.
    response_body = payload.get('response') if isinstance(payload, dict) else None
    data_games = response_body.get('games') if isinstance(response_body, dict) else None
    if not data_games:
        print('No response!')
        return None

    return (
        pd.json_normalize(data_games)
        .assign(steamid=steam_id)
        .loc[:, ['steamid', 'appid', 'playtime_forever']]
    )

def concat_owned_games(df1, df2):
    """Stack two owned-games tables row-wise, renumbering the index from zero."""
    return pd.concat((df1, df2), ignore_index=True)

In [9]:
# Start the games table with the library of the account configured in `steam_id`.
df_games = get_owned_games(steam_id)

In [11]:
# Set to True to re-crawl owned games from the API; False reloads the cached CSV.
download = False
if download:
    # Collect one frame per user and concatenate once at the end; calling
    # concat inside the loop re-copies every accumulated row each iteration.
    game_frames = [df_games]
    # Series.items() replaces iteritems(), which was removed in pandas 2.0.
    for i, item in df_friends.iloc[2000:3000].items():
        game_frames.append(get_owned_games(item))
        time.sleep(1)  # pause between API calls
        print(f'{i, item} Done...')

    # get_owned_games returns None for accounts without data; skip those.
    df_games = pd.concat(
        [frame for frame in game_frames if frame is not None],
        ignore_index=True,
    )
    df_games.to_csv('../data/raw/steamAPI_ownedGames.csv')
else:
    df_games = pd.read_csv('../data/raw/steamAPI_ownedGames.csv', index_col=0)

In [12]:
# Show the first rows of the owned-games table.
df_games.head()

Unnamed: 0,steamid,appid,playtime_forever
0,76561198123999673,50,2
1,76561198123999673,60,83
2,76561198123999673,70,1691
3,76561198123999673,130,408
4,76561198123999673,220,4357


In [13]:
# Summarise row count, columns and dtypes of the owned-games table.
df_games.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 99349 entries, 0 to 99348
Data columns (total 3 columns):
 #   Column            Non-Null Count  Dtype
---  ------            --------------  -----
 0   steamid           99349 non-null  int64
 1   appid             99349 non-null  int64
 2   playtime_forever  99349 non-null  int64
dtypes: int64(3)
memory usage: 3.0 MB


### Get Game Tags

In [14]:
def get_game_tags(appid):
    """Scrape the title and user tags from a game's Steam store page.

    Parameters
    ----------
    appid : int or str
        Steam application ID used to build the store URL.

    Returns
    -------
    pandas.DataFrame or None
        A single-row frame with columns ``appid``, ``title`` (``None`` when
        the page has no ``appHubAppName`` element) and ``tags`` (a list of
        tag strings, possibly empty). ``None`` is returned, after printing
        ``'Game page not found!'``, when the request fails or the status code
        is not 200.
    """
    page = f"https://store.steampowered.com/app/{appid}"
    try:
        response = requests.get(page, timeout=30)
    except requests.RequestException:
        print('Game page not found!')
        return None
    if response.status_code != 200:
        print('Game page not found!')
        return None

    soup = BeautifulSoup(response.text, 'html.parser')

    # Store the element's text, not the bs4 Tag object itself.
    title_node = soup.find('div', {'id': 'appHubAppName'})
    title = title_node.get_text(strip=True) if title_node is not None else None

    tags = [tag.text.strip() for tag in soup.find_all('a', {'class': 'app_tag'})]

    # Wrap every value in a list so the frame is exactly one row and the tag
    # list stays a single cell.
    return pd.DataFrame({'appid': [appid], 'title': [title], 'tags': [tags]})

def concat_game_tags(df1, df2):
    """Stack two game-tag tables row-wise, renumbering the index from zero."""
    return pd.concat((df1, df2), ignore_index=True)

In [17]:
# df_tags = get_game_tags(730)

In [109]:
# Count the rows per appid, most frequent first.
df_games.appid.value_counts()

730        855
218620     541
578080     521
304930     423
550        418
          ... 
468080       1
468050       1
467890       1
1784780      1
748300       1
Name: appid, Length: 14749, dtype: int64

In [106]:
# Set to True to re-scrape store pages; False reloads the cached CSV.
download = False
if download:
    # Continue from tags already held in this session when there are any;
    # on a fresh kernel `df_tags` does not exist yet, so start empty.
    tag_frames = [df_tags] if 'df_tags' in globals() else []

    # Positions 5000-5999 of the appids ranked by how often they occur.
    batch = df_games.appid.value_counts().index[5000:6000]
    for i, appid in enumerate(batch):
        tag_frames.append(get_game_tags(appid))
        time.sleep(1)  # pause between page requests
        print(f'{i, appid} Done...')

    # get_game_tags returns None for pages it could not fetch; skip those,
    # concatenate once, and keep the first row seen for each appid.
    df_tags = pd.concat(
        [frame for frame in tag_frames if frame is not None],
        ignore_index=True,
    ).drop_duplicates('appid')
    df_tags.to_csv('../data/raw/steamAPI_gameTags.csv')
else:
    df_tags = pd.read_csv('../data/raw/steamAPI_gameTags.csv', index_col=0).rename(columns={'0': 'appid'})

In [107]:
# Show the first rows of the game-tags table.
df_tags.head()

Unnamed: 0,appid,title,tags
0,730,Counter-Strike: Global Offensive,"['FPS', 'Shooter', 'Multiplayer', 'Competitive..."
1,218620,PAYDAY 2,"['Co-op', 'Action', 'FPS', 'Heist', 'Looter Sh..."
2,578080,PUBG: BATTLEGROUNDS,"['Survival', 'Shooter', 'Multiplayer', 'Battle..."
3,304930,Unturned,"['Free to Play', 'Survival', 'Zombies', 'Open ..."
4,550,Left 4 Dead 2,"['Zombies', 'Co-op', 'FPS', 'Multiplayer', 'Sh..."


In [112]:
# Summarise row count, columns, dtypes and non-null counts of the game-tags table.
df_tags.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 5003 entries, 0 to 5002
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   appid   5003 non-null   int64 
 1   title   4715 non-null   object
 2   tags    5003 non-null   object
dtypes: int64(1), object(2)
memory usage: 156.3+ KB
