In [None]:
import urllib2
import time
from datetime import datetime
from bs4 import BeautifulSoup
import re, os
import json
from lxml import etree, html
import requests
import pandas as pd
import numpy as np
#from lxml import html

# Steam API
## Friends From Friends
```
http://api.steampowered.com/ISteamUser/GetFriendList/v0001/?key=XXXX&steamid=XXXX&relationship=all
```

In [32]:
def get_friends(id):
    '''
    Starting with a single user ID, return their friends
    '''
    key = os.environ['ACCESS_STEAM']
    url = 'http://api.steampowered.com/ISteamUser/GetFriendList/v0001/?key='+key+'&steamid='+id+'&relationship=all'
    return json.loads(urllib2.urlopen(url).read())['friendslist']['friends']

## Basic User Info
```
http://api.steampowered.com/ISteamUser/GetPlayerSummaries/v0002/?key=XXXX&steamids=XXXX,YYYY
```
* Comma-delimited list of 64 bit Steam IDs to return profile information for. Up to 100 Steam IDs can be requested.

In [30]:
def get_user_info(ids):
    '''
    For a user or users (up to 100 comma-delimited),
    return some basic user information
    '''
    key = os.environ['ACCESS_STEAM']
    url = 'http://api.steampowered.com/ISteamUser/GetPlayerSummaries/v0002/?key='+key+'&steamids='+ids
    return json.loads(urllib2.urlopen(url).read())['response']['players'][0]

## Player Bans
```
http://api.steampowered.com/ISteamUser/GetPlayerBans/v1/?key=XXXX&steamids=XXXX,YYYY
```


In [13]:
def get_bans(ids):
    '''
    Looks at the IDS ban history
    '''
    key = os.environ['ACCESS_STEAM']
    url = 'http://api.steampowered.com/ISteamUser/GetPlayerBans/v1/?key='+key+'&steamids='+ids
    return json.loads(urllib2.urlopen(url).read())['players']

# API Class Helper

In [217]:
class SteamStuff(object):
    """
    Class to keep track of Steam APIs.
    Ideally will return usful info.
    Pass in the Steam API key to get going!
    """
    
    def __init__(self, key, gameid=None):
        """
        Initialize with the Steam API key and Game ID (if needed)
        """
        self.key = key
        self.gameid = gameid
        self.player_dict = {}
    
    
    def get_bans(self, ids):
        """
        Looks at the 17-digit ID's ban history (up to 100 comma-delimited IDs)
        """
        url = 'http://api.steampowered.com/ISteamUser/GetPlayerBans/v1/?key='+self.key+'&steamids='+ids
        return json.loads(urllib2.urlopen(url).read())['players']
    
    
    def get_user_info(self, ids):
        """
        For a user or users (up to 100 comma-delimited),
        return some basic user information
        """
        url = 'http://api.steampowered.com/ISteamUser/GetPlayerSummaries/v0002/?key='+self.key+'&steamids='+ids
        return json.loads(urllib2.urlopen(url).read())['response']['players'][0]
    
    
    def get_friends(self, id, public=False):
        """
        INPUTS:
          self - API key
          id - 17-digit Steam ID
          public - NEEDS UPDATE - return only PUBLIC profiles (determined in get_user_info)
        OUTPUT:
          List of dictionaries, e.g.,
            {u'friend_since': 0,
             u'relationship': u'friend',
             u'steamid': u'76561197960559382'}
        Starting with a single user ID, return their friends
        If public, only return users who have public profiles
        """
        url = 'http://api.steampowered.com/ISteamUser/GetFriendList/v0001/?key='+self.key+'&steamid='+id+'&relationship=all'
        return json.loads(urllib2.urlopen(url).read())['friendslist']['friends']
    
    
    def get_game_info(self, id):
        """
        INPUTS:
          self - API key
          id - 17-digit Steam ID
        OUTPUT:
          List of dictionaries, e.g.,
            {u'appid': 10,
             u'has_community_visible_stats': True,
             u'img_icon_url': u'6b0312cda02f5f777efa2f3318c307ff9acafbb5',
             u'img_logo_url': u'af890f848dd606ac2fd4415de3c3f5e7a66fcb9f',
             u'name': u'Counter-Strike',
             }
             
        Get owned game information for a give user
        Returns a list of dictionaries
        """
        url =  "http://api.steampowered.com/IPlayerService/GetOwnedGames/v0001/?key="+self.key+"&steamid="+id+"&include_appinfo=1&include_played_free_games=1&format=json"
        return json.loads(urllib2.urlopen(url).read())["response"]["games"]
    
    
    def get_player_achievments(self, id, appid):
        """
        Given a player ID and appid, return a list of achievments
        """
        url = "http://api.steampowered.com/ISteamUserStats/GetPlayerAchievements/v0001/?appid="+appid+"&key="+self.key+"&steamid="+id
        playerstats = json.loads(urllib2.urlopen(url).read())["playerstats"]
        return {k: playerstats[k] for k in ("achievements", "gameName")}
    
    
    def scrape_profile(self, id):
        """
        Look at a user's community profile
        Maybe include '?xml=1' at the end of the url?
        """
        url = "http://steamcommunity.com/profiles/"+id
        pass
    
    
    def game_info(self, appid):
        """
        Get the information for a given game / appid
        """
        url = ""
        pass
        
        
    def get_global_achievements(self, appid):
        """
        Get the global achievement stats, kinda intersting
        Currently returns a LIST of achievement name / global completion percentages; maybe average one number?
        """
        url = "http://api.steampowered.com/ISteamUserStats/GetGlobalAchievementPercentagesForApp/v0002/?gameid="+appid+"&format=json"
        return json.loads(urllib2.urlopen(url).read())["achievementpercentages"]["achievements"]
    
    
    def build_dict(self, id):
        """
        Reference above functions to build a player info dictionary
        Eventually do a self.id kinda thang, naw mean?
        """
        self.player_dict["ban_status"] = self.get_bans(id)
        self.player_dict["friends"] = self.get_friends(id)
        self.player_dict["game_info"] = self.get_game_info(id)
        self.player_dict["user_info"] = self.get_user_info(id)
        #self.player_dict["achievments"] = self.get_player_achievments() needs a specific game
    
    
    def check_out(self):
        """
        Returns a built-up dictionary
        """
        return self.player_dict

In [215]:
user_info = SteamStuff(os.environ["ACCESS_STEAM"])

In [221]:
len(user_info.get_user_info(id1))

15

In [242]:
minuser = user_info.get_user_info("76561198055995417")

In [271]:
mytime = user_info.get_game_info("76561198080951917")

In [272]:
timesum = 0
for game in mytime:
    timesum += game["playtime_forever"]

HTTPError: HTTP Error 403: Forbidden

In [259]:
import time

In [260]:
me = user_info.get_user_info(my_id)

In [277]:
time.gmtime(1385160424)

time.struct_time(tm_year=2013, tm_mon=11, tm_mday=22, tm_hour=22, tm_min=47, tm_sec=4, tm_wday=4, tm_yday=326, tm_isdst=0)

In [266]:
maxuser

[u'steamid',
 u'primaryclanid',
 u'realname',
 u'personaname',
 u'personastate',
 u'personastateflags',
 u'communityvisibilitystate',
 u'loccountrycode',
 u'profilestate',
 u'profileurl',
 u'loccityid',
 u'timecreated',
 u'avatar',
 u'locstatecode',
 u'commentpermission',
 u'avatarfull',
 u'avatarmedium',
 u'lastlogoff']

In [268]:
"timecreated" 

True

In [270]:
user_info.get_user_info("76561197982503327")

{u'avatar': u'https://steamcdn-a.akamaihd.net/steamcommunity/public/images/avatars/00/00a40297103baaf965c72cff9505517e82d1e6e6.jpg',
 u'avatarfull': u'https://steamcdn-a.akamaihd.net/steamcommunity/public/images/avatars/00/00a40297103baaf965c72cff9505517e82d1e6e6_full.jpg',
 u'avatarmedium': u'https://steamcdn-a.akamaihd.net/steamcommunity/public/images/avatars/00/00a40297103baaf965c72cff9505517e82d1e6e6_medium.jpg',
 u'communityvisibilitystate': 3,
 u'lastlogoff': 1460403784,
 u'personaname': u'Saqphire',
 u'personastate': 0,
 u'personastateflags': 0,
 u'primaryclanid': u'103582791435567598',
 u'profilestate': 1,
 u'profileurl': u'http://steamcommunity.com/id/stoppy250/',
 u'steamid': u'76561197982503327',
 u'timecreated': 1147247124}

In [234]:
for dude in look_at_me:
    print len(user_info.get_user_info(dude)), dude

15 76561197964141676
13 76561197982503327
14 76561198008493646
16 76561198020514035
17 76561198021157234
17 76561198028020632
15 76561198034419491
16 76561198037675578
16 76561198041067323
14 76561198048059710
15 76561198050672678
14 76561198051124946
10 76561198055995417
16 76561198057213632
13 76561198057500019
16 76561198058486398
16 76561198061145396
10 76561198061166639
16 76561198065792404
15 76561198068205028
18 76561198068988796
14 76561198069850892
15 76561198070196313
13 76561198072611115
15 76561198072900944
14 76561198073254941
15 76561198074486038
11 76561198075361548
14 76561198079411549
15 76561198079734968
16 76561198080050523
18 76561198081538486
15 76561198081599434
17 76561198083397058
16 76561198086190488
16 76561198091268651
14 76561198091820426
14 76561198092279597
15 76561198093338574
14 76561198094820952
15 76561198095698406
14 76561198096818345
15 76561198098219381
14 76561198100354694
17 76561198101511272
15 76561198101557876
17 76561198101866590
14 7656119810

In [149]:
my_id = "76561197967398882"
csid = "730"

In [164]:
user_info.get_player_achievments("76561198092689293", csid)

{'achievements': [{u'achieved': 1, u'apiname': u'WIN_BOMB_PLANT'},
  {u'achieved': 1, u'apiname': u'BOMB_PLANT_LOW'},
  {u'achieved': 1, u'apiname': u'BOMB_DEFUSE_LOW'},
  {u'achieved': 1, u'apiname': u'KILL_ENEMY_LOW'},
  {u'achieved': 1, u'apiname': u'KILL_ENEMY_MED'},
  {u'achieved': 1, u'apiname': u'KILL_ENEMY_HIGH'},
  {u'achieved': 1, u'apiname': u'BOMB_DEFUSE_CLOSE_CALL'},
  {u'achieved': 1, u'apiname': u'KILL_BOMB_DEFUSER'},
  {u'achieved': 1, u'apiname': u'WIN_BOMB_DEFUSE'},
  {u'achieved': 1, u'apiname': u'BOMB_PLANT_IN_25_SECONDS'},
  {u'achieved': 1, u'apiname': u'WIN_ROUNDS_LOW'},
  {u'achieved': 1, u'apiname': u'WIN_ROUNDS_MED'},
  {u'achieved': 1, u'apiname': u'WIN_ROUNDS_HIGH'},
  {u'achieved': 1, u'apiname': u'GIVE_DAMAGE_LOW'},
  {u'achieved': 1, u'apiname': u'GIVE_DAMAGE_MED'},
  {u'achieved': 1, u'apiname': u'GIVE_DAMAGE_HIGH'},
  {u'achieved': 1, u'apiname': u'KILLING_SPREE'},
  {u'achieved': 1, u'apiname': u'KILL_WITH_OWN_GUN'},
  {u'achieved': 0, u'apiname': u'RE

In [161]:
{k: player_ach[k] for k in ("achievements", "gameName")}

{'achievements': [{u'achieved': 1, u'apiname': u'WIN_BOMB_PLANT'},
  {u'achieved': 1, u'apiname': u'BOMB_PLANT_LOW'},
  {u'achieved': 1, u'apiname': u'BOMB_DEFUSE_LOW'},
  {u'achieved': 1, u'apiname': u'KILL_ENEMY_LOW'},
  {u'achieved': 1, u'apiname': u'KILL_ENEMY_MED'},
  {u'achieved': 1, u'apiname': u'KILL_ENEMY_HIGH'},
  {u'achieved': 1, u'apiname': u'BOMB_DEFUSE_CLOSE_CALL'},
  {u'achieved': 1, u'apiname': u'KILL_BOMB_DEFUSER'},
  {u'achieved': 1, u'apiname': u'WIN_BOMB_DEFUSE'},
  {u'achieved': 1, u'apiname': u'BOMB_PLANT_IN_25_SECONDS'},
  {u'achieved': 1, u'apiname': u'WIN_ROUNDS_LOW'},
  {u'achieved': 1, u'apiname': u'WIN_ROUNDS_MED'},
  {u'achieved': 1, u'apiname': u'WIN_ROUNDS_HIGH'},
  {u'achieved': 1, u'apiname': u'GIVE_DAMAGE_LOW'},
  {u'achieved': 1, u'apiname': u'GIVE_DAMAGE_MED'},
  {u'achieved': 1, u'apiname': u'GIVE_DAMAGE_HIGH'},
  {u'achieved': 1, u'apiname': u'KILLING_SPREE'},
  {u'achieved': 1, u'apiname': u'KILL_WITH_OWN_GUN'},
  {u'achieved': 0, u'apiname': u'RE

In [114]:
user_info.build_dict(my_id)

In [115]:
user_info.check_out()

{'ban_status': [{u'CommunityBanned': False,
   u'DaysSinceLastBan': 0,
   u'EconomyBan': u'none',
   u'NumberOfGameBans': 0,
   u'NumberOfVACBans': 0,
   u'SteamId': u'76561197967398882',
   u'VACBanned': False}],
 'friends': [{u'friend_since': 0,
   u'relationship': u'friend',
   u'steamid': u'76561197960559382'},
  {u'friend_since': 0,
   u'relationship': u'friend',
   u'steamid': u'76561197965632101'},
  {u'friend_since': 1439867952,
   u'relationship': u'friend',
   u'steamid': u'76561197982823731'},
  {u'friend_since': 1404070051,
   u'relationship': u'friend',
   u'steamid': u'76561197999882487'},
  {u'friend_since': 1235243760,
   u'relationship': u'friend',
   u'steamid': u'76561198001133491'},
  {u'friend_since': 1323645317,
   u'relationship': u'friend',
   u'steamid': u'76561198040942123'},
  {u'friend_since': 1436489293,
   u'relationship': u'friend',
   u'steamid': u'76561198086767969'}],
 'game_info': {u'game_count': 57,
  u'games': [{u'appid': 10, u'playtime_forever': 0}

## Game Schema; not sure if worth looking into

In [166]:
url = "http://api.steampowered.com/ISteamUserStats/GetSchemaForGame/v2/?key="+os.environ["ACCESS_STEAM"]+"&appid=730"

In [176]:
gamestuff = json.loads(urllib2.urlopen(url).read())

In [180]:
gamestuff["game"].keys()

[u'gameVersion', u'availableGameStats', u'gameName']

In [189]:
gamestuff["game"]["availableGameStats"]["achievements"]

[{u'defaultvalue': 0,
  u'description': u'Win a round by planting a bomb',
  u'displayName': u'Someone Set Up Us The Bomb',
  u'hidden': 0,
  u'icon': u'http://cdn.akamai.steamstatic.com/steamcommunity/public/images/apps/730/9f60ea3c56b4ab248ab598bbd62568b953116301.jpg',
  u'icongray': u'http://cdn.akamai.steamstatic.com/steamcommunity/public/images/apps/730/978bdbc78917f57180549c864a9c7dcc1711dfec.jpg',
  u'name': u'WIN_BOMB_PLANT'},
 {u'defaultvalue': 0,
  u'description': u'Plant 100 bombs',
  u'displayName': u'Boomala Boomala',
  u'hidden': 0,
  u'icon': u'http://cdn.akamai.steamstatic.com/steamcommunity/public/images/apps/730/325ccbd68e599083c1597fd90dc6d4265d1ca3b4.jpg',
  u'icongray': u'http://cdn.akamai.steamstatic.com/steamcommunity/public/images/apps/730/a4f62b47ba5a852c71af6fe35411addbc0d1d193.jpg',
  u'name': u'BOMB_PLANT_LOW'},
 {u'defaultvalue': 0,
  u'description': u'Defuse 100 bombs successfully',
  u'displayName': u'The Hurt Blocker',
  u'hidden': 0,
  u'icon': u'http:/

In [3]:
id1 = "76561198092689293"
url = "http://steamcommunity.com/profiles/"+id1

In [12]:
url

'http://steamcommunity.com/profiles/76561198092689293'

In [8]:
text = requests.get(url).text

In [9]:
rm_delims = re.sub(r"[\t\r\n]+", r" ", text)
rm_breaks = re.sub(r"\<br\>+", r" ", rm_delims).strip()

In [10]:
soup = BeautifulSoup(rm_breaks, "lxml")

# Scraping some profile info

`soup.find_all("div", "profile_count_link")`

Using index of -1 below prevents an error for "inventory", which only had one index, where others had 2

In [48]:
profile_dict = {}
profile_dict["stuff"] = [{list(link.stripped_strings)[0]:list(link.stripped_strings)[-1]} for link in profile_links]
profile_dict

{'stuff': [{u'Badges': u'5'},
  {u'Games': u'32'},
  {u'Inventory': u'Inventory'},
  {u'Screenshots': u'145'},
  {u'Reviews': u'3'},
  {u'Groups': u'26'},
  {u'Friends': u'83'}]}

`soup.find_all("div", "profile_count_link")`

In [56]:
profile_dict["online_status"] = [list(link.stripped_strings) for link in soup.find_all("div", "responsive_status_info")]

In [57]:
profile_dict

{'online_status': [[u'Currently In-Game',
   u"Sid Meier's Civilization V",
   u'Join Game']],
 'stuff': [{u'Badges': u'5'},
  {u'Games': u'32'},
  {u'Inventory': u'Inventory'},
  {u'Screenshots': u'145'},
  {u'Reviews': u'3'},
  {u'Groups': u'26'},
  {u'Friends': u'83'}]}

In [72]:
soup.find_all("div", "showcase_stat")[0].attrs['data-community-tooltip']

'659 achievements in 16 different games.'

In [64]:
[''.join(link.text) for link in soup.find_all("div", "showcase_stat")]

[u' 659 Achievements ', u' 23% Avg. Game Completion Rate ']

In [79]:
recent_3_games = soup.find_all("div", "game_info")

In [80]:
[list(game.stripped_strings) for game in recent_3_games]

[[u'222 hrs on record  Currently In-Game', u"Sid Meier's Civilization V"],
 [u'847 hrs on record  last played on Apr 8', u'Team Fortress 2'],
 [u'3.6 hrs on record  last played on Apr 7', u'Dino D-Day']]

In [82]:
other_stuff = soup.find_all("div", "profile_customization_area")

In [88]:
[list(thing.stripped_strings) for thing in other_stuff]

[[u'Achievement Showcase',
  u'+653',
  u'659',
  u'Achievements',
  u'23%',
  u'Avg. Game Completion Rate']]

In [175]:
soup.find_all("div", "persona_name persona_level")[0].text

u'Level 10'

In [18]:
import pandas as pd

In [47]:
banned_df = pd.DataFrame(banned_list)

In [76]:
another = 'http://steamcommunity.com/profiles/76561198092689293/?xml=1'

In [77]:
page = requests.get(another)

In [78]:
text = page.text

In [138]:
with open("/Users/johnnysand/Downloads/Counter_Strike_Global_Offensive.jsonlines") as f:
    CSGO = [json.loads(review) for review in f]

In [140]:
csdf = pd.DataFrame(CSGO)

In [147]:
get_bans('54432109565543')

[]

In [151]:
csdf.columns

Index([u'achievement_progress', u'date_posted', u'date_updated',
       u'found_helpful_percentage', u'friend_player_level', u'num_badges',
       u'num_comments', u'num_found_funny', u'num_found_helpful',
       u'num_found_unhelpful', u'num_friends', u'num_games_owned',
       u'num_groups', u'num_guides', u'num_reviews', u'num_screenshots',
       u'num_voted_helpfulness', u'num_workshop_items', u'orig_url',
       u'profile_url', u'rating', u'review', u'review_url', u'steam_id_number',
       u'total_game_hours', u'total_game_hours_last_two_weeks', u'username'],
      dtype='object')

In [153]:
csdf[np.array(map(lambda x: len(x), csdf['steam_id_number']))==17]['steam_id_number'].count()

1680

In [145]:
csdf['steam_id_number']

0       76561198092689293
1                 Delta3D
2                   kadiv
3                    knal
4       76561198003457024
5           hiaympalliman
6                 ZoomaAP
7           RevanDaDragon
8           greybutnotgey
9               End3rb0rn
10              broilertm
11      76561198078752967
12             TURBOSKILL
13      76561198028703104
14      76561198066593729
15                 w0lks4
16             seeneesini
17      76561198072154474
18                 eys912
19      76561198092689293
20                Delta3D
21                  kadiv
22                   knal
23      76561198003457024
24          hiaympalliman
25                ZoomaAP
26          RevanDaDragon
27          greybutnotgey
28              End3rb0rn
29                 Erincl
              ...        
7043           PePeIsGOD2
7044    76561198093345061
7045              fili716
7046    76561198043999022
7047      stealth_assasin
7048          Trident3553
7049            Holtimsky
7050        

In [80]:
text = re.sub(r'\<br\>', r' ', text) # Looks for the string "<br>"
    # and replaces it with a space
text = re.sub(r'[\n\t\r ]+', r' ', text) # Looks for any sequence
    # of carriage returns or whitespace characters and replaces
    # them with a single space
text = text.strip()

In [93]:
soup = BeautifulSoup(text, 'lxml')

In [62]:
friend_list = list(friend_set)

In [122]:
def hundreds_of_friends(friends):
    index100 = 100
    hundred_list = []
    bans = []
    while index100 <= 22000:
        hundred_friends = ','.join(friends[index100-100:index100])
        hundred_list.append(hundred_friends)
        index100 += 100
        bans.append(get_bans(hundred_friends))
    return hundred_list, [ban for subbans in bans for ban in subbans]

In [82]:
def get_bans(ids):
    '''
    Looks at the IDS ban history
    '''
    key = os.environ['ACCESS_STEAM']
    url = 'http://api.steampowered.com/ISteamUser/GetPlayerBans/v1/?key='+key+'&steamids='+ids
    return json.loads(urllib2.urlopen(url).read())['players']

In [85]:
get_bans(','.join(friend_list[:2]))

[{u'CommunityBanned': False,
  u'DaysSinceLastBan': 0,
  u'EconomyBan': u'none',
  u'NumberOfGameBans': 0,
  u'NumberOfVACBans': 0,
  u'SteamId': u'76561198049211725',
  u'VACBanned': False},
 {u'CommunityBanned': True,
  u'DaysSinceLastBan': 353,
  u'EconomyBan': u'none',
  u'NumberOfGameBans': 0,
  u'NumberOfVACBans': 1,
  u'SteamId': u'76561198083499079',
  u'VACBanned': True}]

In [27]:
import time
#start_time = time.time()
#main()
#print("--- %s seconds ---" % (time.time() - start_time))

In [57]:
mythat = '9BD62BA0CD75599091D3C1257782D97D'
summaries = 'http://api.steampowered.com/ISteamUser/GetPlayerSummaries/v0002/?key=9BD62BA0CD75599091D3C1257782D97D&steamids='
friend_set = []
start_time = time.time()
for id in ids:
    state = json.loads(urllib2.urlopen(summaries + id).read())
    if state['response']['players'][0][u'communityvisibilitystate'] != 3:
        continue
    else:
        new_friends = get_friends(id)
        for friend in new_friends:
            friend_set.add(friend['steamid'])
print '%s seconds' % (time.time() - start_time)

60.8479239941 seconds


In [28]:
mythat = '9BD62BA0CD75599091D3C1257782D97D'
summaries = 'http://api.steampowered.com/ISteamUser/GetPlayerSummaries/v0002/?key=9BD62BA0CD75599091D3C1257782D97D&steamids='
steamids = set()
start_time = time.time()
banned_set = set()
for id in ids:
    state = json.loads(urllib2.urlopen(summaries + id).read())
    if state['response']['players'][0][u'communityvisibilitystate'] != 3:
        continue
    else:
        bans = get_bans(id)
        friends = get_friends(id)
        for friend in more_friends['friendslist']['friends']:
            steamids.add(str(friend['steamid']))
        steamids.add(friends)
print '%s seconds' % (time.time() - start_time)

NameError: name 'steamids_new' is not defined

In [None]:
mythat = '9BD62BA0CD75599091D3C1257782D97D'
summaries = 'http://api.steampowered.com/ISteamUser/GetPlayerSummaries/v0002/?key=9BD62BA0CD75599091D3C1257782D97D&steamids='
steamids = set()
start_time = time.time()
banned_set = set()
for id in ids:
    state = json.loads(urllib2.urlopen(summaries + id).read())
    if state['response']['players'][0][u'communityvisibilitystate'] != 3:
        continue
    else:
        bans = get_bans(id)
        friends = 'http://api.steampowered.com/ISteamUser/GetFriendList/v0001/?key='+mythat+'&steamid='+id+'&relationship=all'
        more_friends = json.loads(urllib2.urlopen(friends).read())
        for friend in more_friends['friendslist']['friends']:
            steamids.add(str(friend['steamid']))
print '%s seconds' % (time.time() - start_time)

In [155]:
7796 / 60.

129.93333333333334

In [154]:
len(steamids_new)

1627893

In [108]:
requests.get(urlfriend)

<Response [401]>

In [112]:
myurl = 'http://api.steampowered.com/ISteamUser/GetPlayerSummaries/v0002/?key='+mythat+'&steamids='+my64

In [113]:
myurl

'http://api.steampowered.com/ISteamUser/GetPlayerSummaries/v0002/?key=9BD62BA0CD75599091D3C1257782D97D&steamids=76561197967398882'

In [114]:
another_test = 'http://api.steampowered.com/ISteamUser/GetPlayerSummaries/v0002/?key=9BD62BA0CD75599091D3C1257782D97D&steamids=76561197982823731'

In [116]:
dude = json.loads(urllib2.urlopen(another_test).read())

In [138]:
type(dude['response']['players'][0][u'communityvisibilitystate'])

int

'{\n\t"response": {\n\t\t"players": [\n\t\t\t{\n\t\t\t\t"steamid": "76561197982823731",\n\t\t\t\t"communityvisibilitystate": 1,\n\t\t\t\t"profilestate": 1,\n\t\t\t\t"personaname": "Warden",\n\t\t\t\t"lastlogoff": 1459618089,\n\t\t\t\t"profileurl": "http://steamcommunity.com/profiles/76561197982823731/",\n\t\t\t\t"avatar": "https://steamcdn-a.akamaihd.net/steamcommunity/public/images/avatars/ef/efd4c085347b13ac7201328c783529a11a5d33a1.jpg",\n\t\t\t\t"avatarmedium": "https://steamcdn-a.akamaihd.net/steamcommunity/public/images/avatars/ef/efd4c085347b13ac7201328c783529a11a5d33a1_medium.jpg",\n\t\t\t\t"avatarfull": "https://steamcdn-a.akamaihd.net/steamcommunity/public/images/avatars/ef/efd4c085347b13ac7201328c783529a11a5d33a1_full.jpg",\n\t\t\t\t"personastate": 0\n\t\t\t}\n\t\t]\n\t\t\n\t}\n}'

http://api.steampowered.com/ISteamUser/GetPlayerSummaries/v0002/?key=9BD62BA0CD75599091D3C1257782D97D&steamids=7656119798282373176561197965632101


In [156]:
22000 / 150.

146.66666666666666

In [157]:
1627000 / 2200.

739.5454545454545

In [158]:
1630000 * 1000

1630000000

In [159]:
'76561197967398882' in steamids_new

True

In [160]:
'76561197967398882' in steamids_new_new

False

# Another try

In [20]:
appid = '730'
range_begin = '0'
i = '1'
url = 'http://steamcommunity.com/app/{0}/homecontent/?userreviewsoffset={1}&p=1&itemspage={2}&screenshotspage={2}&videospage={2}&artpage={2}&allguidepage={2}&webguidepage={2}&integratedguidepage={2}&discussionspage={2}&appid={0}&appHubSubSection=10&appHubSubSection=10&l=english&browsefilter=toprated&filterLanguage=default&searchText=&forceanon=1'.format(appid, range_begin, i)
print(url)

http://steamcommunity.com/app/730/homecontent/?userreviewsoffset=0&p=1&itemspage=1&screenshotspage=1&videospage=1&artpage=1&allguidepage=1&webguidepage=1&integratedguidepage=1&discussionspage=1&appid=730&appHubSubSection=10&appHubSubSection=10&l=english&browsefilter=toprated&filterLanguage=default&searchText=&forceanon=1


In [21]:
# We can read, parse, and then extract the content at the URL using
# requests and bs4 (and lxml) modules
from bs4 import BeautifulSoup
from lxml import html
import requests

In [22]:
# Let's use requests.get() to get the page
page = requests.get(url)

In [23]:
# Let's take a look at the attributes of the page object
[a for a in dir(page) if not a.startswith('_') and not a.endswith('_')]
# Don't worry about the code here, it's just a trick to see public
# methods for a requests object

['apparent_encoding',
 'close',
 'connection',
 'content',
 'cookies',
 'elapsed',
 'encoding',
 'headers',
 'history',
 'is_permanent_redirect',
 'is_redirect',
 'iter_content',
 'iter_lines',
 'json',
 'links',
 'ok',
 'raise_for_status',
 'raw',
 'reason',
 'request',
 'status_code',
 'text',
 'url']

In [24]:
# We see that there are attributes for the text, json, lines, etc.,
# so let's take a look at some of this stuff
page.text[:1000] # Here's the raw HTML

u'\t\t<div id="page1">\r\n\t\t<div class="apphub_Card modalContentLink interactable" style="display: none" data-modal-content-url="http://steamcommunity.com/profiles/76561198092689293/recommended/730/" data-modal-content-sizetofit="false">\r\n\t<div class="apphub_CardContentMain">\r\n\t\t<div class="apphub_UserReviewCardContent">\r\n\t\t\t<div class="found_helpful">\r\n\t\t\t\t9,379 of 10,082 people (93%) found this review helpful<br>12,757 people found this review funny\t\t\t</div>\r\n\r\n\t\t\t<div class="vote_header">\r\n\t\t\t\t\t\t\t\t<div class="reviewInfo">\r\n\t\t\t\t\t<div class="thumb">\r\n\t\t\t\t\t\t<img src="http://steamcommunity-a.akamaihd.net/public/shared/images/userreviews/icon_thumbsUp.png?v=1" width="44" height="44">\r\n\t\t\t\t\t</div>\r\n\r\n\t\t\t\t\t\t\t\t\t\t<div class="title">Recommended</div>\r\n\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t<div class="hours">453.5 hrs on record</div>\r\n\t\t\t\t\t\t\t\t\t</div>\r\n\t\t\t\t<div style="clear: left"></div>\r\n\t\t\t</div>\r\n\r

In [25]:
# After looking at some of the other attributes, I've determined
# that the text attribute is probably the only thing that
# concerns us, so let's use it
text = page.text

In [26]:
# The text, as you can see from the view above, has lots of \r,
# \n, \t characters in it, which might not be good for HTML
# parsing in our case (I won't get into why, partly because I'm
# not completely sure I get why), so let's get rid of all such
# characters, replacing them with spaces instead
# To do this we will use the re module, which allows us to use
# regular expressions
import re
# While we're at it, it's best to get rid of all "<br>" tags since
# they could also present problems during parsing
# We can use the re.sub() method to find one regular expression and
# replace it with another in a given text
text = re.sub(r'\<br\>', r' ', text) # Looks for the string "<br>"
    # and replaces it with a space
text = re.sub(r'[\n\t\r ]+', r' ', text) # Looks for any sequence
    # of carriage returns or whitespace characters and replaces
    # them with a single space
text = text.strip() # Get rid of spaces at either end

In [27]:
# Let's take a look at the cleaned-up version of the source HTML
text[:1000]

u'<div id="page1"> <div class="apphub_Card modalContentLink interactable" style="display: none" data-modal-content-url="http://steamcommunity.com/profiles/76561198092689293/recommended/730/" data-modal-content-sizetofit="false"> <div class="apphub_CardContentMain"> <div class="apphub_UserReviewCardContent"> <div class="found_helpful"> 9,379 of 10,082 people (93%) found this review helpful 12,757 people found this review funny </div> <div class="vote_header"> <div class="reviewInfo"> <div class="thumb"> <img src="http://steamcommunity-a.akamaihd.net/public/shared/images/userreviews/icon_thumbsUp.png?v=1" width="44" height="44"> </div> <div class="title">Recommended</div> <div class="hours">453.5 hrs on record</div> </div> <div style="clear: left"></div> </div> <div class="apphub_CardTextContent"> <div class="date_posted">Posted: February 13, 2015</div> If i had a dollar for each time someone screamed at me in another language, i\'d still have no money because i spent it on skins </div> 

In [28]:
soup = BeautifulSoup(text, "lxml")

In [31]:
soup.contents[0]

<html><body><div id="page1"> <div class="apphub_Card modalContentLink interactable" data-modal-content-sizetofit="false" data-modal-content-url="http://steamcommunity.com/profiles/76561198092689293/recommended/730/" style="display: none"> <div class="apphub_CardContentMain"> <div class="apphub_UserReviewCardContent"> <div class="found_helpful"> 9,379 of 10,082 people (93%) found this review helpful 12,757 people found this review funny </div> <div class="vote_header"> <div class="reviewInfo"> <div class="thumb"> <img height="44" src="http://steamcommunity-a.akamaihd.net/public/shared/images/userreviews/icon_thumbsUp.png?v=1" width="44"/> </div> <div class="title">Recommended</div> <div class="hours">453.5 hrs on record</div> </div> <div style="clear: left"></div> </div> <div class="apphub_CardTextContent"> <div class="date_posted">Posted: February 13, 2015</div> If i had a dollar for each time someone screamed at me in another language, i'd still have no money because i spent it on ski

In [40]:
review_sections = soup.find_all("div", "apphub_CardContentMain")
review = review_sections[5]

In [41]:
review.text

u'   11,320 of 12,327 people (92%) found this review helpful 13,940 people found this review funny       Recommended 287.4 hrs on record     Posted: November 1, 2015 Every team consists of 5 people: 1.You 2.Russian Guy 3.Another Russian Guy 4.Russian Guy who speaks English 5. A 6 year old kid who slept with your mother 11/10 Too much Salt and \u0421*\u043a\u0430 \u0411\u043b\u0438@\u0442,not enough \u2665\u2665\u2665\u2665\u2665 Would love to take another dose of salt and punch my little brother    \xa0  '

In [42]:
link_blocks = list(soup.findAll('div',
                                'apphub_Card modalContentLink interactable'))
link_block = link_blocks[0]

In [43]:
review_url = link_block.attrs['data-modal-content-url']
review_url_split = review_url.split('/')
review_url

'http://steamcommunity.com/profiles/76561198092689293/recommended/730/'

In [44]:
profile_url = '/'.join(review_url_split[:5])
profile_url

'http://steamcommunity.com/profiles/76561198092689293'

In [45]:
list(review.children)

[u' ',
 <div class="apphub_UserReviewCardContent"> <div class="found_helpful"> 11,320 of 12,327 people (92%) found this review helpful 13,940 people found this review funny </div> <div class="vote_header"> <div class="reviewInfo"> <div class="thumb"> <img height="44" src="http://steamcommunity-a.akamaihd.net/public/shared/images/userreviews/icon_thumbsUp.png?v=1" width="44"/> </div> <div class="title">Recommended</div> <div class="hours">287.4 hrs on record</div> </div> <div style="clear: left"></div> </div> <div class="apphub_CardTextContent"> <div class="date_posted">Posted: November 1, 2015</div> Every team consists of 5 people: 1.You 2.Russian Guy 3.Another Russian Guy 4.Russian Guy who speaks English 5. A 6 year old kid who slept with your mother 11/10 Too much Salt and \u0421*\u043a\u0430 \u0411\u043b\u0438@\u0442,not enough \u2665\u2665\u2665\u2665\u2665 Would love to take another dose of salt and punch my little brother </div> </div>,
 u' ',
 <div class="UserReviewCardContent_F

In [46]:
list(review.descendants)

[u' ',
 <div class="apphub_UserReviewCardContent"> <div class="found_helpful"> 11,320 of 12,327 people (92%) found this review helpful 13,940 people found this review funny </div> <div class="vote_header"> <div class="reviewInfo"> <div class="thumb"> <img height="44" src="http://steamcommunity-a.akamaihd.net/public/shared/images/userreviews/icon_thumbsUp.png?v=1" width="44"/> </div> <div class="title">Recommended</div> <div class="hours">287.4 hrs on record</div> </div> <div style="clear: left"></div> </div> <div class="apphub_CardTextContent"> <div class="date_posted">Posted: November 1, 2015</div> Every team consists of 5 people: 1.You 2.Russian Guy 3.Another Russian Guy 4.Russian Guy who speaks English 5. A 6 year old kid who slept with your mother 11/10 Too much Salt and \u0421*\u043a\u0430 \u0411\u043b\u0438@\u0442,not enough \u2665\u2665\u2665\u2665\u2665 Would love to take another dose of salt and punch my little brother </div> </div>,
 u' ',
 <div class="found_helpful"> 11,320 

In [47]:
review.getText(separator=",,,")

u' ,,, ,,, 11,320 of 12,327 people (92%) found this review helpful 13,940 people found this review funny ,,, ,,, ,,, ,,, ,,, ,,, ,,,Recommended,,, ,,,287.4 hrs on record,,, ,,, ,,, ,,, ,,, ,,,Posted: November 1, 2015,,, Every team consists of 5 people: 1.You 2.Russian Guy 3.Another Russian Guy 4.Russian Guy who speaks English 5. A 6 year old kid who slept with your mother 11/10 Too much Salt and \u0421*\u043a\u0430 \u0411\u043b\u0438@\u0442,not enough \u2665\u2665\u2665\u2665\u2665 Would love to take another dose of salt and punch my little brother ,,, ,,, ,,, ,,,\xa0,,, ,,, '

In [48]:
stripped_strings = list(review.stripped_strings)
stripped_strings

[u'11,320 of 12,327 people (92%) found this review helpful 13,940 people found this review funny',
 u'Recommended',
 u'287.4 hrs on record',
 u'Posted: November 1, 2015',
 u'Every team consists of 5 people: 1.You 2.Russian Guy 3.Another Russian Guy 4.Russian Guy who speaks English 5. A 6 year old kid who slept with your mother 11/10 Too much Salt and \u0421*\u043a\u0430 \u0411\u043b\u0438@\u0442,not enough \u2665\u2665\u2665\u2665\u2665 Would love to take another dose of salt and punch my little brother']

In [49]:
[len(list(review_sections[i].stripped_strings)) for i in range(10)]

[5, 5, 5, 5, 5, 5, 19, 5, 5, 5]

In [50]:
list(review_sections[-3].stripped_strings)

[u'9,472 of 10,379 people (91%) found this review helpful 11,089 people found this review funny',
 u'Recommended',
 u'1,813.6 hrs on record',
 u'Posted: June 5, 2015',
 u"It's like Dota 2 but with less wizards and more Russians."]

In [51]:
stripped_strings2 = ['1,703 of 1,977 people (86%) found this review helpful 1 person found this review funny', 'Recommended', '598.1 hrs on record', 'Posted: January 17, 2014', "I've tried Rosetta Stone, duolingo, and even college classes, but nothing has taught me Russian or Brazilian better than this! And it's free!", '76 products in account', '21']
stripped_strings2

['1,703 of 1,977 people (86%) found this review helpful 1 person found this review funny',
 'Recommended',
 '598.1 hrs on record',
 'Posted: January 17, 2014',
 "I've tried Rosetta Stone, duolingo, and even college classes, but nothing has taught me Russian or Brazilian better than this! And it's free!",
 '76 products in account',
 '21']

In [52]:
re.search(r', \d{4}$', 'January 17, 2014')

<_sre.SRE_Match at 0x103aa8648>

In [53]:
from dateutil import parser
date = parser.parse('January 17, 2014')
date.day

17

In [54]:
stripped_strings3 = ['805 of 962 people (84%) found this review helpful', 'Recommended', '682.5 hrs on record', 'Posted: January 22, 2014', 'K I N T E R', '77 products in account', '1']
stripped_strings3

['805 of 962 people (84%) found this review helpful',
 'Recommended',
 '682.5 hrs on record',
 'Posted: January 22, 2014',
 'K I N T E R',
 '77 products in account',
 '1']

In [55]:
# Given the stripped_strings attribute, the relevant information can be extracted
# exceedingly easily
helpful_funny = stripped_strings[0]
recommended = stripped_strings[1]
hours = re.sub(r',',
               r'',
               stripped_strings[2].split()[0])
date_posted = stripped_strings[3][8:]
review = stripped_strings[4]
#products_in_account = stripped_strings[6].split()[0]

In [56]:
recommended

u'Recommended'

In [57]:
helpful = helpful_funny.split()[:9]
funny = helpful_funny.split()[9:]
found_helpful = re.sub(r',',
                       r'',
                       helpful[0])
total_helpful_candidates = re.sub(r',',
                                  r'',
                                  helpful[2])
helpful_percentage = float(found_helpful)/float(total_helpful_candidates)
print("found helpful: {}\ntotal people that could have found it helpful: {}"
      "\npercentage of people who found the review helpful: "
      "{}%".format(found_helpful,
                   total_helpful_candidates,
                   helpful_percentage))

found_funny = funny[0]
print("found review funny: {}".format(found_funny))

found helpful: 11320
total people that could have found it helpful: 12327
percentage of people who found the review helpful: 0.918309402125%
found review funny: 13,940


In [198]:
date_posted += ', 2015'

In [58]:
date_posted

u'November 1, 2015'

In [59]:
# Let's define a dictionary with all of the stuff that's being collected
review_dict = dict(review_url=review_url,
                   profile_url=profile_url,
                   recommended=recommended,
                   hours=hours,
                   date_posted=date_posted,
                   review=review,
                   #products_in_account=products_in_account,
                   found_helpful=found_helpful,
                   total_found_helpful_candidates=total_helpful_candidates,
                   found_helpful_percentage=helpful_percentage,
                   found_funny=found_funny)
review_dict

{'date_posted': u'November 1, 2015',
 'found_funny': u'13,940',
 'found_helpful': u'11320',
 'found_helpful_percentage': 0.9183094021254158,
 'hours': u'287.4',
 'profile_url': 'http://steamcommunity.com/profiles/76561198092689293',
 'recommended': u'Recommended',
 'review': u'Every team consists of 5 people: 1.You 2.Russian Guy 3.Another Russian Guy 4.Russian Guy who speaks English 5. A 6 year old kid who slept with your mother 11/10 Too much Salt and \u0421*\u043a\u0430 \u0411\u043b\u0438@\u0442,not enough \u2665\u2665\u2665\u2665\u2665 Would love to take another dose of salt and punch my little brother',
 'review_url': 'http://steamcommunity.com/profiles/76561198092689293/recommended/730/',
 'total_found_helpful_candidates': u'12327'}