In [77]:
import json
import pandas as pd
import numpy as np
import urllib
import requests
from bs4 import BeautifulSoup
import re
import datetime
import steampi.api
import time

In [115]:
with open('../GitHub/steam-api/data/20190623_steam_catalog.json') as steam_games_jsonfile:
    steam_data = json.load(steam_games_jsonfile)

In [117]:
num_content = len(steam_data.keys())
num_content

80923

Total number of purchaseable items is more than 80,000 but this includes DLC!

Need to make a decision: Treat DLC as games as well? This is true in some cases but it is not universally applicable. Many games have costume-only DLC.

In [118]:
steam250_url = 'https://steam250.com/most_played'
#Top 250 most played Steam games, based on average number of concurrent players in the last seven days.

In [119]:
response = requests.get(steam250_url)

In [120]:
soup = BeautifulSoup(response.text, 'html.parser')
type(soup)

bs4.BeautifulSoup

In [121]:
title_data = soup.find_all('span',class_='title')

In [122]:
appid_data = [span.a for span in title_data]
appid_string = ''.join(str(x) for x in appid_data)
top_250_ids = re.findall(r'app\/(\d+)', appid_string)
top_250_ids[:3]

['570', '730', '578080']

In [123]:
#soup.find_all('a',class_='genre') 
#abandoning the genre find_all because there is a better data source out there... and in the worst case, I can label them manually

In [124]:
top_250_titles = re.findall(r'<a href[^>]*>([^<]+)<\/a>',appid_string)
top_250_titles[:3]

['Dota 2', 'Counter-Strike: Global Offensive', "PLAYERUNKNOWN'S BATTLEGROUNDS"]

In [125]:
release_data = soup.find_all('span',class_='date')
len(release_data) #For release date data, 1 entry is missing! (for the game Don't Starve)

249

In [126]:
release_data.insert(111,'<span class="date" title="23 Apr 2013"><a href="/2013"> Apr 2013 </a></span>')

In [127]:
print(release_data[111])
print(len(release_data))

<span class="date" title="23 Apr 2013"><a href="/2013"> Apr 2013 </a></span>
250


In [128]:
release_data_string = ''.join(str(x) for x in release_data)
top_250_release_date_data = re.findall(r'title="([^<]+)"',release_data_string)

In [129]:
def make_date(x):
    if x=='NA':
        dt_str = 'NA'
    
    elif x=='':
        dt_str = 'NA'
    
    else:
        try:
            dt_obj = datetime.datetime.strptime(x, '%b %d, %Y') #Must switch b and d and also change comma back to blank when not querying the API
            dt_str = '%s/%s/%s' % (dt_obj.month, dt_obj.day, dt_obj.year)
        except ValueError:
            dt_obj = datetime.datetime.strptime(x, '%d %b, %Y') #Must switch b and d and also change comma back to blank when not querying the API
            dt_str = '%s/%s/%s' % (dt_obj.month, dt_obj.day, dt_obj.year)
        
    return dt_str

#top_250_rls_dates = [make_date(x) for x in top_250_release_date_data]

In [130]:
genre_data = soup.find_all('a',class_='genre')
genre_data_string = ''.join(str(x) for x in genre_data)
top_250_genre_data = re.findall(r'<[^>]*>([^<]+)<\/a>',genre_data_string)
top_250_genre_data[:3]

['MOBA', 'FPS', 'Survival']

In [131]:
top_250_games_df = pd.DataFrame()
top_250_games_df['appid'] = top_250_ids
top_250_games_df['titles'] = top_250_titles
#top_250_games_df['genre'] = top_250_genre_data
#top_250_games_df['rls_dates'] = top_250_rls_dates
top_250_games_df[top_250_games_df['appid']=='582660']

Unnamed: 0,appid,titles
42,582660,Black Desert Online


In [63]:
(app_details, is_success, status_code) = steampi.api.load_app_details(730)
app_details.keys()

dict_keys(['type', 'name', 'steam_appid', 'required_age', 'is_free', 'controller_support', 'detailed_description', 'about_the_game', 'short_description', 'supported_languages', 'header_image', 'website', 'pc_requirements', 'mac_requirements', 'linux_requirements', 'developers', 'publishers', 'packages', 'package_groups', 'platforms', 'metacritic', 'categories', 'genres', 'screenshots', 'movies', 'recommendations', 'achievements', 'release_date', 'support_info', 'background', 'content_descriptors'])

In [69]:
print(app_details['name'])
print(app_details['is_free'])
print([app_details['categories'][i]['description'] for i in range(len(app_details['categories']))])
print([app_details['genres'][i]['description'] for i in range(len(app_details['genres']))])
print(app_details['release_date']['date'])

Tom Clancy's Rainbow Six® Siege
False
['Single-player', 'Multi-player', 'Co-op', 'Steam Trading Cards', 'In-App Purchases', 'Partial Controller Support']
['Action']
1 Dec, 2015


In [134]:
title_lst =[]
free_lst =[]
metacritic_lst=[]
cats_lst=[]
genres_lst=[]   
rls_date_lst=[]
error_lst = []

for i in range(250):
    #get app details for each app in the list
    app_id = top_250_ids[i]
    (app_details, is_success, status_code) = steampi.api.load_app_details(app_id)
    
    try:
        #extend list of entries for each app (DLCs skipped because not all games have DLCs and we are not yet interested in delving into granular details)
        title_lst.append(app_details['name'])
        free_lst.append(app_details['is_free'])
        #metacritic_lst.append(app_details['metacritic']['score'])
        cats_lst.append([app_details['categories'][j]['description'] for j in range(len(app_details['categories']))]) #list comp; many games have more than 1 category
        genres_lst.append([app_details['genres'][j]['description'] for j in range(len(app_details['genres']))]) #list comp; many games have more than 1 genre tag on Steam
        rls_date_lst.append(app_details['release_date']['date'])

    except KeyError:
        error_lst.append(app_id)
        title_lst.append('NA')
        free_lst.append('NA')
        cats_lst.append('NA') 
        genres_lst.append('NA')
        rls_date_lst.append('NA')
    
    time.sleep(np.random.randint(1,5))

No data found for appID = 582660 with status code = 200
No data found for appID = 755790 with status code = 200
No data found for appID = 905370 with status code = 200
No data found for appID = 386180 with status code = 200
No data found for appID = 216150 with status code = 200
No data found for appID = 560380 with status code = 200


In [135]:
top_250_games_df['titles']=title_lst
top_250_games_df['is_free']=free_lst
#top_250_games_df['metacritic_score']=metacritic_lst
top_250_games_df['categories']=cats_lst
top_250_games_df['genres']=genres_lst
top_250_games_df['rls_date']=[make_date(x) for x in rls_date_lst]

top_250_games_df.head()

Unnamed: 0,appid,titles,is_free,categories,genres,rls_date
0,570,Dota 2,True,"[Multi-player, Co-op, Steam Trading Cards, Ste...","[Action, Free to Play, Strategy]",7/9/2013
1,730,Counter-Strike: Global Offensive,True,"[Multi-player, Steam Achievements, Full contro...","[Action, Free to Play]",8/21/2012
2,578080,PLAYERUNKNOWN'S BATTLEGROUNDS,False,"[Multi-player, Online Multi-Player, Stats]","[Action, Adventure, Massively Multiplayer]",12/21/2017
3,1046930,Dota Underlords,True,"[Single-player, Multi-player, Online Multi-Pla...","[Casual, Free to Play, Strategy, Early Access]",6/20/2019
4,359550,Tom Clancy's Rainbow Six® Siege,False,"[Single-player, Multi-player, Co-op, Steam Tra...",[Action],12/1/2015


In [136]:
top_250_games_df[top_250_games_df['rls_date']=='NA']

Unnamed: 0,appid,titles,is_free,categories,genres,rls_date
42,582660,,,,,
58,755790,,,,,
71,905370,,,,,
108,386180,,,,,
113,219740,Don't Starve,False,"[Single-player, Full controller support, Steam...","[Adventure, Indie, Simulation]",
155,216150,,,,,
192,560380,,,,,


In [137]:
top_250_games_df.loc[111,'rls_date'] = '23/12/2019'

In [138]:
top_250_games_df.iloc[111,:]

appid                                      594650
titles                              Hunt Showdown
is_free                                     False
categories    [Online Multi-Player, Online Co-op]
genres                     [Action, Early Access]
rls_date                               23/12/2019
Name: 111, dtype: object

In [147]:
top_250_games_df.head()

Unnamed: 0,appid,titles,is_free,categories,genres,rls_date
0,570,Dota 2,True,"[Multi-player, Co-op, Steam Trading Cards, Ste...","[Action, Free to Play, Strategy]",7/9/2013
1,730,Counter-Strike: Global Offensive,True,"[Multi-player, Steam Achievements, Full contro...","[Action, Free to Play]",8/21/2012
2,578080,PLAYERUNKNOWN'S BATTLEGROUNDS,False,"[Multi-player, Online Multi-Player, Stats]","[Action, Adventure, Massively Multiplayer]",12/21/2017
3,1046930,Dota Underlords,True,"[Single-player, Multi-player, Online Multi-Pla...","[Casual, Free to Play, Strategy, Early Access]",6/20/2019
4,359550,Tom Clancy's Rainbow Six® Siege,False,"[Single-player, Multi-player, Co-op, Steam Tra...",[Action],12/1/2015


In [139]:
error_lst

['582660', '755790', '905370', '386180', '216150', '560380']

In [140]:
today = datetime.date.today().strftime('%d%m%Y')

In [141]:
top_250_games_df.to_csv('./data/top_250_steam_games_'+str(today)+'.csv')