In [1]:
from bs4 import BeautifulSoup
import requests
import time
%load_ext jupyternotify

<IPython.core.display.Javascript object>

In [2]:
url = 'https://store.steampowered.com/app/1057090/Ori_and_the_Will_of_the_Wisps/'
response = requests.get(url)
site_content = response.text

In [3]:
soup = BeautifulSoup(site_content, 'html.parser')

In [4]:
soup.title.text

'Ori and the Will of the Wisps on Steam'

# Functions

In [5]:
def clock(func):
    """Decorator function to calculate time taken to run a function

    Args:
        func: A function

    Returns:
        Time taken by the function passed as argument
    """

    def clocked(*args):
        start = time.perf_counter()
        result = func(*args)
        elapsed = time.perf_counter() - start

        if func.__name__ == 'get_fruit_stats':
            print('[%0.8fs] %s - %s' % (elapsed, func.__name__, args))
        else:
            print('[%0.8fs] %s' % (elapsed, func.__name__))
        return result

    return clocked

In [6]:
def get_details(soup):

    game_details = soup.find_all('div', {'class': 'details_block'})[0]

    title = soup.find('div', {'class': 'apphub_AppName'}).text.strip()
    genre = soup.find('b', text='Genre:').find_next().text.strip()
    try:
        release_date = soup.find('div', {'class': 'release_date'}).find('div', {'class': 'date'}).text.strip()
    except:
        release_date = None

    dev = soup.find('div', {'id': 'developers_list'}).text.strip()
    publisher = soup.find('div', {'class': 'subtitle column'}, text='Publisher:').find_next().text.strip()
    
    return title, genre, release_date, dev, publisher

In [7]:
def get_price(soup):
    
    s = soup.find('div', {'class': 'game_purchase_action'})
    
    try:
        original_price = s.find('div', {'class': 'discount_original_price'}).text.strip()
        discounted_price = s.find('div', {'class': 'discount_final_price'}).text.strip()
    except:
        original_price = s.find('div', {'class': 'game_purchase_price price'}).text.strip()

        if 'Free' in original_price:
            original_price = 'Free'

        discounted_price = original_price       

    return original_price, discounted_price


In [8]:
def get_metacritic_score(soup):
    try:
        return int(soup.find('div', {'id': 'game_area_metascore'}).find('div', {'class': 'score'}).text.strip())
    except:
        return 0

In [9]:
def get_ratings(soup):
    arr = []
    
    for review in soup.find_all('div', {'class': 'user_reviews_summary_row'}):
        for i in review.find('div', {'class': 'summary column'}).find_all('span')[: 2]:
            i = i.text.strip()
            if i.startswith('('):
                arr.append(int(i[1: -1].replace(',', '')))
            else:
                arr.append(i)
    
    if len(arr) == 2:
        arr += arr

    return arr

In [10]:
def get_tags(soup):
    return [x.text.strip() for x in soup.find('div', {'class': 'glance_tags popular_tags'}).find_all('a')]

In [11]:
def get_awards(soup):
    try: 
        return soup.find_all('div',
                             {'class': 'block responsive_apppage_reviewblock'})[1].find('div',
                             {'class': 'game_page_autocollapse'})
    except:
        return None

In [12]:
def get_min_requirements(soup):
    try:
        min_requirements = soup.find('div', {'class': 'game_area_sys_req_leftCol'})
        return [x.text for x in min_requirements.find('ul').find_all('li')]
    except:
        return None

In [13]:
def get_recommended_requirements(soup):
    try:
        min_requirements = soup.find('div', {'class': 'game_area_sys_req_rightCol'})
        return [x.text for x in min_requirements.find('ul').find_all('li')]
    except:
        return None

In [14]:
def get_platforms(soup):
    try:
        return [x['class'][1] for x in soup.find('div', {'class': 'game_area_purchase_platform'}).find_all('span')]
    except:
        return None

In [15]:
def get_game_description(soup):
    return soup.find('div', {'class': 'game_area_description'}).find('h2').next_element.next_element.strip()

In [16]:
def get_similar_games(soup):

    similar_games_link = soup.find('div', {'id': 'recommended_block'}).find('a')['href']
    site_content = requests.get(similar_games_link)

    sg_soup = BeautifulSoup(site_content.text, 'html.parser')
    list_of_games = sg_soup.find('div', {'id': 'released'}).find_all('div', {'class': 'similar_grid_item'})

    return [int(game.find('div', {'class': 'similar_grid_capsule'})['data-ds-appid']) for game in list_of_games]


In [17]:
def create_soup_object(game_id):
    
    url = f"https://store.steampowered.com/app/{game_id}/"
    site_content = requests.get(url)
    
    return BeautifulSoup(site_content.text, 'html.parser')

In [18]:
def collect_game_data(game_id):
    global list_of_ids, error_games
    
    soup = create_soup_object(game_id)
    
    try:
        title, genre, release_date, dev, publisher = get_details(soup)
        print(f"{title}\t---\t{game_id}")
        original_price, discounted_price = get_price(soup)
        metacritic_score = get_metacritic_score(soup)

        tags = get_tags(soup)
        awards = get_awards(soup)

        ratings = get_ratings(soup)
        description = get_game_description(soup)

        platforms = get_platforms(soup)
        min_reqs = get_min_requirements(soup)
        rec_reqs = get_recommended_requirements(soup)

        similar_games = get_similar_games(soup)

        for x in similar_games:
            list_of_ids.append(x)

        return {
            'id': game_id,
            'title': title,
            'genre': genre,
            'original_price': original_price,
            'discounted_price': discounted_price,
            'release_date': release_date,
            'developer': dev,
            'publisher': publisher,            
            'overall_rating': ratings[0],
            'overall_vote_count': ratings[1],
            'recent_rating': ratings[2],
            'recent_vote_count': ratings[3],
            'metacritic_score': metacritic_score,
            'tags': tags,
            'awards': awards,
            'description': description,
            'platforms': platforms,
            'minimum_requirements': min_reqs,
            'recommended_requirements': rec_reqs,
            'similar_games': similar_games
        }
    except:
        error_games.append(game_id)
        return None

In [19]:
list_of_games = []
list_of_ids = list()
error_games = []
set_of_ids = set()

In [20]:
game_id = 239140

In [21]:
list_of_ids.append(game_id)
list_of_ids

[239140]

In [22]:
print(len(list_of_ids))
list_of_ids = list(set(list_of_ids) - set_of_ids)
len(list_of_ids)

1


1

In [23]:
%%notify
counter = 0
counter_limit = 2
interval = 50

for game_id in list_of_ids:
   
    if game_id in set_of_ids or game_id in error_games:
        continue

    time.sleep(1)
    game_data = collect_game_data(game_id)
    set_of_ids.add(game_id)
    list_of_games.append(game_data)
    
    if counter % interval == 0:
        print(f"----- {counter} -----")
    
    
    counter += 1
    if counter == counter_limit:
        break

Dying Light	---	239140
----- 0 -----
GTFO	---	493520


<IPython.core.display.Javascript object>

In [24]:
game_id

493520

In [27]:
try:
    list_of_ids.remove(40960)
except:
    pass

try:
    list_of_ids.remove(49540)
except:
    pass

try:
    list_of_ids.remove(game_id)
except:
    pass

In [25]:
len(list_of_games)

2

In [26]:
list_of_games

[{'id': 239140,
  'title': 'Dying Light',
  'genre': 'Action',
  'original_price': '₹ 1,000',
  'discounted_price': '₹ 1,000',
  'release_date': '26 Jan, 2015',
  'developer': 'Techland',
  'publisher': 'Techland Publishing',
  'overall_rating': 'Overwhelmingly Positive',
  'overall_vote_count': 5114,
  'recent_rating': 'Very Positive',
  'recent_vote_count': 131692,
  'metacritic_score': 87,
  'tags': ['Horror',
   'Survival Horror',
   'Zombies',
   'Online Co-Op',
   'PvP',
   'Parkour',
   'First-Person',
   'Open World Survival Craft',
   'PvE',
   'Open World',
   'Action',
   'FPS',
   'Gore',
   'Stealth',
   'Survival',
   'Story Rich',
   'Post-apocalyptic',
   'Hack and Slash',
   'Action RPG',
   'RPG'],
  'awards': <div class="game_page_autocollapse" style="max-height: 400px;">
  <img src="https://steamcdn-a.akamaihd.net/steam/apps/239140/extras/dl_laur_en.png?t=1594413193"><br/><img src="https://steamcdn-a.akamaihd.net/steam/apps/239140/extras/winners_new.png?t=1594413193

In [116]:
import pandas as pd

In [117]:
for _ in range(list_of_games.count(None)):
    list_of_games.remove(None)

In [118]:
df = pd.DataFrame(list_of_games)

In [119]:
df.to_csv('temp.csv', header=True, index=True)

In [33]:
s = '(マブラヴ)-_!=/#?""'
for i in s:
    print(ord(i))

40
12510
12502
12521
12532
41
45
95
33
61
47
35
63
34
34
