# This script scrapes the top sellers among the special deals on Steam

The output contains each game's name, its original price, the discount percentage, the discounted price, and the link to its store page.

In [1]:
import requests
import csv
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np
import concurrent.futures
import threading
import time
import json

In [2]:
# Query URL for the first page of the "specials / top sellers" listing
# (start offset 0, 15 entries, Canadian store, English).
api_url = (
    "https://store.steampowered.com/contenthub/querypaginated/specials/TopSellers/render/"
    "?query=&start=0&count=15&cc=CA&l=english&v=4&tag="
)

In [3]:
# Get the data (only the resulting html page from JSON)
def get_data(url, timeout=30):
    """Fetch one page of the listing and return its rendered HTML.

    Args:
        url: Query URL of the paginated endpoint.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot block the run indefinitely.

    Returns:
        The value stored under ``results_html`` in the JSON response.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the server does not respond within ``timeout``.
        KeyError: If the response has no ``results_html`` entry.
    """
    r = requests.get(url, timeout=timeout)
    # Surface HTTP errors directly rather than as a confusing JSON decode
    # failure on an error page.
    r.raise_for_status()
    return r.json()['results_html']

In [4]:
# Get the total number of results
def total_results(url, timeout=30):
    """Return the total number of results reported by the endpoint.

    Args:
        url: Query URL of the paginated endpoint.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The ``total_count`` field of the JSON response, as an ``int``.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the server does not respond within ``timeout``.
        KeyError: If the response has no ``total_count`` entry.
    """
    r = requests.get(url, timeout=timeout)
    # Surface HTTP errors directly rather than as a JSON decode failure.
    r.raise_for_status()
    return int(r.json()['total_count'])

# NOTE: this rebinds the name `total_results` from the function to its integer
# result. From here on the name holds the result count, and later code can no
# longer call the function under this name.
total_results = total_results(api_url)
# Bare expression: displays the value as the cell output.
total_results

11470

In [5]:
# Get the number of games per page
def page_size(url, timeout=30):
    """Return the number of entries the endpoint reports per page.

    Args:
        url: Query URL of the paginated endpoint.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The ``pagesize`` field of the JSON response, as an ``int``.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the server does not respond within ``timeout``.
        KeyError: If the response has no ``pagesize`` entry.
    """
    r = requests.get(url, timeout=timeout)
    # Surface HTTP errors directly rather than as a JSON decode failure.
    r.raise_for_status()
    return int(r.json()['pagesize'])

# Page size reported by the endpoint; used later as the step between the
# start offsets of consecutive page queries.
games_per_page = page_size(api_url)
# Bare expression: displays the value as the cell output.
games_per_page

15

In [6]:
# Parse to get all the game details
def parse(url):
    """Extract the game entries from one page of result HTML.

    Args:
        url: Despite its name, this is the HTML markup of one result page
            (the string returned by ``get_data``), not a URL. The parameter
            name is kept for backward compatibility.

    Returns:
        A list with one ``[title, price_before, discount_pct, price_after,
        link]`` entry per ``<a>`` element in the markup. ``price_before`` is
        ``"Not Found"`` when the entry has no original-price element. When
        the discount percentage or final price is missing, ``discount_pct``
        is ``0`` and ``price_after`` equals ``price_before``.

    Raises:
        AttributeError: If an ``<a>`` element has no ``tab_item_name`` div.
        KeyError: If an ``<a>`` element has no ``href`` attribute.
    """
    def _clean_price(node):
        # Remove the currency prefix (including the "CND$" misspelling the
        # original code guarded against) *before* trimming, so whitespace
        # left between the prefix and the amount is dropped as well.
        return node.text.replace("CND$", "").replace("CDN$", "").strip()

    game_list = []
    soup = BeautifulSoup(url, 'lxml')
    # Every anchor in the results markup is treated as one game entry.
    for game in soup.find_all('a'):
        title = game.find('div', class_='tab_item_name').text.replace(":", "")

        # find() returns None for a missing element; test for that explicitly
        # instead of hiding every possible error behind a bare `except`.
        original_node = game.find('div', class_='discount_original_price')
        if original_node is not None:
            cur_before_disc = _clean_price(original_node)
        else:
            cur_before_disc = "Not Found"

        pct_node = game.find('div', class_='discount_pct')
        final_node = game.find('div', class_='discount_final_price')
        if pct_node is not None and final_node is not None:
            cur_discount_pct = pct_node.text.replace("-", "").replace("%", "")
            cur_after_disc = _clean_price(final_node)
        else:
            # No discount information: report the undiscounted price.
            cur_discount_pct = 0
            cur_after_disc = cur_before_disc

        link = game['href']
        game_list.append([title, cur_before_disc, cur_discount_pct, cur_after_disc, link])

    return game_list

In [7]:
# Build one query URL per result page. The start offset advances by the page
# size reported by the endpoint, and the same value is requested as `count`
# so consecutive pages can neither overlap nor leave gaps.
pages = [
    "https://store.steampowered.com/contenthub/querypaginated/specials/TopSellers/render/"
    f"?query=&start={x}&count={games_per_page}&cc=CA&l=english&v=4&tag="
    for x in range(0, total_results, games_per_page)
]

# Download all pages concurrently (the work is network-bound).
with concurrent.futures.ThreadPoolExecutor() as executor:
    results = executor.map(get_data, pages)
    # Collect while the pool is still open; from here on `pages` holds the
    # HTML of each page, in the same order as the URLs.
    pages = list(results)
    

In [8]:
# Run the HTML parser over every downloaded page using a thread pool.
with concurrent.futures.ThreadPoolExecutor() as executor:
    results = executor.map(parse, pages)

# Flatten the per-page row lists into a single table, one row per game.
column_names = ['Name', 'Before Discount $',
                "Discount Percentage %", 'After Discount $', 'Link']
final_list = pd.DataFrame(
    [row for page_rows in results for row in page_rows],
    columns=column_names,
)
# Bare expression: displays the table as the cell output.
final_list

Unnamed: 0,Name,Before Discount $,Discount Percentage %,After Discount $,Link
0,Raft,21.99,15,18.69,https://store.steampowered.com/app/648800/Raft...
1,Ready or Not,45.99,10,41.39,https://store.steampowered.com/app/1144200/Rea...
2,Sea of Thieves,49.99,50,24.99,https://store.steampowered.com/app/1172620/Sea...
3,MONSTER HUNTER RISE,79.99,49,40.79,https://store.steampowered.com/app/1446780/MON...
4,God of War,59.99,20,47.99,https://store.steampowered.com/app/1593500/God...
...,...,...,...,...,...
11465,Superfighters Deluxe,11.49,60,4.59,https://store.steampowered.com/app/855860/Supe...
11466,Refactor,5.69,20,4.55,https://store.steampowered.com/app/1664670/Ref...
11467,Whiplash - Crash Valley,5.69,90,0.56,https://store.steampowered.com/app/552130/Whip...
11468,Lifeslide,11.49,50,5.74,https://store.steampowered.com/app/956140/Life...


In [9]:
# Export to CSV in the current working directory.
# index=False leaves the DataFrame's row index out of the file; the
# "utf-8-sig" codec writes UTF-8 with a leading byte-order mark
# (presumably so spreadsheet applications detect the encoding; confirm).
final_list.to_csv('Games Special.csv', index = False, encoding = "utf-8-sig")