# Intro
placeholder

# Imports

In [1]:
# standard library imports
import csv
import datetime as dt
import json
import os
import statistics
import time

# third-party imports
import numpy as np
import pandas as pd
import requests

# customisations - ensure tables show all columns
# Use the fully qualified option key: the short pattern "max_columns" is
# ambiguous in newer pandas releases (it also matches styler options) and
# raises an OptionError there.
pd.set_option("display.max_columns", 100)

# Get data
## 1) Create list of games
The API for SteamSpy (https://steamspy.com/api.php) can only return pages of 1000 results when the request is for 'all', so I need to loop the requests to get more than one page.  

In [2]:
def get_request(url, parameters=None):
    """Return json-formatted response of a get request using optional parameters.

    The request is retried until it succeeds: after an SSL error the function
    counts down 5 seconds before retrying, and after a falsy response it waits
    10 seconds before retrying.

    Parameters
    ----------
    url : string
    parameters : {'parameter': 'value'}
        parameters to pass as part of get request

    Returns
    -------
    json_data
        json-formatted response (dict-like)
    """
    # Loop rather than recurse so a long outage cannot exhaust the call stack.
    while True:
        try:
            response = requests.get(url=url, params=parameters)
        except requests.exceptions.SSLError as s:
            # The exception class is reached through `requests`; a bare
            # `SSLError` is not imported anywhere in this notebook.
            print('SSL Error:', s)

            for i in range(5, 0, -1):
                print('\rWaiting... ({})'.format(i), end='')
                time.sleep(1)
            print('\rRetrying.' + ' '*10)
            continue

        if response:
            return response.json()

        # A falsy response usually means too many requests. Wait and try again.
        print('No response, waiting 10 seconds...')
        time.sleep(10)
        print('Retrying.')

To get a manageable amount of useful information, I limit the requests to 35 pages, as anything beyond that has too few owners to be of use.  
Also, the columns returned by this call are not sufficient on their own, so I only store the IDs and the names of the games.

In [19]:
def request_pages(pages=35):
    """Download the first `pages` pages of SteamSpy's 'all' listing.

    Parameters
    ----------
    pages : int, optional
        Number of result pages to request, starting at page 0. Defaults to 35,
        which limits the call to roughly the 35000 most owned games.

    Returns
    -------
    app_list : pandas.DataFrame
        The 'appid' and 'name' columns of every requested page. Rows are sorted
        by 'appid' within each page, pages are concatenated in request order,
        and the index is reset to 0..n-1. Empty when `pages` is 0.
    """
    url = "https://steamspy.com/api.php"
    page_frames = []
    for page in range(pages):
        # Same query as "?request=all&page=<page>", built by requests itself.
        json_data = get_request(url, parameters={"request": "all", "page": page})
        steam_spy_all = pd.DataFrame.from_dict(json_data, orient='index')
        page_frames.append(
            steam_spy_all[['appid', 'name']].sort_values('appid').reset_index(drop=True)
        )

    if not page_frames:
        # pd.concat refuses an empty list, so return an empty table explicitly.
        return pd.DataFrame(columns=['appid', 'name'])

    app_list = pd.concat(page_frames).reset_index(drop=True)
    return app_list

In [20]:
# Run the paged download and keep the combined table of app IDs and names.
games_list = request_pages()


In [21]:
# Show the combined table as the cell output.
games_list

Unnamed: 0,appid,name
0,10,Counter-Strike
1,20,Team Fortress Classic
2,30,Day of Defeat
3,40,Deathmatch Classic
4,50,Half-Life: Opposing Force
...,...,...
34995,1812930,Santhai
34996,1814060,Virus at Home
34997,1816650,Nelson and the Magic Cauldron: The Journey
34998,1828900,Stickit


In [None]:
# Request a single page of the 'all' listing from SteamSpy. The page number is
# passed as a query parameter next to the request type, so this cell no longer
# depends on a loop variable that does not exist at this level.
# NOTE(review): page 0 is assumed here - confirm which page this cell should fetch.
url = "https://steamspy.com/api.php"
parameters = {"request": "all", "page": 0}

# request 'all' from steam spy and parse into dataframe
json_data = get_request(url, parameters=parameters)
steam_spy_all = pd.DataFrame.from_dict(json_data, orient='index')

# generate sorted app_list from steamspy data
app_list = steam_spy_all[['appid', 'name']].sort_values('appid').reset_index(drop=True)

# export disabled to keep consistency across download sessions
# app_list.to_csv('app_list.csv', index=False)

# instead read from stored csv (this replaces the freshly downloaded table)
app_list = pd.read_csv('app_list.csv')

# display first few rows
app_list.head()