In [49]:
# standard library imports
import csv
import datetime as dt
import json
import os
import statistics
import time

# third-party imports
import numpy as np
import pandas as pd
import requests
from requests.exceptions import SSLError
import pickle

# customisations - ensure tables show all columns
pd.set_option("max_columns", 100)

In [50]:
def get_request(url, parameters=None):
    """Return json-formatted response of a get request using optional parameters.
    
    Parameters
    ----------
    url : string
    parameters : {'parameter': 'value'}
        parameters to pass as part of get request
    
    Returns
    -------
    json_data
        json-formatted response (dict-like)
    """
    try:
        response = requests.get(url=url, params=parameters)
    except SSLError as s:
        print('SSL Error:', s)
        
        for i in range(5, 0, -1):
            print('\rWaiting... ({})'.format(i), end='')
            time.sleep(1)
        print('\rRetrying.' + ' '*10)
        
        # recusively try again
        return get_request(url, parameters)
    
    if response:
        return response.json()
    else:
        # response is none usually means too many requests. Wait and try again 
        print('No response, waiting 10 seconds...')
        time.sleep(10)
        print('Retrying.')
        return get_request(url, parameters)

In [51]:
url = "https://steamspy.com/api.php"
parameters = {"request": "all"}

# request 'all' from steam spy and parse into dataframe
json_data = get_request(url, parameters=parameters)
steam_spy_all = pd.DataFrame.from_dict(json_data, orient='index')

# generate sorted app_list from steamspy data
app_list = steam_spy_all[['appid', 'name']].sort_values('appid').reset_index(drop=True)

# export disabled to keep consistency across download sessions
app_list.to_csv('app_list.csv', index=False)

# instead read from stored csv
app_list = pd.read_csv('app_list.csv')

# display first few rows
app_list.head()

Unnamed: 0,appid,name
0,33760,The Search for Amelia Earhart
1,34274,Ecco the Dolphin
2,34275,Gain Ground
3,34277,Shinobi III: Return of the Ninja Master
4,34311,Kid Chameleon


In [52]:
len(app_list)

1000

In [None]:
def get_app_data(start, stop, parser, pause):
    """Return list of app data generated from parser.
    
    parser : function to handle request
    """
    app_data = []
    
    # iterate through each row of app_list, confined by start and stop
    for index, row in app_list[start:stop].iterrows():
        print('Current index: {}'.format(index), end='\r')
        
        appid = row['appid']
        name = row['name']

        # retrive app data for a row, handled by supplied parser, and append to list
        data = parser(appid, name)
        app_data.append(data)

        time.sleep(pause) # prevent overloading api with requests
    
    return app_data

In [None]:
def process_batches(parser, app_list, download_path, data_filename, index_filename,
                    columns, begin=0, end=-1, batchsize=50, pause=1):
    """Process app data in batches, writing directly to file.
    
    parser : custom function to format request
    app_list : dataframe of appid and name
    download_path : path to store data
    data_filename : filename to save app data
    index_filename : filename to store highest index written
    columns : column names for file
    
    Keyword arguments:
    
    begin : starting index (get from index_filename, default 0)
    end : index to finish (defaults to end of app_list)
    batchsize : number of apps to write in each batch (default 100)
    pause : time to wait after each api request (defualt 1)
    
    returns: none
    """
    print('Starting at index {}:\n'.format(begin))
    
    # by default, process all apps in app_list
    if end == -1:
        end = len(app_list) + 1
    
    # generate array of batch begin and end points
    batches = np.arange(begin, end, batchsize)
    batches = np.append(batches, end)
    
    apps_written = 0
    batch_times = []
    
    for i in range(len(batches) - 1):
        start_time = time.time()
        
        start = batches[i]
        stop = batches[i+1]
        
        app_data = get_app_data(start, stop, parser, pause)
        
        rel_path = os.path.join(download_path, data_filename)
        
        # writing app data to file
        with open(rel_path, 'a', newline='', encoding='utf-8') as f:
            writer = csv.DictWriter(f, fieldnames=columns, extrasaction='ignore')
            
            for j in range(3,0,-1):
                print("\rAbout to write data, don't stop script! ({})".format(j), end='')
                time.sleep(0.5)
            
            writer.writerows(app_data)
            print('\rExported lines {}-{} to {}.'.format(start, stop-1, data_filename), end=' ')
            
        apps_written += len(app_data)
        
        idx_path = os.path.join(download_path, index_filename)
        
        # writing last index to file
        with open(idx_path, 'w') as f:
            index = stop
            print(index, file=f)
            
        # logging time taken
        end_time = time.time()
        time_taken = end_time - start_time
        
        batch_times.append(time_taken)
        mean_time = statistics.mean(batch_times)
        
        est_remaining = (len(batches) - i - 2) * mean_time
        
        remaining_td = dt.timedelta(seconds=round(est_remaining))
        time_td = dt.timedelta(seconds=round(time_taken))
        mean_td = dt.timedelta(seconds=round(mean_time))
        
        print('Batch {} time: {} (avg: {}, remaining: {})'.format(i, time_td, mean_td, remaining_td))
            
    print('\nProcessing batches complete. {} apps written'.format(apps_written))

In [24]:
def reset_index(download_path, index_filename):
    """Reset index in file to 0."""
    rel_path = os.path.join(download_path, index_filename)
    
    with open(rel_path, 'w') as f:
        print(0, file=f)
        

def get_index(download_path, index_filename):
    """Retrieve index from file, returning 0 if file not found."""
    try:
        rel_path = os.path.join(download_path, index_filename)

        with open(rel_path, 'r') as f:
            index = int(f.readline())
    
    except FileNotFoundError:
        index = 0
        
    return index


def prepare_data_file(download_path, filename, index, columns):
    """Create file and write headers if index is 0."""
    if index == 0:
        rel_path = os.path.join(download_path, filename)

        with open(rel_path, 'w', newline='') as f:
            writer = csv.DictWriter(f, fieldnames=columns)
            writer.writeheader()


def skip_index(download_path, index_filename):
    """Reset index in file to 0."""
    rel_path = os.path.join(download_path, index_filename)
    
    with open(rel_path, 'r') as f:
            index = int(f.readline())
    
    new_index = index + 1
            
    with open(rel_path, 'w') as f:
        print(new_index, file=f)

       



In [3]:
def parse_steam_request(appid, name):
    """Unique parser to handle data from Steam Store API.
    
    Returns : json formatted data (dict-like)
    """
    url = "http://store.steampowered.com/api/appdetails/"
    parameters = {"appids": appid}
    
    json_data = get_request(url, parameters=parameters)
    json_app_data = json_data[str(appid)]
    
    if json_app_data['success']:
        data = json_app_data['data']
    else:
        data = {'name': name, 'steam_appid': appid}
        
    return data


# # Set file parameters
# download_path = '../data/download'
# steam_app_data = 'steam_app_data.csv'
# steam_index = 'steam_index.txt'

# steam_columns = [
#     'name', 'steam_appid',
   
#     'pc_requirements',
    
   
#    'genres'
# ]


# # Overwrites last index for demonstration (would usually store highest index so can continue across sessions)
# reset_index(download_path, steam_index)

# # Retrieve last index downloaded from file
# index = get_index(download_path, steam_index)

# # Wipe or create data file and write headers if index is 0
# prepare_data_file(download_path, steam_app_data, index, steam_columns)

# # Set end and chunksize for demonstration - remove to run through entire app list
# process_batches(
#     parser=parse_steam_request,
#     app_list=app_list,
#     download_path=download_path,
#     data_filename=steam_app_data,
#     index_filename=steam_index,
#     columns=steam_columns,
#     begin=index,
#     end=10,
#     batchsize=5
# )

In [4]:
def parse_steamspy_request(appid, name):
    """Parser to handle SteamSpy API data."""
    url = "https://steamspy.com/api.php"
    parameters = {"request": "appdetails", "appid": appid}
    
    json_data = get_request(url, parameters)
    return json_data

# set files and columns
download_path = '../'
steamspy_data = 'steamspy_data.csv'
steamspy_index = 'steamspy_index.txt'

steamspy_columns = [
    'appid', 'name', 'developer', 'publisher', 'score_rank', 'positive',
    'negative', 'userscore', 'owners', 'average_forever', 'average_2weeks',
    'median_forever', 'median_2weeks', 'price', 'initialprice', 'discount',
    'languages', 'genre', 'ccu', 'tags'
]

# reset_index(download_path, steamspy_index)
index = get_index(download_path, steamspy_index)

# Wipe data file if index is 0
prepare_data_file(download_path, steamspy_data, index, steamspy_columns)

process_batches(
    parser=parse_steamspy_request,
    app_list=app_list,
    download_path=download_path, 
    data_filename=steamspy_data,
    index_filename=steamspy_index,
    columns=steamspy_columns,
    begin=index,
    end=-1,
    batchsize=50,
    pause=0.3
)

NameError: name 'process_batches' is not defined

In [5]:
len(app_list)

NameError: name 'app_list' is not defined

In [None]:
app_list[:-50]

In [6]:
pd.read_csv('../steamspy_data.csv').head(50)

Unnamed: 0,appid,name,developer,publisher,score_rank,positive,negative,userscore,owners,average_forever,average_2weeks,median_forever,median_2weeks,price,initialprice,discount,languages,genre,ccu,tags


In [7]:
def process_library_batches(parser, steamid_list, download_path, data_filename, index_filename,
                    columns, begin=0, end=-1, batchsize=50, pause=1):
    """Process library data in batches, writing directly to file.
    
    parser : custom function to format request
    steamid_list : dataframe of steamids
    download_path : path to store data
    data_filename : filename to save library data
    index_filename : filename to store highest index written
    columns : column names for file
    
    Keyword arguments:
    
    begin : starting index (get from index_filename, default 0)
    end : index to finish (defaults to end of steamid_list)
    batchsize : number of libraries to write in each batch (default 50)
    pause : time to wait after each api request (default 1)
    
    returns: none
    """
    print('Starting at index {}:\n'.format(begin))
    
    # by default, process all steamids in steamid_list
    if end == -1:
        end = len(steamid_list) + 1
    
    # generate array of batch begin and end points
    batches = np.arange(begin, end, batchsize)
    batches = np.append(batches, end)
    
    libraries_written = 0
    batch_times = []
    
    for i in range(len(batches) - 1):
        start_time = time.time()
        
        start = batches[i]
        stop = batches[i+1]
        
        library_data = get_library_data(start, stop, parser, pause)
        
        rel_path = os.path.join(download_path, data_filename)
        
        # writing library data to file
        with open(rel_path, 'a', newline='', encoding='utf-8') as f:
            writer = csv.DictWriter(f, fieldnames=columns, extrasaction='ignore')
            
            for j in range(3,0,-1):
                print("\rAbout to write data, don't stop script! ({})".format(j), end='')
                time.sleep(0.5)
            
            writer.writerows(library_data)
            print('\rExported lines {}-{} to {}.'.format(start, stop-1, data_filename), end=' ')
            
        libraries_written += len(library_data)
        
        idx_path = os.path.join(download_path, index_filename)
        
        # writing last index to file
        with open(idx_path, 'w') as f:
            index = stop
            print(index, file=f)
            
        # logging time taken
        end_time = time.time()
        time_taken = end_time - start_time
        
        batch_times.append(time_taken)
        mean_time = statistics.mean(batch_times)
        
        est_remaining = (len(batches) - i - 2) * mean_time
        
        remaining_td = dt.timedelta(seconds=round(est_remaining))
        time_td = dt.timedelta(seconds=round(time_taken))
        mean_td = dt.timedelta(seconds=round(mean_time))
        
        print('Batch {} time: {} (avg: {}, remaining: {})'.format(i, time_td, mean_td, remaining_td))
            
    print('\nProcessing batches complete. {} libraries written'.format(libraries_written))

In [21]:
def get_library_data(start, stop, parser, pause):
    """Return list of library data generated from parser.
    
    parser : function to handle request
    """
    library_data = []
    index = start
    
    # iterate through each row of steamid_list, confined by start and stop
    for steamid in steamid_list[start:stop]:
        print('Current index: {}'.format(index), end='\r')
        index += 1
        
        # retrieve library data for a row, handled by supplied parser, and append to list
        data = parser(steamid)
        library_data.append(data)

        time.sleep(pause) # prevent overloading api with requests
    
    return library_data

In [9]:
def parse_steam_library(steamid):
    """Parser to handle library data."""
    url = "https://api.steampowered.com/IPlayerService/GetOwnedGames/v0001/"
    parameters = {'include_appinfo': True, 'steamid': steamid}
    
    json_data = get_library_request(url, parameters)
    
    if json_data['response']:
        return {'steamid':steamid, 'library':[{'appid': game['appid'], 'name': game['name'], 'hours': game['playtime_forever']} for game in json_data['response']['games']]}
    else:
        return {'steamid':steamid, 'library':'hidden'}

In [38]:
def get_library_request(url, parameters=None):
    """Return json-formatted response of a get request using optional parameters.
    
    Parameters
    ----------
    url : string
    parameters : {'parameter': 'value'}
        parameters to pass as part of get request
    
    Returns
    -------
    json_data
        json-formatted response (dict-like)
    """
    try:
        response = requests.get(
        url,
        {
            **parameters,
            **{
                'key': '9C34DB5DC9F6FB662853AC6217BC048F',
                'format': 'json'
            }
        }
    )
    except SSLError as s:
        print('SSL Error:', s)
        
        for i in range(5, 0, -1):
            print('\rWaiting... ({})'.format(i), end='')
            time.sleep(1)
        print('\rRetrying.' + ' '*10)
        
        # recusively try again
        return get_library_request(url, parameters)
    
    if response:
        return response.json()
    else:
        # response is none usually means too many requests. Wait and try again 
        print('No response, waiting 5 seconds...')
        time.sleep(5)
        print('Retrying.')
#         wait_counter += 1
#         if wait_counter == 2:
#             wait_counter = 0
#             return please_just_work()
#         return get_library_request(url, parameters)
        return please_just_work()

In [11]:
with open('steamid_list.pickle', 'rb') as handle:
   steamid_list = pickle.load(handle)

In [12]:
steamid_list

[76561198219067393,
 76561198148157441,
 76561198993539076,
 76561198247182340,
 76561198278705159,
 76561198306000904,
 76561199041871881,
 76561198398210058,
 76561198313209867,
 76561198170079242,
 76561199187853325,
 76561198424457231,
 76561199073034256,
 76561198088650778,
 76561198128726044,
 76561198886682654,
 76561198311899167,
 76561199160819745,
 76561199085551652,
 76561198401257513,
 76561199063236653,
 76561198809874484,
 76561199122907193,
 76561197992706107,
 76561198836187202,
 76561198372618308,
 76561198063386696,
 76561198998093897,
 76561198147928140,
 76561199100035151,
 76561198168342632,
 76561199221735528,
 76561198129545322,
 76561199056158837,
 76561198110376063,
 76561198894416000,
 76561198134493312,
 76561199123759235,
 76561198157955203,
 76561199096168586,
 76561198797291660,
 76561198800797837,
 76561198036582543,
 76561198332706960,
 76561198352302223,
 76561198018494613,
 76561199085518999,
 76561198330806423,
 76561199208988823,
 76561197991952543,


In [48]:
def please_just_work():
    # set files and columns
    download_path = '../data'
    library_data = 'library_data.csv'
    library_index = 'library_index.txt'
    api_key = '9C34DB5DC9F6FB662853AC6217BC048F'

    library_columns = [
        'steamid', 'library'
    ]

    # # Overwrites last index for demonstration (would usually store highest index so can continue across sessions)
    skip_index(download_path, library_index)

    # reset_index(download_path, library_index)

    index = get_index(download_path, library_index)

    # Wipe data file if index is 0
    prepare_data_file(download_path, library_data, index, library_columns)

    process_library_batches(
        parser=parse_steam_library,
        steamid_list=steamid_list,
        download_path=download_path, 
        data_filename=library_data,
        index_filename=library_index,
        columns=library_columns,
        begin=index,
        end=-1,
        batchsize=5,
        pause=.4
    )
please_just_work()

Starting at index 54224:

Exported lines 54224-54228 to library_data.csv. Batch 0 time: 0:00:06 (avg: 0:00:06, remaining: 1:42:42)
No response, waiting 5 seconds...
Retrying.
Starting at index 54230:

No response, waiting 5 seconds...
Retrying.
Starting at index 54231:

No response, waiting 5 seconds...
Retrying.
Starting at index 54232:

No response, waiting 5 seconds...
Retrying.
Starting at index 54233:

Exported lines 54233-54237 to library_data.csv. Batch 0 time: 0:00:05 (avg: 0:00:05, remaining: 1:27:16)
Exported lines 54238-54242 to library_data.csv. Batch 1 time: 0:00:04 (avg: 0:00:05, remaining: 1:24:13)
Exported lines 54243-54247 to library_data.csv. Batch 2 time: 0:00:04 (avg: 0:00:04, remaining: 1:22:41)
Exported lines 54248-54252 to library_data.csv. Batch 3 time: 0:00:04 (avg: 0:00:04, remaining: 1:22:21)
Exported lines 54253-54257 to library_data.csv. Batch 4 time: 0:00:04 (avg: 0:00:04, remaining: 1:22:05)
Exported lines 54258-54262 to library_data.csv. Batch 5 time: 0:

Exported lines 54572-54576 to library_data.csv. Batch 13 time: 0:00:05 (avg: 0:00:05, remaining: 1:23:06)
Exported lines 54577-54581 to library_data.csv. Batch 14 time: 0:00:05 (avg: 0:00:05, remaining: 1:23:12)
Exported lines 54582-54586 to library_data.csv. Batch 15 time: 0:00:05 (avg: 0:00:05, remaining: 1:22:57)
Exported lines 54587-54591 to library_data.csv. Batch 16 time: 0:00:05 (avg: 0:00:05, remaining: 1:22:47)
Exported lines 54592-54596 to library_data.csv. Batch 17 time: 0:00:05 (avg: 0:00:05, remaining: 1:22:33)
Exported lines 54597-54601 to library_data.csv. Batch 18 time: 0:00:05 (avg: 0:00:05, remaining: 1:22:40)
Exported lines 54602-54606 to library_data.csv. Batch 19 time: 0:00:05 (avg: 0:00:05, remaining: 1:22:30)
Exported lines 54607-54611 to library_data.csv. Batch 20 time: 0:00:05 (avg: 0:00:05, remaining: 1:22:19)
Exported lines 54612-54616 to library_data.csv. Batch 21 time: 0:00:05 (avg: 0:00:05, remaining: 1:22:18)
Exported lines 54617-54621 to library_data.csv

Exported lines 54934-54938 to library_data.csv. Batch 20 time: 0:00:05 (avg: 0:00:05, remaining: 1:15:33)
Exported lines 54939-54943 to library_data.csv. Batch 21 time: 0:00:05 (avg: 0:00:05, remaining: 1:15:33)
Exported lines 54944-54948 to library_data.csv. Batch 22 time: 0:00:05 (avg: 0:00:05, remaining: 1:15:29)
Exported lines 54949-54953 to library_data.csv. Batch 23 time: 0:00:05 (avg: 0:00:05, remaining: 1:15:26)
Exported lines 54954-54958 to library_data.csv. Batch 24 time: 0:00:05 (avg: 0:00:05, remaining: 1:15:19)
No response, waiting 5 seconds...
Retrying.
Starting at index 54960:

No response, waiting 5 seconds...
Retrying.
Starting at index 54961:

No response, waiting 5 seconds...
Retrying.
Starting at index 54962:

No response, waiting 5 seconds...
Retrying.
Starting at index 54963:

Exported lines 54963-54967 to library_data.csv. Batch 0 time: 0:00:05 (avg: 0:00:05, remaining: 1:14:15)
Exported lines 54968-54972 to library_data.csv. Batch 1 time: 0:00:05 (avg: 0:00:05, 

Exported lines 55281-55285 to library_data.csv. Batch 13 time: 0:00:05 (avg: 0:00:05, remaining: 1:10:09)
Exported lines 55286-55290 to library_data.csv. Batch 14 time: 0:00:05 (avg: 0:00:05, remaining: 1:10:01)
Exported lines 55291-55295 to library_data.csv. Batch 15 time: 0:00:05 (avg: 0:00:05, remaining: 1:09:55)
Exported lines 55296-55300 to library_data.csv. Batch 16 time: 0:00:05 (avg: 0:00:05, remaining: 1:09:51)
Exported lines 55301-55305 to library_data.csv. Batch 17 time: 0:00:05 (avg: 0:00:05, remaining: 1:09:46)
Exported lines 55306-55310 to library_data.csv. Batch 18 time: 0:00:05 (avg: 0:00:05, remaining: 1:09:36)
Exported lines 55311-55315 to library_data.csv. Batch 19 time: 0:00:05 (avg: 0:00:05, remaining: 1:09:29)
Exported lines 55316-55320 to library_data.csv. Batch 20 time: 0:00:05 (avg: 0:00:05, remaining: 1:09:27)
Exported lines 55321-55325 to library_data.csv. Batch 21 time: 0:00:05 (avg: 0:00:05, remaining: 1:09:28)
Exported lines 55326-55330 to library_data.csv

Exported lines 55660-55664 to library_data.csv. Batch 7 time: 0:00:06 (avg: 0:00:05, remaining: 1:10:01)
Exported lines 55665-55669 to library_data.csv. Batch 8 time: 0:00:05 (avg: 0:00:05, remaining: 1:09:25)
Exported lines 55670-55674 to library_data.csv. Batch 9 time: 0:00:05 (avg: 0:00:05, remaining: 1:09:13)
Exported lines 55675-55679 to library_data.csv. Batch 10 time: 0:00:05 (avg: 0:00:05, remaining: 1:08:38)
Exported lines 55680-55684 to library_data.csv. Batch 11 time: 0:00:05 (avg: 0:00:05, remaining: 1:08:13)
Exported lines 55685-55689 to library_data.csv. Batch 12 time: 0:00:05 (avg: 0:00:05, remaining: 1:07:52)
Exported lines 55690-55694 to library_data.csv. Batch 13 time: 0:00:05 (avg: 0:00:05, remaining: 1:07:42)
Exported lines 55695-55699 to library_data.csv. Batch 14 time: 0:00:05 (avg: 0:00:05, remaining: 1:07:39)
Exported lines 55700-55704 to library_data.csv. Batch 15 time: 0:00:05 (avg: 0:00:05, remaining: 1:07:20)
Exported lines 55705-55709 to library_data.csv. B

Exported lines 56025-56029 to library_data.csv. Batch 25 time: 0:00:05 (avg: 0:00:05, remaining: 1:00:23)
Exported lines 56030-56034 to library_data.csv. Batch 26 time: 0:00:05 (avg: 0:00:05, remaining: 1:00:18)
Exported lines 56035-56039 to library_data.csv. Batch 27 time: 0:00:05 (avg: 0:00:05, remaining: 1:00:10)
Exported lines 56040-56044 to library_data.csv. Batch 28 time: 0:00:05 (avg: 0:00:05, remaining: 1:00:05)
Exported lines 56045-56049 to library_data.csv. Batch 29 time: 0:00:05 (avg: 0:00:05, remaining: 0:59:58)
Exported lines 56050-56054 to library_data.csv. Batch 30 time: 0:00:05 (avg: 0:00:05, remaining: 0:59:53)
Exported lines 56055-56059 to library_data.csv. Batch 31 time: 0:00:05 (avg: 0:00:05, remaining: 0:59:53)
Exported lines 56060-56064 to library_data.csv. Batch 32 time: 0:00:05 (avg: 0:00:05, remaining: 0:59:54)
Exported lines 56065-56069 to library_data.csv. Batch 33 time: 0:00:05 (avg: 0:00:05, remaining: 0:59:46)
Exported lines 56070-56074 to library_data.csv

Exported lines 56386-56390 to library_data.csv. Batch 20 time: 0:00:05 (avg: 0:00:05, remaining: 0:55:09)
Exported lines 56391-56395 to library_data.csv. Batch 21 time: 0:00:05 (avg: 0:00:05, remaining: 0:55:03)
No response, waiting 5 seconds...
Retrying.
Starting at index 56397:

Exported lines 56397-56401 to library_data.csv. Batch 0 time: 0:00:08 (avg: 0:00:08, remaining: 1:32:51)
Exported lines 56402-56406 to library_data.csv. Batch 1 time: 0:00:05 (avg: 0:00:07, remaining: 1:13:19)
Exported lines 56407-56411 to library_data.csv. Batch 2 time: 0:00:05 (avg: 0:00:06, remaining: 1:08:22)
Exported lines 56412-56416 to library_data.csv. Batch 3 time: 0:00:05 (avg: 0:00:06, remaining: 1:05:12)
Exported lines 56417-56421 to library_data.csv. Batch 4 time: 0:00:08 (avg: 0:00:06, remaining: 1:09:44)
Exported lines 56422-56426 to library_data.csv. Batch 5 time: 0:00:06 (avg: 0:00:06, remaining: 1:08:53)
Exported lines 56427-56431 to library_data.csv. Batch 6 time: 0:00:05 (avg: 0:00:06, rem

Exported lines 56744-56748 to library_data.csv. Batch 25 time: 0:00:05 (avg: 0:00:05, remaining: 0:47:42)
Exported lines 56749-56753 to library_data.csv. Batch 26 time: 0:00:05 (avg: 0:00:05, remaining: 0:47:40)
Exported lines 56754-56758 to library_data.csv. Batch 27 time: 0:00:05 (avg: 0:00:05, remaining: 0:47:33)
Exported lines 56759-56763 to library_data.csv. Batch 28 time: 0:00:05 (avg: 0:00:05, remaining: 0:47:25)
Exported lines 56764-56768 to library_data.csv. Batch 29 time: 0:00:05 (avg: 0:00:05, remaining: 0:47:18)
Exported lines 56769-56773 to library_data.csv. Batch 30 time: 0:00:05 (avg: 0:00:05, remaining: 0:47:12)
Exported lines 56774-56778 to library_data.csv. Batch 31 time: 0:00:05 (avg: 0:00:05, remaining: 0:47:06)
Exported lines 56779-56783 to library_data.csv. Batch 32 time: 0:00:05 (avg: 0:00:05, remaining: 0:46:59)
Exported lines 56784-56788 to library_data.csv. Batch 33 time: 0:00:05 (avg: 0:00:05, remaining: 0:46:55)
Exported lines 56789-56793 to library_data.csv

Exported lines 57119-57123 to library_data.csv. Batch 10 time: 0:00:05 (avg: 0:00:05, remaining: 0:41:24)
Exported lines 57124-57128 to library_data.csv. Batch 11 time: 0:00:05 (avg: 0:00:05, remaining: 0:41:16)
Exported lines 57129-57133 to library_data.csv. Batch 12 time: 0:00:05 (avg: 0:00:05, remaining: 0:41:06)
Exported lines 57134-57138 to library_data.csv. Batch 13 time: 0:00:05 (avg: 0:00:05, remaining: 0:41:19)
Exported lines 57139-57143 to library_data.csv. Batch 14 time: 0:00:06 (avg: 0:00:05, remaining: 0:42:03)
Exported lines 57144-57148 to library_data.csv. Batch 15 time: 0:00:07 (avg: 0:00:05, remaining: 0:43:05)
Exported lines 57149-57153 to library_data.csv. Batch 16 time: 0:00:07 (avg: 0:00:05, remaining: 0:43:55)
Exported lines 57154-57158 to library_data.csv. Batch 17 time: 0:00:05 (avg: 0:00:05, remaining: 0:44:00)
No response, waiting 5 seconds...
Retrying.
Starting at index 57160:

Exported lines 57160-57164 to library_data.csv. Batch 0 time: 0:00:05 (avg: 0:00:0

Exported lines 57480-57484 to library_data.csv. Batch 24 time: 0:00:05 (avg: 0:00:05, remaining: 0:36:09)
Exported lines 57485-57489 to library_data.csv. Batch 25 time: 0:00:05 (avg: 0:00:05, remaining: 0:36:06)
Exported lines 57490-57494 to library_data.csv. Batch 26 time: 0:00:05 (avg: 0:00:05, remaining: 0:35:59)
Exported lines 57495-57499 to library_data.csv. Batch 27 time: 0:00:04 (avg: 0:00:05, remaining: 0:35:50)
Exported lines 57500-57504 to library_data.csv. Batch 28 time: 0:00:05 (avg: 0:00:05, remaining: 0:35:42)
Exported lines 57505-57509 to library_data.csv. Batch 29 time: 0:00:05 (avg: 0:00:05, remaining: 0:35:35)
Exported lines 57510-57514 to library_data.csv. Batch 30 time: 0:00:05 (avg: 0:00:05, remaining: 0:35:30)
Exported lines 57515-57519 to library_data.csv. Batch 31 time: 0:00:05 (avg: 0:00:05, remaining: 0:35:25)
Exported lines 57520-57524 to library_data.csv. Batch 32 time: 0:00:04 (avg: 0:00:05, remaining: 0:35:17)
Exported lines 57525-57529 to library_data.csv

Exported lines 57838-57842 to library_data.csv. Batch 8 time: 0:00:06 (avg: 0:00:06, remaining: 0:37:20)
Exported lines 57843-57847 to library_data.csv. Batch 9 time: 0:00:06 (avg: 0:00:06, remaining: 0:37:15)
Exported lines 57848-57852 to library_data.csv. Batch 10 time: 0:00:05 (avg: 0:00:06, remaining: 0:36:47)
Exported lines 57853-57857 to library_data.csv. Batch 11 time: 0:00:05 (avg: 0:00:06, remaining: 0:36:08)
Exported lines 57858-57862 to library_data.csv. Batch 12 time: 0:00:08 (avg: 0:00:06, remaining: 0:37:02)
Exported lines 57863-57867 to library_data.csv. Batch 13 time: 0:00:07 (avg: 0:00:06, remaining: 0:37:19)
Exported lines 57868-57872 to library_data.csv. Batch 14 time: 0:00:05 (avg: 0:00:06, remaining: 0:36:49)
Exported lines 57873-57877 to library_data.csv. Batch 15 time: 0:00:06 (avg: 0:00:06, remaining: 0:36:49)
Exported lines 57878-57882 to library_data.csv. Batch 16 time: 0:00:08 (avg: 0:00:06, remaining: 0:37:22)
Exported lines 57883-57887 to library_data.csv. 

Exported lines 58183-58187 to library_data.csv. Batch 18 time: 0:00:05 (avg: 0:00:05, remaining: 0:26:18)
Exported lines 58188-58192 to library_data.csv. Batch 19 time: 0:00:05 (avg: 0:00:05, remaining: 0:26:12)
Exported lines 58193-58197 to library_data.csv. Batch 20 time: 0:00:05 (avg: 0:00:05, remaining: 0:26:07)
No response, waiting 5 seconds...
Retrying.
Starting at index 58199:

No response, waiting 5 seconds...
Retrying.
Starting at index 58200:

No response, waiting 5 seconds...
Retrying.
Starting at index 58201:

Exported lines 58201-58205 to library_data.csv. Batch 0 time: 0:00:05 (avg: 0:00:05, remaining: 0:26:53)
Exported lines 58206-58210 to library_data.csv. Batch 1 time: 0:00:05 (avg: 0:00:05, remaining: 0:26:08)
Exported lines 58211-58215 to library_data.csv. Batch 2 time: 0:00:05 (avg: 0:00:05, remaining: 0:26:17)
Exported lines 58216-58220 to library_data.csv. Batch 3 time: 0:00:06 (avg: 0:00:05, remaining: 0:27:27)
Exported lines 58221-58225 to library_data.csv. Batc

Exported lines 58544-58548 to library_data.csv. Batch 20 time: 0:00:05 (avg: 0:00:05, remaining: 0:19:23)
Exported lines 58549-58553 to library_data.csv. Batch 21 time: 0:00:05 (avg: 0:00:05, remaining: 0:19:24)
Exported lines 58554-58558 to library_data.csv. Batch 22 time: 0:00:05 (avg: 0:00:05, remaining: 0:19:26)
Exported lines 58559-58563 to library_data.csv. Batch 23 time: 0:00:05 (avg: 0:00:05, remaining: 0:19:23)
Exported lines 58564-58568 to library_data.csv. Batch 24 time: 0:00:05 (avg: 0:00:05, remaining: 0:19:20)
Exported lines 58569-58573 to library_data.csv. Batch 25 time: 0:00:05 (avg: 0:00:05, remaining: 0:19:14)
Exported lines 58574-58578 to library_data.csv. Batch 26 time: 0:00:05 (avg: 0:00:05, remaining: 0:19:08)
Exported lines 58579-58583 to library_data.csv. Batch 27 time: 0:00:05 (avg: 0:00:05, remaining: 0:19:02)
Exported lines 58584-58588 to library_data.csv. Batch 28 time: 0:00:05 (avg: 0:00:05, remaining: 0:18:59)
Exported lines 58589-58593 to library_data.csv

No response, waiting 5 seconds...
Retrying.
Starting at index 58931:

No response, waiting 5 seconds...
Retrying.
Starting at index 58932:

Exported lines 58932-58936 to library_data.csv. Batch 0 time: 0:00:05 (avg: 0:00:05, remaining: 0:12:46)
Exported lines 58937-58941 to library_data.csv. Batch 1 time: 0:00:05 (avg: 0:00:05, remaining: 0:13:00)
Exported lines 58942-58946 to library_data.csv. Batch 2 time: 0:00:05 (avg: 0:00:05, remaining: 0:12:57)
Exported lines 58947-58951 to library_data.csv. Batch 3 time: 0:00:05 (avg: 0:00:05, remaining: 0:13:08)
Exported lines 58952-58956 to library_data.csv. Batch 4 time: 0:00:05 (avg: 0:00:05, remaining: 0:13:04)
Exported lines 58957-58961 to library_data.csv. Batch 5 time: 0:00:05 (avg: 0:00:05, remaining: 0:13:00)
Exported lines 58962-58966 to library_data.csv. Batch 6 time: 0:00:05 (avg: 0:00:05, remaining: 0:12:53)
Exported lines 58967-58971 to library_data.csv. Batch 7 time: 0:00:05 (avg: 0:00:05, remaining: 0:12:49)
Exported lines 58972

Exported lines 59295-59299 to library_data.csv. Batch 27 time: 0:00:05 (avg: 0:00:05, remaining: 0:07:42)
Exported lines 59300-59304 to library_data.csv. Batch 28 time: 0:00:05 (avg: 0:00:05, remaining: 0:07:38)
Exported lines 59305-59309 to library_data.csv. Batch 29 time: 0:00:05 (avg: 0:00:05, remaining: 0:07:33)
Exported lines 59310-59314 to library_data.csv. Batch 30 time: 0:00:05 (avg: 0:00:05, remaining: 0:07:28)
Exported lines 59315-59319 to library_data.csv. Batch 31 time: 0:00:05 (avg: 0:00:05, remaining: 0:07:23)
Exported lines 59320-59324 to library_data.csv. Batch 32 time: 0:00:05 (avg: 0:00:05, remaining: 0:07:19)
Exported lines 59325-59329 to library_data.csv. Batch 33 time: 0:00:05 (avg: 0:00:05, remaining: 0:07:14)
Exported lines 59330-59334 to library_data.csv. Batch 34 time: 0:00:05 (avg: 0:00:05, remaining: 0:07:10)
Exported lines 59335-59339 to library_data.csv. Batch 35 time: 0:00:05 (avg: 0:00:05, remaining: 0:07:06)
No response, waiting 5 seconds...
Retrying.
St

Exported lines 59664-59668 to library_data.csv. Batch 50 time: 0:00:05 (avg: 0:00:05, remaining: 0:01:52)
Exported lines 59669-59673 to library_data.csv. Batch 51 time: 0:00:05 (avg: 0:00:05, remaining: 0:01:47)
Exported lines 59674-59678 to library_data.csv. Batch 52 time: 0:00:05 (avg: 0:00:05, remaining: 0:01:42)
Exported lines 59679-59683 to library_data.csv. Batch 53 time: 0:00:05 (avg: 0:00:05, remaining: 0:01:37)
Exported lines 59684-59688 to library_data.csv. Batch 54 time: 0:00:05 (avg: 0:00:05, remaining: 0:01:32)
Exported lines 59689-59693 to library_data.csv. Batch 55 time: 0:00:05 (avg: 0:00:05, remaining: 0:01:27)
Exported lines 59694-59698 to library_data.csv. Batch 56 time: 0:00:05 (avg: 0:00:05, remaining: 0:01:22)
Exported lines 59699-59703 to library_data.csv. Batch 57 time: 0:00:05 (avg: 0:00:05, remaining: 0:01:18)
Exported lines 59704-59708 to library_data.csv. Batch 58 time: 0:00:05 (avg: 0:00:05, remaining: 0:01:13)
Exported lines 59709-59713 to library_data.csv

TypeError: 'NoneType' object is not subscriptable

In [41]:
!pip install requests --upgrade

Collecting requests
  Downloading requests-2.26.0-py2.py3-none-any.whl (62 kB)
Collecting charset-normalizer~=2.0.0; python_version >= "3"
  Downloading charset_normalizer-2.0.8-py3-none-any.whl (39 kB)
Installing collected packages: charset-normalizer, requests
  Attempting uninstall: requests
    Found existing installation: requests 2.21.0
    Uninstalling requests-2.21.0:
      Successfully uninstalled requests-2.21.0
Successfully installed charset-normalizer-2.0.8 requests-2.26.0


ERROR: After October 2020 you may experience errors when installing or updating packages. This is because pip will change the way that it resolves dependency conflicts.

We recommend you use --use-feature=2020-resolver to test your packages with the new resolver before it becomes the default.

google-api-core 1.22.2 requires protobuf>=3.12.0, but you'll have protobuf 3.11.2 which is incompatible.


In [20]:
# reset_index(download_path, library_index)
index = get_index(download_path, library_index)
index

163

In [11]:
pd.read_csv('../data/library_data.csv').head()

Unnamed: 0,steamid,library
0,76561198219067393,"[{'appid': 220, 'name': 'Half-Life 2', 'hours'..."
1,76561198148157441,"[{'appid': 17390, 'name': 'Spore', 'hours': 26..."
2,76561198993539076,hidden
3,76561198247182340,hidden
4,76561198278705159,hidden


In [12]:
library_df = pd.read_csv('../data/library_data.csv')

In [13]:
library_df.head(50)

Unnamed: 0,steamid,library
0,76561198219067393,"[{'appid': 220, 'name': 'Half-Life 2', 'hours'..."
1,76561198148157441,"[{'appid': 17390, 'name': 'Spore', 'hours': 26..."
2,76561198993539076,hidden
3,76561198247182340,hidden
4,76561198278705159,hidden
5,76561198306000904,hidden
6,76561199041871881,hidden
7,76561198398210058,hidden
8,76561198313209867,hidden
9,76561198170079242,"[{'appid': 3830, 'name': 'Psychonauts', 'hours..."
