In [1]:
import pandas as pd

# Show up to 50 columns and 50 rows whenever a dataframe is displayed.
pd.options.display.max_columns = 50
pd.options.display.max_rows = 50

In [2]:
# Read the IGDB credentials, stored as environment variables, and build the
# connection string used to request an access token.
import os
import warnings
from urllib.parse import urlencode

client_id = os.getenv('IGDB_CLIENT_ID')
client_secret = os.getenv('IGDB_CLIENT_SECRET')

# os.getenv returns None for an unset variable, which would otherwise be written
# into the URL as the literal text "None". Warn right here so the cause is
# visible in this cell instead of surfacing later as a failed token request.
_missing_credentials = [
    name
    for name, value in (('IGDB_CLIENT_ID', client_id),
                        ('IGDB_CLIENT_SECRET', client_secret))
    if not value
]
if _missing_credentials:
    warnings.warn(
        'Missing environment variable(s): ' + ', '.join(_missing_credentials)
        + '. The request for an access token will be rejected.'
    )

# urlencode escapes any character that is not safe inside a query string; for
# plain alphanumeric credentials the resulting URL is unchanged.
conn_string = 'https://id.twitch.tv/oauth2/token?' + urlencode({
    'client_id': client_id,
    'client_secret': client_secret,
    'grant_type': 'client_credentials',
})

In [3]:
# Connect to IGDB and get an access token

import requests

# A timeout keeps this cell from hanging indefinitely if the auth server does
# not answer; requests waits forever when no timeout is given.
response = requests.post(conn_string, timeout=30)

# Stop here with the server's own explanation when the request is rejected
# (e.g. wrong or missing credentials) instead of failing below with a bare
# KeyError on 'access_token'. response.raise_for_status() is deliberately not
# used: its message echoes the request URL, which contains the client secret.
if not response.ok:
    raise RuntimeError(
        f'IGDB authentication failed (HTTP {response.status_code}): {response.text}'
    )

access_token = response.json()['access_token']

In [8]:
def hit_api(endpoint, offset=0, query_size=500, timeout=30):
    """
    Access the IGDB API at a certain endpoint once.

    Uses the notebook-level ``client_id`` and ``access_token`` variables to
    authenticate, so the cells defining them must have been run first.

    Parameters
    ----------
    endpoint : str
        The name of the endpoint in the IGDB API.
    offset : int, default 0
        The data row from which to start the query. Normally a multiple of query_size.
    query_size : int, default 500
        How many data rows this function should retrieve.
    timeout : float, default 30
        Seconds to wait for the server before giving up. Without a timeout
        requests waits indefinitely, which would stall the whole import.

    Returns
    -------
    pandas.DataFrame
        If the response status was 200, a dataframe with the queried data.
        Otherwise an empty dataframe (a message with the offset and the HTTP
        status code is printed).

    Raises
    ------
    requests.exceptions.RequestException
        Propagated unchanged from ``requests.post``, e.g. on a connection
        error or when ``timeout`` elapses.
    """
    # Request body in the API's query syntax: all fields, one page of rows.
    data_parameter = f'fields *; limit {query_size}; offset {offset};'

    response = requests.post(
        f'https://api.igdb.com/v4/{endpoint}',
        headers={'Client-ID': client_id,
                 'Authorization': f'Bearer {access_token}'},
        data=data_parameter,
        timeout=timeout,
    )

    if response.status_code == 200:
        return pd.DataFrame(response.json())

    # Include the status code: an expired token and a rate-limit rejection call
    # for different remedies, and the offset alone cannot tell them apart.
    print(f'Failed request at offset {offset} (HTTP {response.status_code})')
    return pd.DataFrame()

In [5]:
import time

def import_igdb(endpoint='games', runs=-1):
    """
    Import an entire endpoint from IGDB by calling the hit_api function.

    Allows limiting the number of times the given endpoint is accessed.

    Parameters
    ----------
    endpoint : str, default 'games'
        The name of the endpoint to be accessed at IGDB. For possible values, see https://api-docs.igdb.com/#endpoints.
    runs : int, default -1
        The number of times the same endpoint will be accessed, retrieving data from the first row of the data set onwards.
        Each access will retrieve up to 500 rows. If set to a negative number, the function will retrieve the entire data
        of the endpoint. If set to 0, no request is made and an empty dataframe is returned.

    Returns
    -------
    pandas.DataFrame
        A dataframe containing the retrieved data from the designated endpoint, with a fresh 0..n-1 index.

    Notes
    -----
    hit_api returns an empty dataframe when a request fails, which is
    indistinguishable here from reaching the end of the data: the import
    stops at that point and returns what was collected so far.
    """

    requests_per_second = 4  # the API accepts only 4 requests per second
    delay_between_requests = 1.0 / requests_per_second

    query_size = 500
    offset = 0
    batches = []  # one dataframe per request; concatenated once at the end

    # as long as the function hasn't reached the number of runs passed to it, it will continue to query the API
    while runs != 0:

        # access the next batch of data points
        next_api_group = hit_api(endpoint, offset, query_size)

        # keep only batches that contain rows; an empty frame contributes
        # nothing to the result
        if len(next_api_group) > 0:
            batches.append(next_api_group)

        # a batch smaller than query_size means there are no further data
        # points to retrieve
        if len(next_api_group) < query_size:
            break

        # otherwise move on to the next page and count this run
        offset += query_size
        runs -= 1

        # pause to stay within the request rate, but only if another request
        # will actually follow
        if runs != 0:
            time.sleep(delay_between_requests)

    # pd.concat raises on an empty list, so handle "nothing retrieved" explicitly
    if not batches:
        return pd.DataFrame()

    # a single concatenation avoids re-copying the accumulated rows on every
    # iteration, which is what concatenating inside the loop does
    return pd.concat(batches, ignore_index=True)

In [None]:
# Download every row of the 'genres' endpoint from IGDB into the dataframe df.

df = import_igdb(endpoint='genres')

In [None]:
# Save the downloaded data as genres.csv, leaving out the dataframe index.
df.to_csv(path_or_buf='genres.csv', index=False)