In [27]:
import pandas as pd
import os
from dotenv import find_dotenv, load_dotenv
import requests
import time

In [19]:
# Load variables from the project's .env file into the process environment.
# load_dotenv() searches for the file on its own when no path is given, so a
# separate find_dotenv() call (whose result was discarded) is not needed.
load_dotenv()

api_key = os.getenv("API_KEY")

# Fail here, with a clear message, instead of letting every later request be
# sent without a key and come back as an authentication error.
if not api_key:
    raise RuntimeError("API_KEY is not set; add it to your .env file or environment.")

In [25]:
# Confirm the key was loaded without echoing the secret into the saved output.
# NOTE(review): this cell used to display the raw key; treat that key as exposed and rotate it.
api_key is not None

True

In [None]:
# NOTE(review): unexecuted scratch cell (no execution count). Presumably a list of years
# used for the `years` setting in another run of the download cell — confirm, then remove.
2019, 2018, 2017, 2016, 2015

In [33]:
# RAWG endpoint that lists games
base_url = "https://api.rawg.io/api/games"

# Release years to download, newest first: 1999 down to 1980 inclusive
years = list(range(1999, 1979, -1))

# CSV exports go into <working directory>/data/raw; make sure that folder exists
output_dir = os.path.join("data", "raw")
os.makedirs(output_dir, exist_ok=True)

# Function to make requests with retries
# HTTP statuses worth retrying: request timeout, rate limiting and transient server faults.
_RETRYABLE_STATUSES = frozenset({408, 429, 500, 502, 503, 504})


def _redact_key(message, params):
    """Return str(message) with the value of params["key"] masked.

    requests includes the full URL (query string and all) in its error messages,
    so printing them verbatim would write the API key into the cell output.
    """
    text = str(message)
    secret = params.get("key") if isinstance(params, dict) else None
    if secret:
        text = text.replace(str(secret), "***")
    return text


def _error_detail(response):
    """Best-effort read of the "detail" field of an error body; never raises."""
    try:
        return response.json()["detail"]
    except (ValueError, KeyError, TypeError):
        # Body was not JSON, or not the expected {"detail": ...} object.
        return "Not found"


def make_request(url, params, retries=5, backoff_factor=1.0, timeout=30):
    """GET `url` with `params`, retrying transient failures with exponential backoff.

    Args:
        url: Endpoint to request.
        params: Query parameters passed to requests.get. If it is a dict with a
            "key" entry, that value is masked in any printed error message.
        retries: Maximum number of attempts.
        backoff_factor: Base delay in seconds; the wait before the next attempt
            is backoff_factor * 2**attempt (attempt counted from 0).
        timeout: Seconds to wait for the server before the attempt is treated as
            failed, so a stalled connection cannot block the notebook forever.

    Returns:
        The successful response, or None when the server answered 404, answered
        with an error status that retrying cannot fix, or all attempts failed.
    """
    for attempt in range(retries):
        try:
            response = requests.get(url, params=params, timeout=timeout)
            response.raise_for_status()
            return response
        except requests.exceptions.HTTPError as http_err:
            failed = http_err.response
            status = getattr(failed, "status_code", None)
            if status == 404:
                print(f"404 Error: {_error_detail(failed)}. No more pages available.")
                return None
            reason = _redact_key(http_err, params)
            if status is not None and status not in _RETRYABLE_STATUSES:
                # e.g. 401/403: repeating the identical request cannot succeed.
                print(f"HTTP error occurred: {reason}")
                return None
            if status is not None and status >= 500:
                failure = f"{status} Server Error: {reason}"
            else:
                failure = f"HTTP error occurred: {reason}"
        except requests.exceptions.RequestException as e:
            failure = f"Request failed: {_redact_key(e, params)}"

        if attempt + 1 < retries:
            delay = backoff_factor * (2 ** attempt)
            print(f"{failure}. Retrying in {delay} seconds...")
            time.sleep(delay)
        else:
            # Last attempt: nothing follows, so do not sleep.
            print(f"{failure}.")
    print("Maximum retries exceeded for current request.")
    return None

for year in years:
    # Query for the first page of this year; "page" is bumped in place while paging
    params = dict(
        key=api_key,
        page=1,
        page_size=40,  # Maximize the number of results per page
        dates=f"{year}-01-01,{year}-12-31",  # Release-date window: the whole calendar year
    )

    # Collects the raw result records from every page fetched for this year
    all_games = []

    # Keep fetching until make_request gives up (returns None) or the payload has no "next" link
    response = make_request(url=base_url, params=params)
    while response is not None:
        data = response.json()
        all_games += data['results']

        has_next_page = 'next' in data and data['next']
        if not has_next_page:
            break

        params['page'] += 1
        response = make_request(url=base_url, params=params)

    # One CSV per year, built from everything collected above
    csv_file_name = os.path.join(output_dir, f"games_{year}.csv")
    games_df = pd.DataFrame(all_games)
    games_df.to_csv(csv_file_name, index=False)

    # Report how many records were saved and where
    print(f"Retrieved {len(all_games)} games released in {year} and saved to {csv_file_name}")


Retrieved 813 games released in 1999 and saved to data\raw\games_1999.csv
Retrieved 752 games released in 1998 and saved to data\raw\games_1998.csv
Retrieved 881 games released in 1997 and saved to data\raw\games_1997.csv
Retrieved 791 games released in 1996 and saved to data\raw\games_1996.csv
Retrieved 878 games released in 1995 and saved to data\raw\games_1995.csv
Retrieved 839 games released in 1994 and saved to data\raw\games_1994.csv
Retrieved 761 games released in 1993 and saved to data\raw\games_1993.csv
Retrieved 672 games released in 1992 and saved to data\raw\games_1992.csv
Retrieved 589 games released in 1991 and saved to data\raw\games_1991.csv
Retrieved 545 games released in 1990 and saved to data\raw\games_1990.csv
Retrieved 440 games released in 1989 and saved to data\raw\games_1989.csv
Retrieved 325 games released in 1988 and saved to data\raw\games_1988.csv
Retrieved 348 games released in 1987 and saved to data\raw\games_1987.csv
Retrieved 254 games released in 1986 and saved to data\raw\games_1986.csv

In [35]:
# Load the saved 2020 export into a DataFrame.
# NOTE(review): the `years` list in the download cell shown in this notebook covers 1980-1999,
# so this file is not produced by that cell as written; it presumably comes from an earlier
# run with different years. On a fresh checkout this read fails unless the file already exists.
df = pd.read_csv("data/raw/games_2020.csv")