In [19]:
import requests
import pandas as pd
import time
import os


API_KEY = '7786b673e4f940ac9e06734f1675b653'

def collect_games_by_year_range(start_year, end_year, max_pages=50):
    all_games = []
    for page in range(1, max_pages + 1):
        url = 'https://api.rawg.io/api/games'
        params = {
            'key': API_KEY,
            'dates': f'{start_year}-01-01,{end_year}-12-31',
            'ordering': '-added',
            'page_size': 40,
            'page': page
        }
        response = requests.get(url, params=params)
        if response.status_code != 200:
            print(f"Error on page {page}: {response.status_code}")
            break
        data = response.json()

        # consider if there is no data
        for game in data['results']:
            all_games.append({
                'name': game['name'],
                'released': game.get('released'),
                'platforms': [p['platform']['name'] for p in game['platforms']] if game.get('platforms') else [],
                'metacritic': game.get('metacritic'),
                'rating': game['rating'],
                'ratings_count': game['ratings_count'],
                'playtime': game.get('playtime', 0),
                'tags': [tag['name'] for tag in game['tags']] if game.get('tags') else [],
                'genres': [genre['name'] for genre in game['genres']] if game.get('genres') else []
            })

        time.sleep(4)  # delay considering request speed limit

    return all_games

# collection Loops by year section
all_data = []
ranges = [
    #(1900, 1999, 200),
    #(2000, 2004, 200),
    #(2005, 2009, 200),
    #(2010, 2014, 200),
    #(2015, 2019, 200),
    (2020, 2024, 200)
]

# 1. load existing file
if os.path.exists("raw_rawg.csv"):
    existing_df = pd.read_csv("raw_rawg.csv")
    print(f"load existing file : {len(existing_df)} rows")
else:
    existing_df = pd.DataFrame()
    print("no existing file, make a new one")

# 2. collect new data
all_data = []
for start, end, pages in ranges:
    print(f"\n Collecting {start}–{end} ({pages} pages)...")
    all_data.extend(collect_games_by_year_range(start, end, max_pages=pages))

new_df = pd.DataFrame(all_data)
print(f"complete collecting new data: {len(new_df)} rows")

# 3. merge
combined_df = pd.concat([existing_df, new_df], ignore_index=True)

# 4. save
combined_df.to_csv("raw_rawg.csv", index=False)
print(f"save, total {len(combined_df)} games saved.")


#for start, end, pages in ranges:
#    print(f"Collecting {start}–{end} ({pages} pages)...")
#    all_data.extend(collect_games_by_year_range(start, end, max_pages=pages))

# save data
#df = pd.DataFrame(all_data)
#df.to_csv("raw_rawg.csv", index=False)
#print("Saved", len(df), "games.")


load existing file : 45607 rows

 Collecting 2020–2024 (200 pages)...
complete collecting new data: 8000 rows
save, total 53607 games saved.
