## Get info of each game
Here are some example API endpoints (the last link is the corresponding game page on the RAWG website):
- https://api.rawg.io/api/games/rimworld
- https://api.rawg.io/api/games/grand-theft-auto-v
- https://rawg.io/games/grand-theft-auto-v

In [1]:
import csv
import requests
import json
from pprint import pprint
from time import time
import concurrent.futures
import functools
import os

# The API key is stored in a separate JSON file (under the "API_KEY" entry)
# rather than being written directly in this notebook.
with open("../secret.json", "r") as f:
    API_KEY = json.loads(f.read())["API_KEY"]

### Load the CSV file that contains each game's ID and slug

In [2]:
# Read the whole CSV into a list of rows. Judging by the preview output below, each
# row looks like [game id, slug] -- TODO confirm against the file itself.
# newline="" is what the csv module documentation asks for, so that newlines inside
# quoted fields are parsed by the reader and not by the file object.
with open("../data/game_id.csv", "r", newline="") as f:
    # csv.reader yields [] for blank lines; drop them so later cells can rely on
    # every row having a first column.
    csv_data = [row for row in csv.reader(f) if row]

# Preview
print(*csv_data[:10], sep="\n")

['3498', 'grand-theft-auto-v']
['3328', 'the-witcher-3-wild-hunt']
['4200', 'portal-2']
['4291', 'counter-strike-global-offensive']
['5286', 'tomb-raider']
['13536', 'portal']
['12020', 'left-4-dead-2']
['5679', 'the-elder-scrolls-v-skyrim']
['28', 'red-dead-redemption-2']
['4062', 'bioshock-infinite']


## Multithreading

### This function requests each game from the API and saves the response as a JSON file in `../data/game_info/`

In [None]:
def worker(start_index, games_per_worker, urls, downloaded_files, headers, timeout=30):
    """Download one contiguous slice of `urls` and save each response as a JSON file.

    Parameters
    ----------
    start_index : int
        Index in `urls` of the first URL handled by this worker.
    games_per_worker : int
        Maximum number of URLs handled by this worker.
    urls : list of str
        Every URL to request; this worker only touches
        ``urls[start_index : start_index + games_per_worker]``.
    downloaded_files : collection of str
        Game ids that must be skipped. An id is taken from the last path segment
        of the URL, with any query string removed.
    headers : dict
        HTTP headers passed to ``requests.get``.
    timeout : float, optional
        Seconds to wait for the server before giving up on one request, so a
        stalled connection cannot block the thread forever.

    Returns
    -------
    None
        Results are written to ``../data/game_info/<id>.json``; progress and
        failures are reported with ``print``.
    """
    for url in urls[start_index : start_index + games_per_worker]:
        # Strip the query string ("?key=...") before taking the last path segment,
        # otherwise the value can never match an entry of downloaded_files.
        game_id = url.split("?", 1)[0].rstrip("/").rsplit("/", 1)[-1]
        if game_id in downloaded_files:
            continue
        try:
            # Request API
            response = requests.get(url, headers=headers, timeout=timeout)
            response.raise_for_status()
            json_data = response.json()

            # Save as JSON file
            name = json_data["id"]
            with open(f"../data/game_info/{name}.json", "w", encoding="utf-8") as f:
                json.dump(json_data, f)
        except Exception as exc:
            # Best effort: one bad game must not stop the rest of the slice.
            # Only the exception type is printed because the messages of HTTP
            # errors include the full URL, which carries the API key.
            print(f"Failed {game_id} ({type(exc).__name__})", end=" ")
    # Verbose notification
    print(f"Done from {start_index} to {start_index + games_per_worker}")

In [7]:
# Create the output folder if it does not exist yet. exist_ok=True lets makedirs
# handle the "already there" case itself, so no separate existence check is needed
# and re-running the cell is harmless.
os.makedirs('../data/game_info/', exist_ok=True)

#### Threading Preparation

In [8]:
# HTTP headers sent with every request.
headers = { 'User-Agent': 'App Name: Education purpose',}
# NOTE(review): `params` and `include` are not used anywhere in this cell; they are
# kept in case other cells rely on them -- confirm before removing.
params = {"key": API_KEY}
include = {"id",
           "slug",
           "name",
           "metacritic",
           "released",
           "tba",
           "updated",
           "website",
           "rating",
           "rating_top",
           "added_by_status",
           "playtime",
           "achievements_count",
           "ratings_count",
           "suggestions_count",
           "game_series_count",
           "reviews_count",
           "platforms",
           "developers",
           "genres",
           "publishers",
           "esrb_rating",
           }

# Set up number of workers
max_workers = 64
start_game_index = 0
end_game_index = len(csv_data)

# Skip downloaded files: a saved file is named "<id>.json", so the part before the
# first dot is the game id.
downloaded_files = {file.split(".",1)[0] for file in os.listdir("../data/game_info/")}
# Index the first column instead of unpacking, so a blank or oddly shaped row
# does not abort the whole cell.
game_ids = [row[0] for row in csv_data if row and row[0] not in downloaded_files]

# Make urls
base_url = r"https://api.rawg.io/api/games/{i}?key={key}"
urls = [base_url.format(i=game_id, key=API_KEY) for game_id in game_ids]

# Split the remaining urls evenly: ceiling division gives the smallest slice size
# that still covers every url, and max(1, ...) keeps the range step valid when
# there is nothing left to download.
number_of_games = len(urls)
games_per_worker = max(1, -(-number_of_games // max_workers))
start_index = range(start_game_index, number_of_games, games_per_worker)

print(f"There are {len(urls)} urls, {max_workers} workers. Thus, each worker will request {games_per_worker} urls")

print(len(urls))
# Preview the first urls with the key replaced by a placeholder, so the secret is
# never stored in the notebook output.
print(*(base_url.format(i=game_id, key="<REDACTED>") for game_id in game_ids[:10]), sep="\n")

There are 18378 urls, 64 workers. Thus, each worker will request 288 urls
18378
https://api.rawg.io/api/games/428056?key=<REDACTED>
https://api.rawg.io/api/games/662484?key=<REDACTED>
https://api.rawg.io/api/games/662483?key=<REDACTED>
https://api.rawg.io/api/games/662481?key=<REDACTED>
https://api.rawg.io/api/games/662480?key=<REDACTED>
https://api.rawg.io/api/games/662479?key=<REDACTED>
https://api.rawg.io/api/games/662478?key=<REDACTED>
https://api.rawg.io/api/games/662475?key=<REDACTED>
https://api.rawg.io/api/games/661010?key=<REDACTED>
https://api.rawg.io/api/games/661009?key=<REDACTED>


In [None]:
# Time
t0 = time()
with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
    # Pre-bind everything except the slice start, so each task only needs the
    # index of the first url it is responsible for.
    chunk_worker = functools.partial(worker,
                                     games_per_worker=games_per_worker,
                                     urls=urls,
                                     downloaded_files=downloaded_files,
                                     headers=headers,
                                     )
    futures = [executor.submit(chunk_worker, first_index) for first_index in start_index]
    # An exception raised inside a worker is stored on its future and stays
    # invisible unless it is asked for, so check every future and report it
    # here instead of letting a whole slice fail silently.
    for future in concurrent.futures.as_completed(futures):
        error = future.exception()
        if error is not None:
            print(f"Worker failed: {error!r}")

# Time
print(f"Time taken: {time()-t0}")