In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import requests
import time
import json
import os
from datetime import datetime

import tftinsights
import tftinsights.riotapi

# DATA Ingest
In this notebook, we are polling the API to gather matches data increamentally from a known match id. 
(Can be found in the game client after playing a game). 

We then search upward to keep gathering matches. Player ranking / skill data is also immedately gathered. 
It is import to note that the player ranking gathered is at the time of lookup, and not at the time of matched played.

The data is saved raw, in case we decided to change imported data format.
If you wish to run this notebook, you'll need to get your own API key and update it in the tftinsights module under tftinsights/riotapi.py
No working API key will be uploaded to Github.

In [3]:
HEADERS = tftinsights.riotapi.HEADERS

MATCH_URL = "https://americas.api.riotgames.com/tft/match/v1/matches/"
PUUID_TO_SUMMONER_URL = "https://na1.api.riotgames.com/tft/summoner/v1/summoners/by-puuid/"
SUMMONER_TO_RANK_URL = "https://na1.api.riotgames.com/tft/league/v1/entries/by-summoner/"

#START_MATCH_ID_NUM = 5278962963
START_MATCH_ID_NUM = 5285677683
MATCH_REGION_PREFIX = "NA1_"
MAX_CONSECUTIVE_SKIPS = 50000
DATA_DIR = "data"
WAIT_TIME = 0.9
CURRENT_SET_NAME = "TFTSet14"

In [4]:
os.makedirs(DATA_DIR, exist_ok=True)

#Find the highest number
existing_ids_numeric = []
for filename in os.listdir(DATA_DIR):
    if filename.startswith(tftinsights.riotapi.MATCH_REGION_PREFIX) and filename.endswith(".json"):
        try:
            num = int(filename.removeprefix(MATCH_REGION_PREFIX).removesuffix(".json"))
            existing_ids_numeric.append(num)
        except ValueError:
            continue

max_id = 0
min_id = 0

if existing_ids_numeric:
    min_id = min(existing_ids_numeric)
    max_id = max(existing_ids_numeric)    
    all_scanned_ids = {f"{MATCH_REGION_PREFIX}{i}" for i in range(min_id, max_id + 1)}
else:
    all_scanned_ids = set()

MATCH_REGION_PREFIX = max_id

def get_match_data(match_id):
    if match_id in all_scanned_ids:
        print(f"⚠️ {match_id} already scanned or skipped.")
        return None

    url = f"{MATCH_URL}{match_id}"
    response = requests.get(url, headers=HEADERS)
    
    for key, value in response.headers.items():
        if 'Rate-Limit' in key:
            print(f'\t{key}: {value}')
        
    time.sleep(WAIT_TIME)  # Avoid hammering the API
    
    if response.status_code == 200:
        data = response.json()
        if data["info"].get("tft_set_core_name") == CURRENT_SET_NAME:
            return data
    elif response.status_code == 429:
        print("Rate limited. Sleeping for 10s...")
        time.sleep(10)
        return get_match_data(match_id)
    elif response.status_code == 404:
        return None
    else:
        print(f"❌ Error {response.status_code} on match {match_id}")
        print(response.text)
    return None


def get_rank_data(puuid):
    status_code, data = tftinsights.riotapi.get_ranked_data_puuid( puuid )
    return status_code,data


def save_match_to_json(match_data, match_id):
    for participant in match_data["info"]["participants"]:
        puuid = participant.get("puuid")
        try:
            code, rank_info = get_rank_data(puuid)            
            rank_info["read_time"] = datetime.now().timestamp()
            if rank_info == None:
                print(f"\t\t⚠️Failed to get rank for {puuid}:\n\t\t{e}")
                rank_info = {}
        except Exception as e:
            print(f"\t\t⚠️Failed to get rank for {puuid}:\n\t\t{e}")
            rank_info = {}
        participant["rank_at_match_time"] = rank_info

    filepath = os.path.join(DATA_DIR, f"{match_id}.json")
    try:
        with open(filepath, "w", encoding='utf-8') as f:
            json.dump(match_data, f, ensure_ascii=False, indent=2)
        print(f"✅Saved {filepath}")
    except Exception as e:
        print(f"❌Failed to save match {match_id}: {e}")


def main():
    current_id = START_MATCH_ID_NUM    
    consecutive_skips = 0

    while consecutive_skips < MAX_CONSECUTIVE_SKIPS:
        match_id = f"{MATCH_REGION_PREFIX}{current_id}"
        print ( "--------------------------------------------")
        print(f"🔍Checking match {match_id}...")

        match_data = get_match_data(match_id)

        if match_data:
            save_match_to_json(match_data, match_id)
            all_scanned_ids.add(match_id)
            consecutive_skips = 0
        else:
            print(f"\t⚠️Skipped {match_id} (Total skips: {consecutive_skips + 1})")
            all_scanned_ids.add(match_id)
            consecutive_skips += 1

        current_id += 1

    print(f"Stopped after {MAX_CONSECUTIVE_SKIPS} consecutive skipped matches.")

In [None]:
main()

--------------------------------------------
🔍Checking match 52856866575285677683...
	X-App-Rate-Limit: 100:120,20:1
	X-App-Rate-Limit-Count: 26:120,1:1
	X-Method-Rate-Limit: 200:10
	X-Method-Rate-Limit-Count: 1:10
	⚠️Skipped 52856866575285677683 (Total skips: 1)
--------------------------------------------
🔍Checking match 52856866575285677684...
	X-App-Rate-Limit: 100:120,20:1
	X-App-Rate-Limit-Count: 27:120,1:1
	X-Method-Rate-Limit: 200:10
	X-Method-Rate-Limit-Count: 2:10
	⚠️Skipped 52856866575285677684 (Total skips: 2)
--------------------------------------------
🔍Checking match 52856866575285677685...
	X-App-Rate-Limit: 100:120,20:1
	X-App-Rate-Limit-Count: 28:120,1:1
	X-Method-Rate-Limit: 200:10
	X-Method-Rate-Limit-Count: 3:10
	⚠️Skipped 52856866575285677685 (Total skips: 3)
--------------------------------------------
🔍Checking match 52856866575285677686...
	X-App-Rate-Limit: 100:120,20:1
	X-App-Rate-Limit-Count: 29:120,1:1
	X-Method-Rate-Limit: 200:10
	X-Method-Rate-Limit-Count