In [1]:
import pandas as pd
import requests
import json
import gzip
import shutil
import time
import os
from io import BytesIO
import numpy as np

In [2]:
S3_BUCKET_URL = "https://power-rankings-dataset-gprhack.s3.us-west-2.amazonaws.com"


def download_gzip_and_write_to_json(file_name):
    # If file already exists locally do not re-download game
    if os.path.isfile(f"{file_name}.json"):
       return

    response = requests.get(f"{S3_BUCKET_URL}/{file_name}.json.gz")
    if response.status_code == 200:
       try:
           gzip_bytes = BytesIO(response.content)
           with gzip.GzipFile(fileobj=gzip_bytes, mode="rb") as gzipped_file:
               with open(f"{file_name}.json", 'wb') as output_file:
                   shutil.copyfileobj(gzipped_file, output_file)
               print(f"{file_name}.json written")
       except Exception as e:
           print("Error:", e)
    else:
       print(f"Failed to download {file_name}")


def download_esports_files():
   directory = "esports-data"
   if not os.path.exists(directory):
       os.makedirs(directory)

   esports_data_files = ["leagues", "tournaments", "players", "teams", "mapping_data"]
   for file_name in esports_data_files:
       download_gzip_and_write_to_json(f"{directory}/{file_name}")


def download_games(year):
    start_time = time.time()
    with open("esports-data/tournaments.json", "r") as json_file:
       tournaments_data = json.load(json_file)
    with open("esports-data/mapping_data.json", "r") as json_file:
       mappings_data = json.load(json_file)

    directory = "games"
    if not os.path.exists(directory):
       os.makedirs(directory)

    mappings = {
       esports_game["esportsGameId"]: esports_game for esports_game in mappings_data
    }

    game_counter = 0

    for tournament in tournaments_data:
       start_date = tournament.get("startDate", "")
       if start_date.startswith(str(year)):
           print(f"Processing {tournament['slug']}")
           for stage in tournament["stages"]:
               for section in stage["sections"]:
                   for match in section["matches"]:
                       for game in match["games"]:
                           if game["state"] == "completed":
                               try:
                                   platform_game_id = mappings[game["id"]]["platformGameId"]
                               except KeyError:
                                   print(f"{platform_game_id} {game['id']} not found in the mapping table")
                                   continue

                               download_gzip_and_write_to_json(f"{directory}/{platform_game_id}")
                               game_counter += 1

                           if game_counter % 10 == 0:
                               print(
                                   f"----- Processed {game_counter} games, current run time: \
                                   {round((time.time() - start_time)/60, 2)} minutes")
                               break 
                             
                                


In [4]:
# Downloads basic data (not per-game data) for all esports games
download_esports_files()
os.chdir('esports-data')
os.listdir()

['leagues.json',
 'mapping_data.json',
 'players.json',
 'teams.json',
 'tournaments.json']

In [4]:
os.chdir('esports-data')
with open("mapping_data.json", "r") as json_file:
   mappings_data = json.load(json_file)

directory = "games"
if not os.path.exists(directory):
   os.makedirs(directory)

mappings = {
   esports_game["esportsGameId"]: esports_game for esports_game in mappings_data
}

In [5]:
# Pick out a specific game ID to analyze what data we get for each game 
platform_game_id = mappings['105596416965466997']['platformGameId']
mappings['110310652412257228']

{'esportsGameId': '110310652412257228',
 'platformGameId': 'ESPORTSTMNT04:2685686',
 'teamMapping': {'200': '109539776003058530', '100': '109539873500788632'},
 'participantMapping': {'3': '107648411781760623',
  '5': '101389608247181162',
  '10': '106302540732814072',
  '2': '110378596829382151',
  '1': '110378594503521823',
  '9': '105655995799330206',
  '7': '109539786056381355',
  '8': '107156535965754342',
  '6': '109704758065825536',
  '4': '103890014776920379'}}

In [33]:
download_gzip_and_write_to_json(f"{directory}/{platform_game_id}")

games/ESPORTSTMNT04:1610258.json written


In [42]:
# Notes: 
# Participants 1-5 are on team 100 and 6-10 are on team 200
participant_mapping = {
    '1': '100_top',
    '2': '100_jungle',
    '3': '100_mid',
    '4': '100_bot',
    '5': '100_support',
    '6': '200_top',
    '7': '200_jungle',
    '8': '200_mid',
    '9': '200_bot',
    '10': '200_support'
}

In [6]:
# with open("games/ESPORTSTMNT04:2685686.json", "r") as json_file:
with open("games/ESPORTSTMNT04:1610258.json", "r") as json_file:
    game_data = json.load(json_file)

In [51]:
game_data[-1] # Game metadata at the end

{'eventTime': '2021-03-08T18:36:43.326Z',
 'eventType': 'game_end',
 'platformGameId': 'ESPORTSTMNT04:1610258',
 'gameTime': 1861532,
 'stageID': 1,
 'wallTime': 1615228603324,
 'sequenceIndex': 3059,
 'gameName': '105596416965466996|game1',
 'winningTeam': 100,
 'playbackID': 1}

In [14]:
game_data[1] # Game metadata at the start

{'eventTime': '2021-03-08T18:05:41.908Z',
 'eventType': 'stats_update',
 'platformGameId': 'ESPORTSTMNT04:1610258',
 'gameTime': 0,
 'participants': [{'magicPenetrationPercent': 0,
   'alive': True,
   'participantID': 1,
   'spellVamp': 0,
   'primaryAbilityResource': 340,
   'cooldownReduction': 0,
   'lifeSteal': 0,
   'primaryAbilityResourceRegen': 0,
   'magicPenetrationPercentBonus': 0,
   'magicPenetration': 0,
   'summonerSpell2CooldownRemaining': 15,
   'healthMax': 590,
   'position': {'z': 581, 'x': 554},
   'magicResist': 32,
   'primaryAbilityResourceMax': 340,
   'armorPenetrationPercentBonus': 0,
   'summonerSpell1Name': 'SummonerFlash',
   'attackDamage': 25,
   'teamID': 100,
   'championName': 'Ornn',
   'ccReduction': 0,
   'armorPenetrationPercent': 0,
   'currentGold': 500,
   'playerName': 'Z10 Kaylem',
   'healthRegen': 0,
   'respawnTimer': 0,
   'attackSpeed': 100,
   'shutdownValue': 0,
   'XP': 0,
   'ultimateCooldownRemaining': 0,
   'summonerSpell1CooldownR

In [45]:
np.unique([x['eventType'] for x in game_data])

"""
NOTES: We are only concerned with the following events
"building_destroyed" - first one for each team 
"champion_kill" - first one for each team 
"epic_monster_kill" - first one for each team
"game_end" - first one for each time 
"stats_update" - multiple instances

We don't care about the other events because they are too granular and don't provide any useful information
"""

array(['building_destroyed', 'champion_kill', 'champion_kill_special',
       'champion_level_up', 'epic_monster_kill', 'epic_monster_spawn',
       'game_end', 'game_info', 'item_destroyed', 'item_purchased',
       'item_sold', 'item_undo', 'queued_dragon_info', 'skill_level_up',
       'stats_update', 'turret_plate_destroyed', 'ward_killed',
       'ward_placed'], dtype='<U22')

In [9]:
important_events = ["building_destroyed", "champion_kill", "epic_monster_kill", "game_end", "stats_update"]

# Use this to examples of the relevant event types in the game
for event in game_data:
    if event['eventType'] in ["building_destroyed"]:
        print(event) 

{'eventTime': '2021-03-08T18:16:15.687Z', 'eventType': 'building_destroyed', 'platformGameId': 'ESPORTSTMNT04:1610258', 'gameTime': 633895, 'teamID': 200, 'sequenceIndex': 1009, 'buildingType': 'turret', 'stageID': 1, 'assistants': [], 'lastHitter': 4, 'gameName': '105596416965466996|game1', 'position': {'z': 4505, 'x': 13866}, 'lane': 'bot', 'turretTier': 'outer', 'playbackID': 1}
{'eventTime': '2021-03-08T18:20:08.456Z', 'eventType': 'building_destroyed', 'platformGameId': 'ESPORTSTMNT04:1610258', 'gameTime': 866666, 'teamID': 200, 'sequenceIndex': 1405, 'buildingType': 'turret', 'assistants': [], 'stageID': 1, 'lastHitter': 4, 'gameName': '105596416965466996|game1', 'position': {'z': 13875, 'x': 4318}, 'lane': 'top', 'turretTier': 'outer', 'playbackID': 1}
{'eventTime': '2021-03-08T18:22:46.851Z', 'eventType': 'building_destroyed', 'platformGameId': 'ESPORTSTMNT04:1610258', 'gameTime': 1025061, 'teamID': 200, 'sequenceIndex': 1683, 'buildingType': 'turret', 'stageID': 1, 'assistants

In [None]:
"""
To process "epic_monster_kill" events we do the following:
1. Check that "monsterType" is in ["riftHerald", "dragon", "baron"] 
2. Check the time of the event ["eventTime"]
3. Check which team killed it ["killerTeamID"]
4. Update the appropriate feature for that team

team_features should be a dictionary with keys for 100 and 200 and values as dictionaries with keys for each feature
for example for epic_monster kill we have features for [first_riftHerald, first_dragon, first_baron, num_riftHeralds, num_dragons, num_barons]
- first_riftHerald_ind = 0 if they got the first rift_herald, 1 otherwise 

To process "building_destroyed" events we do the following:
1. Check the time of the event ["eventTime"]
2. Check whose tower fell ["teamId"]
3. Update the appropriate feature for that team

To process "champion_kill" events we do the following:
1. Check the time of the event ["eventTime"]
2. Check whose champion died ["killerTeamID"]
3. Update the appropriate feature for that team
"""

In [38]:
game_data[5]

{'eventTime': '2021-03-08T18:05:43.823Z',
 'eventType': 'stats_update',
 'platformGameId': 'ESPORTSTMNT04:1610258',
 'gameTime': 2032,
 'participants': [{'magicPenetrationPercent': 0,
   'participantID': 1,
   'alive': True,
   'spellVamp': 0,
   'primaryAbilityResource': 340,
   'cooldownReduction': 0,
   'lifeSteal': 0,
   'primaryAbilityResourceRegen': 16,
   'magicPenetrationPercentBonus': 0,
   'magicPenetration': 0,
   'summonerSpell2CooldownRemaining': 12.96790599822998,
   'healthMax': 590,
   'position': {'z': 611, 'x': 603},
   'magicResist': 32,
   'primaryAbilityResourceMax': 340,
   'armorPenetrationPercentBonus': 0,
   'summonerSpell1Name': 'SummonerFlash',
   'attackDamage': 79,
   'teamID': 100,
   'championName': 'Ornn',
   'ccReduction': 0,
   'armorPenetrationPercent': 0,
   'currentGold': 500,
   'playerName': 'Z10 Kaylem',
   'healthRegen': 18,
   'respawnTimer': 0,
   'attackSpeed': 100,
   'shutdownValue': 0,
   'XP': 0,
   'ultimateCooldownRemaining': 0,
   'sum

In [44]:
game_data[100]

{'eventTime': '2021-03-08T18:06:39.006Z',
 'eventType': 'stats_update',
 'platformGameId': 'ESPORTSTMNT04:1610258',
 'gameTime': 57215,
 'participants': [{'magicPenetrationPercent': 0,
   'alive': True,
   'participantID': 1,
   'spellVamp': 0,
   'primaryAbilityResource': 340,
   'cooldownReduction': 0,
   'lifeSteal': 0,
   'primaryAbilityResourceRegen': 16,
   'magicPenetrationPercentBonus': 0,
   'magicPenetration': 0,
   'summonerSpell2CooldownRemaining': 0,
   'healthMax': 678,
   'position': {'z': 10959, 'x': 3022},
   'magicResist': 32,
   'primaryAbilityResourceMax': 340,
   'armorPenetrationPercentBonus': 0,
   'summonerSpell1Name': 'SummonerFlash',
   'attackDamage': 79,
   'teamID': 100,
   'championName': 'Ornn',
   'ccReduction': 0,
   'armorPenetrationPercent': 0,
   'currentGold': 0,
   'playerName': 'Z10 Kaylem',
   'healthRegen': 30,
   'respawnTimer': 0,
   'attackSpeed': 100,
   'shutdownValue': 0,
   'XP': 0,
   'ultimateCooldownRemaining': 0,
   'summonerSpell1Coo

In [52]:
"""
Iterate through all events of the game and create features for both teams (100, 200) based on certain statistics.
Initialize all features as -1 in case they never occur (e.g. they never take baron) and then update them as they occur.
Every time we cross a special threshold time (e.g. 5 minutes, 10 minutes, etc.) we will save current game statistics as a row in our dataframe.

# General team-wide features (for both teams)
1. First turret time
2. First inhibitor time
3. First baron time
4. First dragon time
5. First rift herald time
6. First kill time
7. [Number of _ taken] turret/inhibitor/baron/dragon/rift herald

Put all these functions in a class GameFeaturesGenerator so we can reuse them for different games.
"""

from datetime import datetime
import pandas as pd
class GameFeaturesGenerator:
    TIME_FORMAT = '%Y-%m-%dT%H:%M:%S.%fZ'
    
    @staticmethod
    def get_event_time(start_time, event_time):
        return (datetime.strptime(event_time, GameFeaturesGenerator.TIME_FORMAT)-start_time).total_seconds()

    @staticmethod
    def flip_team_id(team_id):
        """
        Flips team_id from 100 to 200 and vice versa. Needed for when processing turret fall events because we want to update the feature for the team that lost the turret
        """
        if team_id == 100:
            return 200
        elif team_id == 200:
            return 100
        else:
            print("Error: team_id is not 100 or 200")
    
    def __init__(self, game_data, mapping_data):
        self.game_data = game_data
        self.game_start_time = datetime.strptime(game_data[0]['eventTime'], GameFeaturesGenerator.TIME_FORMAT)
        self.team_id_mapping = {"100": mapping_data['teamMapping']['100'], "200": mapping_data['teamMapping']['200']}
        
        # Create flags for first events (e.g. first turret kill, first dragon kill, etc.). These can only happen once (for both teams)
        self.first_herald_flag = True
        self.first_dragon_flag = True
        self.first_baron_flag = True
        self.first_turret_flag = True
        self.first_inhibitor_flag = True
        self.first_kill_flag = True
        
        # Create features for each team
        # EPIC MONSTER EVENT FEATURES
        self.IMPORTANT_MONSTER_TYPES = ["riftHerald", "dragon", "baron"]
        epic_monster_kill_event_features = [["first_"+monster_type+"_ind", "first_"+monster_type+"_time", "num_"+monster_type] for monster_type 
                                            in self.IMPORTANT_MONSTER_TYPES]
        epic_monster_kill_event_features = [item for sublist in epic_monster_kill_event_features for item in sublist]  # Flatten list of lists
        
        # BUILDING DESTROYED EVENT FEATURES
        self.IMPORTANT_BUILDING_TYPES = ["turret", "inhibitor"]
        building_destroyed_event_features = [["first_"+building_type+"_ind", "first_"+building_type+"_time", "num_"+building_type] for building_type 
                                             in self.IMPORTANT_BUILDING_TYPES]
        building_destroyed_event_features = [item for sublist in building_destroyed_event_features for item in sublist]  # Flatten list of lists
        
        # CHAMPION KILL EVENT FEATURES
        champion_kill_event_features = ["first_kill_ind", "first_kill_time", "num_kills"]
        
        # GAME METADATA FEATURES
        # TODO: Also need a feature for whether this was a domestic or international game (e.g. LCS vs. Worlds)
        game_metadata_features = ["game_end_time"]
        
                
        all_features =  (["team_id", "start_time", "outcome"] + 
                         epic_monster_kill_event_features + building_destroyed_event_features + champion_kill_event_features + game_metadata_features) 
        self.team_features = {"100": {feature: np.nan for feature in all_features},
                              "200": {feature: np.nan for feature in all_features}}
        # Set all "num_" features to 0
        for team_id in ["100", "200"]:
            for feature in all_features:
                if feature.startswith("num_"):
                    self.team_features[team_id][feature] = 0
        
    
    def process_epic_monster_kill_event(self, event, team_features):
        monster_type = event['monsterType']
        if monster_type in self.IMPORTANT_MONSTER_TYPES:
            event_time = event['eventTime']
            time_of_kill = GameFeaturesGenerator.get_event_time(self.game_start_time, event_time)
            killer_team_id = str(event['killerTeamID'])
            if self.first_herald_flag:
                team_features[killer_team_id]['first_riftHerald_ind'] = 1
                team_features[killer_team_id]['first_riftHerald_time'] = time_of_kill
                self.first_herald_flag = False
            if self.first_dragon_flag:
                team_features[killer_team_id]['first_dragon_ind'] = 1
                team_features[killer_team_id]['first_dragon_time'] = time_of_kill
                self.first_dragon_flag = False
            if self.first_baron_flag:
                team_features[killer_team_id]['first_baron_ind'] = 1
                team_features[killer_team_id]['first_baron_time'] = time_of_kill
                self.first_baron_flag = False
            team_features[killer_team_id]['num_'+monster_type] += 1
                
    def process_building_destroyed_event(self, event, team_features):
        building_type = event['buildingType']
        if building_type in self.IMPORTANT_BUILDING_TYPES:
            event_time = event['eventTime']
            time_of_event = GameFeaturesGenerator.get_event_time(self.game_start_time, event_time)
            team_id = str(GameFeaturesGenerator.flip_team_id(event['teamID']))
            if self.first_turret_flag:
                team_features[team_id]['first_turret_ind'] = 1
                team_features[team_id]['first_turret_time'] = time_of_event
                self.first_turret_flag = False
            if self.first_inhibitor_flag:
                team_features[team_id]['first_inhibitor_ind'] = 1
                team_features[team_id]['first_inhibitor_time'] = time_of_event
                self.first_inhibitor_flag = False
            team_features[team_id]['num_'+building_type] += 1
    
    def process_champion_kill_event(self, event, team_features):
        event_time = event['eventTime']
        time_of_event = GameFeaturesGenerator.get_event_time(self.game_start_time, event_time)
        killer_team_id = str(event['killerTeamID'])
        if self.first_kill_flag:
            team_features[killer_team_id]['first_kill_ind'] = 1
            team_features[killer_team_id]['first_kill_time'] = time_of_event
            self.first_kill_flag = False
        team_features[killer_team_id]['num_kills'] += 1
        
    def process_game_end_event(self, event, team_features):
        # We only call this method for the final event, so if for some reason there is no game_end event then discard the data 
        if event['eventType'] == "game_end":
            event_time = event['eventTime']
            time_of_event = GameFeaturesGenerator.get_event_time(self.game_start_time, event_time)
            team_features['100']['game_end_time'] = time_of_event
            team_features['200']['game_end_time'] = time_of_event
            winning_team = event['winningTeam']
            losing_team = GameFeaturesGenerator.flip_team_id(winning_team)
            team_features[str(winning_team)]['outcome'] = 1
            team_features[str(losing_team)]['outcome'] = 0
        else:
            raise Exception("Error: event is not a game_end event")
        
    def process_game(self):
        """
        Loops through all events in the game and updates the team features as they occur
        :return: a pandas dataframe with the features for each team and label "outcome"
        """
        for event in self.game_data:
            if event['eventType'] == "epic_monster_kill":
                self.process_epic_monster_kill_event(event, self.team_features)
            elif event['eventType'] == "building_destroyed":
                self.process_building_destroyed_event(event, self.team_features)
            elif event['eventType'] == "champion_kill":
                self.process_champion_kill_event(event, self.team_features)
            elif event['eventType'] == "game_end":
                self.process_game_end_event(event, self.team_features)
            else:
                continue
        
        # Now we have all the datapoints, we can create a dataframe with the team ID, label, start_time, and features
        # First assign the rest of the metadata 
        rows = []
        for team_id in ["100", "200"]:
            self.team_features[team_id]['team_id'] = self.team_id_mapping[team_id]
            self.team_features[team_id]['start_time'] = self.game_start_time
            rows.append(pd.DataFrame.from_dict(self.team_features[team_id], orient='index').transpose())
        return pd.concat(rows, ignore_index=True)
        
        
            

In [23]:
MATCH_ID = '110310652412257228'
mapping_data = mappings[MATCH_ID]
PLATFORM_GAME_ID = mappings[MATCH_ID]['platformGameId']
with open(f"games/{PLATFORM_GAME_ID}.json", "r") as json_file:
    game_data = json.load(json_file)

In [44]:
GameFeaturesGenerator(game_data, mapping_data).process_game()

Unnamed: 0,team_id,start_time,outcome,first_riftHerald_ind,first_riftHerald_time,num_riftHerald,first_dragon_ind,first_dragon_time,num_dragon,first_baron_ind,...,first_turret_ind,first_turret_time,num_turret,first_inhibitor_ind,first_inhibitor_time,num_inhibitor,first_kill_ind,first_kill_time,num_kills,game_end_time
0,109539873500788632,2023-06-20 16:06:33.390,0,,,,,,,,...,,,,,,,,,,2415.682
1,109539776003058530,2023-06-20 16:06:33.390,1,1.0,593.013,,1.0,593.013,,1.0,...,1.0,1140.841,,1.0,1140.841,,1.0,405.545,,2415.682


In [None]:
"""
 TODO: Note that this is all for TEAM based statistics, not necessarily carrying information about the individual players yet 
 - Create features for "stats_update" type of events, which are the most granular
 - Do so in a way such that every time we pass the 5/10/15 minute markers, we compute stats@5, stats@10, stats@15, etc.
 - Can be very granular in computing the player features 
 
 To actually test this framework 
 - Take a tournament as test data and suppose you want to predict the final results of the tournament
 - For a given matchup (e.g. TSM vs. C9), you can use the features from the previous X=5/10/15 games of the tournament to predict the outcome of the next game
"""
