In [1]:
import pandas as pd
import numpy as np
import os
from tqdm import tqdm

from data_processing.GameFeaturesGenerator import GameFeaturesGenerator
from data_processing.utils.download_functions import *

In [2]:
# Read in tournament data
with open("tournaments.json", "r") as json_file:
   tournament_data_all = json.load(json_file)

# Read in mappings data
with open("mapping_data.json", "r") as json_file:
   mappings_data = json.load(json_file)
   
mappings = {
   esports_game["esportsGameId"]: esports_game for esports_game in mappings_data
}


# Only retrieve LCS tournament data ['lcs_summer_2023']
tournament_data = [x for x in tournament_data_all if x['slug']=='lcs_summer_2023'][0]

['games',
 'leagues.json',
 'mapping_data.json',
 'players.json',
 'teams.json',
 'tournaments.json']

In [3]:
# Read in teams data
with open("teams.json", "r") as json_file:
   teams_data = json.load(json_file)

In [4]:
# Read in leagues data
with open("leagues.json", "r") as json_file:
   leagues_data = json.load(json_file)

# Iterate through leagues_data and create a dataframe with the league_id, league_name, and league_region
leagues_df = []
for league in leagues_data:
    league_id = league['id']
    league_name = league['name']
    league_region = league['region']
    league_tournaments = [x['id'] for x in league['tournaments']]
    league_df_sub = pd.DataFrame({'league_id':league_id, 'league_name':league_name, 'league_region':league_region, 'league_tournaments':league_tournaments})
    leagues_df.append(league_df_sub)
leagues_df = pd.concat(leagues_df)

In [5]:
# Read in tournament data
with open("tournaments.json", "r") as json_file:
   tournament_data_all = json.load(json_file)

# Read in mappings data
with open("mapping_data.json", "r") as json_file:
   mappings_data = json.load(json_file)
   
mappings = {
   esports_game["esportsGameId"]: esports_game for esports_game in mappings_data
}


# Only retrieve LCS tournament data ['lcs_summer_2023']
tournament_data = [x for x in tournament_data_all if x['slug']=='lcs_summer_2023'][0]

In [11]:
leagues_df

Unnamed: 0,league_id,league_name,league_region,league_tournaments
0,98767991299243165,LCS,NORTH AMERICA,110303581083678395
1,98767991299243165,LCS,NORTH AMERICA,109517090066605615
2,98767991299243165,LCS,NORTH AMERICA,108206581962155974
3,98767991299243165,LCS,NORTH AMERICA,107458367237283414
4,98767991299243165,LCS,NORTH AMERICA,107458335260330212
...,...,...,...,...
5,105549980953490846,CBLOL Academy,BRAZIL,105562692792011783
0,110371976858004491,North Regional League,LATIN AMERICA NORTH,110372140231846916
0,110372322609949919,South Regional League,LATIN AMERICA SOUTH,110372454557117766
0,108001239847565215,TFT Rising Legends,INTERNATIONAL,109761195185432372


In [12]:
leagues_df[leagues_df['league_id']==tournament_data['leagueId']]

Unnamed: 0,league_id,league_name,league_region,league_tournaments
0,98767991299243165,LCS,NORTH AMERICA,110303581083678395
1,98767991299243165,LCS,NORTH AMERICA,109517090066605615
2,98767991299243165,LCS,NORTH AMERICA,108206581962155974
3,98767991299243165,LCS,NORTH AMERICA,107458367237283414
4,98767991299243165,LCS,NORTH AMERICA,107458335260330212
5,98767991299243165,LCS,NORTH AMERICA,105658534671026792
6,98767991299243165,LCS,NORTH AMERICA,105788932118361426
7,98767991299243165,LCS,NORTH AMERICA,109428868589633757
8,98767991299243165,LCS,NORTH AMERICA,105522217230238828
9,98767991299243165,LCS,NORTH AMERICA,104174992692075107


In [7]:
tournament_data

{'id': '110303581083678395',
 'leagueId': '98767991299243165',
 'name': 'Summer 2023',
 'slug': 'lcs_summer_2023',
 'sport': 'lol',
 'startDate': '2023-05-31',
 'endDate': '2023-08-22',
 'stages': [{'name': 'Regular Season',
   'type': None,
   'slug': 'regular_season',
   'sections': [{'name': 'Regular Season',
     'matches': [{'id': '110303581088069312',
       'type': 'normal',
       'state': 'completed',
       'mode': 'classic',
       'strategy': {'type': 'bestOf', 'count': 1},
       'teams': [{'id': '99294153828264740',
         'side': 'blue',
         'record': {'wins': 7, 'losses': 12, 'ties': 0},
         'result': {'outcome': 'loss', 'gameWins': 0},
         'players': [{'id': '99322214616775017', 'role': 'mid'},
          {'id': '98926509785257793', 'role': 'top'},
          {'id': '107569568015156338', 'role': 'top'},
          {'id': '98767991769705572', 'role': 'bottom'},
          {'id': '106625308523122120', 'role': 'support'},
          {'id': '104559243245199489'

In [20]:

class TournamentDataProcessor:
    def __init__(self, tournament_data, leagues_data):
        # Leagues_data is a list of dictionaries with the league_id, league_name, and league_region so that we can designate tournament region
        self.tournament_data = tournament_data
        self.tournament_id = tournament_data['id']
        self.tournament_league_id = tournament_data['leagueId']
        self.tournament_name = tournament_data['slug']
        self.tournament_stages = [x['name'] for x in tournament_data['stages']]
        tournament_leagues = leagues_data[leagues_data['league_id']==self.tournament_league_id]
        try:
            self.tournament_region = tournament_leagues['league_region'][0]
        except IndexError:
            raise ValueError(f"League ID {self.tournament_league_id} not found in leagues data")
        
    def get_tournament_stages(self):
        return [[x['name'], len(x['sections'])] for x in self.tournament_data['stages']]
    
    def get_tournament_data(self, training_stages = None, testing_stages=['Playoffs']):
        # Iterate through the training stages and aggregate the data together, do the same for the testing stages
        # Return the training and testing data
        # If no training stages are specified, use all stages except the testing stages
        if training_stages is None:
            training_stages = [x['name'] for x in tournament_data['stages'] if x['name'] not in testing_stages]
            
        # Validate that the training and testing stages are valid within the tournament
        for stage in training_stages+testing_stages:
            if stage not in self.tournament_stages:
                raise ValueError(f"Stage {stage} not found in tournament {self.tournament_name}")
            
        self.training_stages = training_stages
        self.testing_stages = testing_stages
        training_data = []
        testing_data = []
        for stage in self.tournament_data['stages']:
            if stage['name'] in training_stages:
                for section in stage['sections']:
                    for match in section['matches']:
                        training_data.append(self.get_game_data_full(match))
            elif stage['name'] in testing_stages:
                for section in stage['sections']:
                    for match in section['matches']:
                        testing_data.append(self.get_game_data_full(match))
        
        training_data = pd.concat(training_data, ignore_index=True)
        testing_data = pd.concat(testing_data, ignore_index=True)
        training_data = self.append_flipped_team_and_outcomes(training_data)
        
        return training_data, testing_data

    def get_game_data_full(self, games_data):
        # Iterate through t events of the match (could consist of one or many games) 
        # This is called at the match level (i.e., tournament_data['stages'][0]['sections'][0]['matches'] ) 
        # Look in the ['games'][t]['id'] field to get the game ID
        # Look in the ['games'][t]['state'] field to see if the game is 'completed'
        # Look in the ['games'][t]['teams'] field to get the team IDs
        # Look in the ['games'][t]['teams'][x]['result']['outcome'] field to get the result of the game for each team
        # We technically only need the 'state' to verify completion and 'id' to fetch details of the game, but load in other fields for verification
        match_id = games_data['id']  # ID for the full match
        game_tables = []
        for game in games_data['games']:
            game_state = game['state']
            if game_state == 'completed':
                game_id = game['id']  # ID for the specific games in the match 
                team_ids, team_outcomes = [], []
                for team in game['teams']:
                    team_ids.append(team['id'])
                    team_outcome = 1 if team['result']['outcome'] == 'win' else 0
                    team_outcomes.append(team_outcome)
                game_tables.append(pd.DataFrame({'match_id': match_id, 'esportsGameId': game_id, 'region': self.tournament_region,
                                                 'team_id_1': team_ids[0], 'outcome_1': team_outcomes[0],
                                                 'team_id_2': team_ids[1], 'outcome_2': team_outcomes[1]}, index=[0]))
        return pd.concat(game_tables, ignore_index=True)
    
    @staticmethod
    def swap_columns(df, cols1, cols2):
        """Swap the corresponding values of each of the columns of cols1 with cols2 and return a copy of the df"""
        if len(cols1) != len(cols2):
            raise ValueError("The number of columns to swap must be equal")
        df_copy = df.copy(deep=True)
        for i in range(len(cols1)):
            col_val = df_copy[cols1[i]].copy()
            df_copy[cols1[i]] = df_copy[cols2[i]]
            df_copy[cols2[i]] = col_val
        return df_copy
    
    def append_flipped_team_and_outcomes(self, data):
        # Want to ensure that there's symmetry between the teams and outcomes to prevent overfitting 
        return pd.concat([data, TournamentDataProcessor.swap_columns(data, ['team_id_1', 'outcome_1'], ['team_id_2', 'outcome_2'])], ignore_index=True)
        

In [21]:
tournament_data_processor = TournamentDataProcessor(tournament_data, leagues_df)
training_data, testing_data = tournament_data_processor.get_tournament_data()

In [22]:
training_data

Unnamed: 0,match_id,esportsGameId,region,team_id_1,outcome_1,team_id_2,outcome_2
0,110303581088069312,110303581088134849,NORTH AMERICA,99294153828264740,0,98767991877340524,1
1,110303581088331458,110303581088331459,NORTH AMERICA,98767991877340524,1,103461966951059521,0
2,110303581088331460,110303581088331461,NORTH AMERICA,98767991877340524,1,99294153824386385,0
3,110303581088331462,110303581088331463,NORTH AMERICA,98926509885559666,1,98767991877340524,0
4,110303581088331464,110303581088331465,NORTH AMERICA,98767991877340524,1,98926509892121852,0
...,...,...,...,...,...,...,...
179,110303581088528238,110303581088528239,NORTH AMERICA,106972778172351142,0,98767991930907107,1
180,110303581088528240,110303581088528241,NORTH AMERICA,106972778172351142,0,98767991860392497,1
181,110303581088528242,110303581088528243,NORTH AMERICA,98767991860392497,1,98767991930907107,0
182,110754941553978393,110754941553978394,NORTH AMERICA,99294153828264740,0,98926509883054987,1


In [15]:
os.path.exists(f"games/{platform_game_id}.json")

NameError: name 'platform_game_id' is not defined

In [104]:
with open(f"games/{platform_game_id}.json", "r") as json_file:
    game_data = json.load(json_file)

In [102]:
platform_game_id = mappings[training_data.iloc[1]['esportsGameId']]['platformGameId']

In [94]:
f"games/{platform_game_id}.json"

'games/ESPORTSTMNT01:3311377.json'

In [12]:
os.listdir('./games')

['ESPORTSTMNT01', 'ESPORTSTMNT02', 'ESPORTSTMNT03', 'ESPORTSTMNT04']

In [17]:
directory = "games"
if not os.path.exists(directory):
   os.makedirs(directory)

# Load each game and process them
game_rows = []
for game in tqdm(training_data):
    try:
        game_id = game['esportsGameId']
        region = game['league_region']
        game_mapping_data = mappings[game_id]
        platform_game_id = game_mapping_data['platformGameId']
        # download_gzip_and_write_to_json(f"{directory}/{platform_game_id}")
        with open(f"games/{platform_game_id}.json", "r") as json_file:
            game_data = json.load(json_file)
        game_features = GameFeaturesGenerator(game_data, game_mapping_data).process_game()
        game_rows.append(game_features)
    except KeyError:
        print(f"Match {game_id} was not found")


100%|██████████| 10/10 [00:15<00:00,  1.60s/it]


In [23]:
game_rows[1]

Unnamed: 0,platformGameId,esportsGameId,team_id,start_time,outcome,first_riftHerald_ind,first_riftHerald_time,num_riftHerald,first_dragon_ind,first_dragon_time,...,first_turret_ind,first_turret_time,num_turret,first_inhibitor_ind,first_inhibitor_time,num_inhibitor,first_kill_ind,first_kill_time,num_kills,game_end_time
0,ESPORTSTMNT02:3214865,110303581088331459,98767991877340524,2023-06-30 21:08:08.783,1,1,602.508,2,1,602.508,...,1,815.009,11,1,815.009,2,1,185.993,25,1337.233
1,ESPORTSTMNT02:3214865,110303581088331459,103461966951059521,2023-06-30 21:08:08.783,0,0,,0,0,,...,0,,2,0,,0,0,,3,1337.233
