In [1]:
import json
import os

import numpy as np
import pandas as pd

from data_processing.GameFeaturesGenerator import GameFeaturesGenerator
from data_processing.utils.download_functions import *

In [2]:
# Move into the data directory exactly once. Without the guard, re-running this
# cell fails because the working directory is already 'esports-data'.
DATA_DIR = 'esports-data'
if os.path.basename(os.getcwd()) != DATA_DIR:
    os.chdir(DATA_DIR)
# os.listdir() returns entries in arbitrary order; sort for a stable display.
sorted(os.listdir())

['games',
 'leagues.json',
 'mapping_data.json',
 'players.json',
 'teams.json',
 'tournaments.json']

In [98]:
# Read in tournament data
with open("tournaments.json", "r") as json_file:
    tournament_data_all = json.load(json_file)

# Read in mappings data
with open("mapping_data.json", "r") as json_file:
    mappings_data = json.load(json_file)

# Index the mapping records by esportsGameId so a game can be looked up directly
mappings = {
    esports_game["esportsGameId"]: esports_game for esports_game in mappings_data
}

# Only retrieve LCS tournament data ['lcs_summer_2023']
TOURNAMENT_SLUG = 'lcs_summer_2023'
# Take the first tournament with a matching slug; fail with a readable message
# (rather than a bare IndexError) when the slug is not present in the file.
tournament_data = next(
    (x for x in tournament_data_all if x['slug'] == TOURNAMENT_SLUG), None
)
if tournament_data is None:
    raise ValueError(f"Tournament {TOURNAMENT_SLUG} not found in tournaments.json")

In [99]:

class TournamentDataProcessor:
    """Turn one tournament record into per-game training and testing tables.

    The tournament record is a dict with a 'slug' and a list of 'stages'. Each
    stage has a 'name' and 'sections', each section has 'matches', and each
    match has an 'id' and a list of 'games'.
    """

    # Column layout shared by every game table produced by this class.
    GAME_COLUMNS = ['match_id', 'esportsGameId',
                    'team_id_1', 'outcome_1',
                    'team_id_2', 'outcome_2']

    def __init__(self, tournament_data):
        """Store the tournament record and cache its slug and stage names."""
        self.tournament_data = tournament_data
        self.tournament_name = tournament_data['slug']
        self.tournament_stages = [x['name'] for x in tournament_data['stages']]

    def get_tournament_stages(self):
        """Return a list of [stage name, number of sections] pairs, one per stage."""
        return [[x['name'], len(x['sections'])] for x in self.tournament_data['stages']]

    def get_tournament_data(self, training_stages=None, testing_stages=('Playoffs',)):
        """Build the training and testing game tables for the chosen stages.

        Args:
            training_stages: Stage names used for training. When None, every
                stage of the tournament that is not a testing stage is used.
            testing_stages: Stage names used for testing.

        Returns:
            A (training_data, testing_data) tuple of DataFrames with the columns
            in GAME_COLUMNS. The training table additionally contains a copy of
            every row with the two team slots swapped. Either table is empty
            (but keeps its columns) when its stages hold no completed games.

        Raises:
            ValueError: If a requested stage name is not part of the tournament.
        """
        # Copy the arguments so the lists stored on the instance never alias the
        # caller's objects or the shared default value.
        testing_stages = list(testing_stages)
        if training_stages is None:
            # Derive from this instance's tournament, not from a notebook global.
            training_stages = [name for name in self.tournament_stages
                               if name not in testing_stages]
        else:
            training_stages = list(training_stages)

        # Validate that the training and testing stages are valid within the tournament
        for stage in training_stages + testing_stages:
            if stage not in self.tournament_stages:
                raise ValueError(f"Stage {stage} not found in tournament {self.tournament_name}")

        self.training_stages = training_stages
        self.testing_stages = testing_stages

        training_records, testing_records = [], []
        for stage in self.tournament_data['stages']:
            # A stage listed in both sets counts as training only.
            if stage['name'] in training_stages:
                target = training_records
            elif stage['name'] in testing_stages:
                target = testing_records
            else:
                continue
            for section in stage['sections']:
                for match in section['matches']:
                    target.extend(self._get_game_records(match))

        # Build each table once from plain records; this also yields a
        # well-formed empty table when no completed game was found.
        training_data = pd.DataFrame(training_records, columns=self.GAME_COLUMNS)
        testing_data = pd.DataFrame(testing_records, columns=self.GAME_COLUMNS)
        training_data = self.append_flipped_team_and_outcomes(training_data)

        return training_data, testing_data

    def _get_game_records(self, games_data):
        """Return one dict per completed game of a match, keyed by GAME_COLUMNS."""
        match_id = games_data['id']  # ID for the full match
        records = []
        for game in games_data['games']:
            # Games that are not 'completed' carry no usable result.
            if game['state'] != 'completed':
                continue
            teams = game['teams']
            # 1 marks a win; every other outcome value is recorded as 0.
            outcomes = [1 if team['result']['outcome'] == 'win' else 0 for team in teams]
            records.append({'match_id': match_id,
                            'esportsGameId': game['id'],  # ID for the specific game in the match
                            'team_id_1': teams[0]['id'], 'outcome_1': outcomes[0],
                            'team_id_2': teams[1]['id'], 'outcome_2': outcomes[1]})
        return records

    def get_game_data_full(self, games_data):
        """Return a table with one row per completed game of a single match.

        Called at the match level (an element of
        tournament_data['stages'][i]['sections'][j]['matches']). For every game
        whose 'state' is 'completed' the row holds the match ID, the game ID and,
        for the first two entries of 'teams', the team ID and a 1/0 win flag.

        Returns:
            A DataFrame with the columns in GAME_COLUMNS; empty when the match
            has no completed games.
        """
        return pd.DataFrame(self._get_game_records(games_data), columns=self.GAME_COLUMNS)

    @staticmethod
    def swap_columns(df, cols1, cols2):
        """Swap the corresponding values of each of the columns of cols1 with cols2 and return a copy of the df

        Raises:
            ValueError: If cols1 and cols2 differ in length.
        """
        if len(cols1) != len(cols2):
            raise ValueError("The number of columns to swap must be equal")
        df_copy = df.copy(deep=True)
        for first, second in zip(cols1, cols2):
            # Keep the first column's values before they are overwritten.
            held = df_copy[first].copy()
            df_copy[first] = df_copy[second]
            df_copy[second] = held
        return df_copy

    def append_flipped_team_and_outcomes(self, data):
        """Return data followed by a copy of it with the two team slots exchanged."""
        # Want to ensure that there's symmetry between the teams and outcomes to prevent overfitting
        flipped = TournamentDataProcessor.swap_columns(
            data, ['team_id_1', 'outcome_1'], ['team_id_2', 'outcome_2'])
        return pd.concat([data, flipped], ignore_index=True)
        

In [100]:
# Wrap the selected tournament record and split its games into training and
# testing tables using the default stage selection (testing stage: 'Playoffs').
tournament_data_processor = TournamentDataProcessor(tournament_data)
training_data, testing_data = tournament_data_processor.get_tournament_data()

In [103]:
# Resolve the platform game ID inside this cell so it does not depend on a cell
# further down the notebook having been executed first.
# NOTE(review): iloc[1] picks the second training row — presumably just a sample game.
platform_game_id = mappings[training_data.iloc[1]['esportsGameId']]['platformGameId']
# Check that the game's JSON file is present in the local games/ folder.
os.path.exists(f"games/{platform_game_id}.json")

True

In [104]:
# Load the raw game JSON for `platform_game_id` from the local games/ folder.
# NOTE(review): `platform_game_id` is assigned in the cell labelled In [102],
# which sits below this one in the notebook.
with open(f"games/{platform_game_id}.json", "r") as json_file:
    game_data = json.load(json_file)

In [102]:
# Look up the mapping record for the esportsGameId in the second row
# (positional index 1) of the training table and take its platformGameId.
# NOTE(review): the index 1 looks like an arbitrary sample choice — confirm.
platform_game_id = mappings[training_data.iloc[1]['esportsGameId']]['platformGameId']

In [94]:
# Display the relative path of the JSON file for the current platform_game_id.
f"games/{platform_game_id}.json"

'games/ESPORTSTMNT01:3311377.json'

In [None]:
directory = "games"
# exist_ok makes this a no-op when the folder is already there.
os.makedirs(directory, exist_ok=True)

# Load each game and process them
# NOTE(review): `reg_season_games` is not defined in the visible cells —
# presumably a table of regular-season games with an 'esportsGameId' column;
# confirm (or substitute the intended table) before running.
game_rows = []
for game_id in reg_season_games['esportsGameId']:
    # Only a missing mapping entry means the match itself was not found.
    if game_id not in mappings:
        print(f"Match {game_id} was not found")
        continue
    game_mapping_data = mappings[game_id]

    try:
        platform_game_id = game_mapping_data['platformGameId']
        # download_gzip_and_write_to_json(f"{directory}/{platform_game_id}")
        with open(f"{directory}/{platform_game_id}.json", "r") as json_file:
            game_data = json.load(json_file)
        game_features = GameFeaturesGenerator(game_data, game_mapping_data).process_game()
    except KeyError as err:
        # Still best-effort (skip the game), but report the real cause instead
        # of claiming the match was not found.
        print(f"Match {game_id} could not be processed: missing key {err}")
        continue
    game_rows.append(game_features)
