# Tournaments Processing

This notebook iterates through all tournaments in a given year, downloads every game of each tournament, and processes each game by iterating through its frames and condensing them into a single row of data. The resulting data is then saved for later modeling and inference.

In [None]:
import json
import os
import shutil

import numpy as np
import pandas as pd
from tqdm.notebook import tqdm

from data_processing.GameFeaturesGenerator import GameFeaturesGenerator
from data_processing.TournamentDataProcessor import TournamentDataProcessor
from data_processing.LeaguesDataProcessor import LeaguesDataProcessor
from data_processing.utils.download_functions import *

pd.set_option('mode.chained_assignment', None)
# Work from inside the data directory. The chdir is guarded so that re-running
# this cell without restarting the kernel does not fail while looking for a
# nested 'esports-data/esports-data' directory.
if os.path.basename(os.getcwd()) != 'esports-data':
    os.chdir('esports-data')
os.listdir()

In [None]:
# Load the raw team records from disk
with open("teams.json", "r") as teams_file:
    teams_data = json.load(teams_file)

# Keep only the id and the name of every team, one row per team
team_df = pd.DataFrame(
    [{'team_id': team['team_id'], 'team_name': team['name']} for team in teams_data]
)

In [None]:
# Load every tournament record
with open("tournaments.json", "r") as tournaments_file:
    tournament_data_all = json.load(tournaments_file)

# Load the game mapping records
with open("mapping_data.json", "r") as mappings_file:
    mappings_data = json.load(mappings_file)

# Index the mapping records by their esportsGameId for direct lookup
mappings = dict(
    (esports_game["esportsGameId"], esports_game) for esports_game in mappings_data
)

# Instantiate the LeaguesDataProcessor used by the tournament processing loop
leagues_data_processor = LeaguesDataProcessor()

In [None]:
# Year whose tournaments should be processed; matched as a substring of the slug
TOURNAMENT_YEAR = '2023'

# Single pass over the tournaments: keep those whose slug mentions the year
tournaments = [x for x in tournament_data_all if TOURNAMENT_YEAR in x['slug']]
tournaments_names = [x['slug'] for x in tournaments]
# Same slugs as tournaments_names; kept as its own list under the original name
tournaments_year = list(tournaments_names)
print(tournaments_names)

In [None]:
# Optional: uncomment to restrict processing to a single tournament (e.g. when debugging)
# tournaments = [x for x in tournament_data_all if x['slug'] in ['pcs_summer_playoffs_2023']]

In [None]:
"""
Iterate through each tournament and process the data, we will get two dataframes 

tournament_rows = [match_id, esportsGameId, league, team_id_1, team_id_2, outcome_1, outcome_2]
game_rows = [platformGameId, esportsGameId, team_id, start_time, outcome, ...features..., ...league_indicators...]

Save these dataframes to csv files in the data folder with the corresponding tournament name 

Games that cannot be found in the mappings or that fail to process are reported and skipped.
Tournaments for which no game could be processed are reported and skipped (no csv is written).
"""

directory = "games"                         # temporary folder for the downloaded game files
game_data_directory = "2023_raw_game_data"  # destination folder for the csv outputs

for tournament_data in tournaments:
    tournament_name = tournament_data['slug']
    print("Processing: " + tournament_name)
    # Set up TournamentDataProcessor
    try:
        tournament_data_processor = TournamentDataProcessor(tournament_data, leagues_data_processor.leagues_df)
    except IndexError:
        print(f"Error processing tournament {tournament_name}. Tournament could not be found in leagues data")
        continue
    # tournament_data_processor.get_tournament_stages()
    training_data, _ = tournament_data_processor.get_tournament_data(training_stages=[], testing_stages=[])  # Consider all stages to be training 
    print("Games in tournament: " + str(len(training_data)))

    # Download each game into the temp directory, process it, and collect the results
    os.makedirs(directory, exist_ok=True)
    processed_games = []
    try:
        for i in tqdm(range(len(training_data))):
            match_row = training_data.iloc[i]
            # Looked up outside the try block so the error messages below always
            # refer to the game currently being handled
            game_id = match_row['esportsGameId']
            try:
                game_mapping_data = mappings[game_id]
                platform_game_id = game_mapping_data['platformGameId']
                download_gzip_and_write_to_json(f"{directory}/{platform_game_id}")
                with open(f"{directory}/{platform_game_id}.json", "r") as json_file:
                    game_data = json.load(json_file)
            except KeyError:
                print(f"Match {game_id} was not found")
                continue
            try:
                game_features = GameFeaturesGenerator(game_data, game_mapping_data).process_game()
            except Exception as e:
                # Best effort: report the failing game and carry on with the rest
                print(f"Error processing game {game_id}")
                print(e)
                continue
            game_features['league'] = match_row['league']
            processed_games.append(game_features)
    finally:
        # Delete the temp directory even if something above raised
        shutil.rmtree(directory)

    # pd.concat raises ValueError on an empty list, so skip tournaments with no usable games
    if not processed_games:
        print(f"No games could be processed for tournament {tournament_name}, skipping")
        continue

    game_rows = pd.concat(processed_games)
    game_rows_leagues = leagues_data_processor.transform_league_col(game_rows['league'])
    game_rows = pd.concat([game_rows.drop(['league'], axis=1).reset_index(), game_rows_leagues], axis=1)

    # Save to csv in the data folder
    os.makedirs(game_data_directory, exist_ok=True)
    training_data.to_csv(f'{game_data_directory}/{tournament_name}_tournament_rows.csv', index=False)
    game_rows.to_csv(f'{game_data_directory}/{tournament_name}_game_rows.csv', index=False)