In [18]:
import boto3
from botocore.exceptions import NoCredentialsError, PartialCredentialsError
import gzip
import io
import json

# Default S3 location (a single 2022 game file); the bucket name and object
# key are plain module-level variables that later cells reassign.
bucket_name = 'vcthackathonbucket1'
object_key = 'game-changers/games/2022/val:0429689c-85fa-4448-bbfb-10a099864809.json.gz'

# Shared S3 client used by getUnzippedData
s3 = boto3.client('s3')

In [19]:
def getUnzippedData(bucket_name, object_key):
    """Fetch a gzip-compressed JSON object from S3 and return it decoded.

    Args:
        bucket_name: Name of the S3 bucket to read from.
        object_key: Key of the gzipped JSON object inside the bucket.

    Returns:
        The value produced by parsing the decompressed JSON. If anything goes
        wrong (credentials, download, decompression, parsing) a message is
        printed and the function falls through, implicitly returning None.
    """
    try:
        # Download the whole object into memory
        s3_object = s3.get_object(Bucket=bucket_name, Key=object_key)
        compressed_bytes = s3_object['Body'].read()

        # Decompress from an in-memory buffer and parse in one go
        with gzip.open(io.BytesIO(compressed_bytes), 'rb') as gz_stream:
            return json.load(gz_stream)

    except NoCredentialsError:
        print("Credentials not available.")
    except PartialCredentialsError:
        print("Incomplete credentials provided.")
    except Exception as e:
        print(f"An error occurred: {e}")

In [20]:
# Single dictionary that accumulates everything we want to feed into the LLM
players = dict()

PARSE LEAGUES

In [22]:
# Location of the leagues reference file
bucket_name = 'vcthackathon-data'
object_key = 'game-changers/esports-data/leagues.json.gz'

# Download the raw league records; `leagues` is the (initially empty) index
# that a following cell fills in.
leaguesJSON = getUnzippedData(bucket_name=bucket_name, object_key=object_key)
leagues = {}

In [25]:
# Index the league records by league_id, keeping only name and region.
# (The previous dead `league = {}` initialiser and commented-out debug print
# were removed; the loop variable is assigned by the loop itself.)
for league in leaguesJSON:
    leagues[league['league_id']] = {'name': league['name'], 'region': league['region']}

print(len(leagues))

11


PARSE TEAMS

In [26]:
# Location of the teams reference file
bucket_name = 'vcthackathon-data'
object_key = 'game-changers/esports-data/teams.json.gz'

teamsJSON = getUnzippedData(bucket_name, object_key)

# Index teams by id, keeping only the fields used downstream
teams = {}
for team in teamsJSON:
    teams[team['id']] = {
        field: team[field] for field in ('name', 'acronym', 'home_league_id')
    }
print(len(teams))

283


PARSE TOURNAMENTS

In [27]:
# Location of the tournaments reference file
bucket_name = 'vcthackathon-data'
object_key = 'game-changers/esports-data/tournaments.json.gz'

tournamentsJSON = getUnzippedData(bucket_name, object_key)

# Index tournaments by id, keeping the owning league and the display name
tournaments = {}
for tournament in tournamentsJSON:
    tournaments[tournament['id']] = {
        field: tournament[field] for field in ('league_id', 'name')
    }

print(len(tournaments))

57


PARSE PLAYERS

In [29]:
from datetime import datetime

bucket_name = 'vcthackathon-data'
object_key = 'game-changers/esports-data/players.json.gz'

playersJSON = getUnzippedData(bucket_name, object_key)

# Pass 1: for every player id keep the record with the latest 'updated_at',
# regardless of status. Deciding "most recent" before looking at the status
# makes the result independent of record order: previously an archived record
# removed/skipped the id, and any older active record seen afterwards re-added
# the player with stale data.
latest_records = {}
for player in playersJSON:
    updated_at = datetime.strptime(player['updated_at'], "%Y-%m-%dT%H:%M:%SZ")
    previous = latest_records.get(player['id'])
    # Strict '>' keeps the first-seen record when timestamps tie
    if previous is None or updated_at > previous[0]:
        latest_records[player['id']] = (updated_at, player)

# Pass 2: drop players whose most recent record is archived and keep only the
# fields used downstream ('updated_at' is stored as a datetime).
players = {
    player_id: {
        'handle': record['handle'],
        'updated_at': updated_at,
        'first_name': record['first_name'],
        'last_name': record['last_name'],
        'home_team_id': record['home_team_id'],
    }
    for player_id, (updated_at, record) in latest_records.items()
    if record['status'] != 'archived'
}

print(len(players))

# of players in JSON: 2999
1498


PARSE MAPPING DATA
* Note: mapping data is stored per game (one entry per platformGameId). *

In [30]:
mappingData = {}

bucket_name = 'vcthackathon-data'
object_key = 'game-changers/esports-data/mapping_data_v2.json.gz'

mappingDataJSON = getUnzippedData(bucket_name, object_key)

# Index the mapping records by platformGameId. The loop variable is named
# `game_mapping` (not `map`) so the built-in map() is not shadowed for the
# rest of the notebook.
for game_mapping in mappingDataJSON:
    mappingData[game_mapping['platformGameId']] = {
        'esportsGameId': game_mapping['esportsGameId'],
        #'matchId': game_mapping['matchId'],
        'tournamentId': game_mapping['tournamentId'],
        'teamMapping': game_mapping['teamMapping'],
        'participantMapping': game_mapping['participantMapping']
    }

print(len(mappingData))

1140


TODO: write a function that parses a single game file and updates the players dictionary
TODO: invoke that function for every game file in a given year

In [ ]:
# TODO: Change this to be all game files instead of just one
# For now a single 2024 game file is downloaded and decoded.
bucket_name = 'vcthackathon-data'
object_key = 'game-changers/games/2024/val:001949ff-e8a0-40fb-8ba5-65e123df8e1d.json.gz'

gameData = getUnzippedData(bucket_name=bucket_name, object_key=object_key)

In [ ]:
def config_event(event, mappingData, temp_players):
    """Register the players of a game from its 'configuration' event.

    For every team member that has both a player record in the event and a
    global id in the game's participantMapping, make sure an entry exists in
    ``temp_players`` and append a fresh, zeroed stats record for this game.

    Args:
        event: Game event dict containing 'platformGameId' and 'configuration'.
        mappingData: Dict keyed by platformGameId; each value provides
            'tournamentId' and 'participantMapping' (stringified in-game
            player id -> global player id).
        temp_players: Dict keyed by global player id; updated in place.

    Returns:
        ``(temp_players, attack_defend_info)`` where ``attack_defend_info`` is
        a dict with 'attackingTeam' and 'defendingTeam' taken from the event's
        spike-mode configuration, or ``(temp_players, None)`` when the event's
        platformGameId is not present in ``mappingData``.

    NOTE(review): every call appends a new game record for each player. If a
    game's event stream can contain more than one configuration event, players
    would get duplicate records for the same game - confirm against the data.
    """
    # Link to mapping data using platformGameId
    platform_game_id = event['platformGameId']

    if platform_game_id not in mappingData:
        print(f"Platform game ID {platform_game_id} not found in mapping data")
        return temp_players, None  # No mapping for this game, nothing to register

    mapping_info = mappingData[platform_game_id]
    configuration = event['configuration']

    # Extract map info
    map_info = configuration['selectedMap']['fallback']['displayName']

    # Index the per-player records once instead of scanning the whole list for
    # every team member. setdefault keeps the first record for a repeated id,
    # which is what the former first-match scan returned.
    player_info_by_id = {}
    for info in configuration['players']:
        player_info_by_id.setdefault(info['playerId']['value'], info)

    for team in configuration['teams']:
        team_name = team['name']

        for player in team['playersInTeam']:
            player_id = player['value']

            player_info = player_info_by_id.get(player_id)
            if not player_info:
                continue  # Skip if player info not found

            # Get global player ID from the participantMapping
            global_player_id = mapping_info['participantMapping'].get(str(player_id))
            if not global_player_id:
                continue  # Skip if no global player ID is found

            # Initialize the player entry if it doesn't exist in temp_players
            if global_player_id not in temp_players:
                # NOTE(review): 'ID' and 'handle' are both filled with the
                # account id from the event - confirm this is intended.
                account_id = player_info['accountId']['value']
                temp_players[global_player_id] = {
                    'ID': account_id,
                    'handle': account_id,
                    'name': player_info['displayName'],
                    'team': team_name,
                    'region': None,  # Can be filled in later
                    'league_name': None,  # Can be filled in later
                    'Games': []  # List of games
                }

            # Per-game record; counters start at 0 and are updated by later events
            game_info = {
                'Year': None,  # Year can be filled in from other events
                'Tournament Name': mapping_info['tournamentId'],  # From mapping data
                'Attacking Kills': 0,
                'Defending Kills': 0,
                'Attacking Deaths': 0,
                'Defending Deaths': 0,
                'Attacking Revives': 0,
                'Defending Revives': 0,
                'Assists (attacking/defending)': 0,
                'Map': map_info,
                'Agent': player_info['selectedAgent']['fallback']['guid']  # GUID of the agent
            }

            temp_players[global_player_id]['Games'].append(game_info)

    # Which team attacks / defends according to the spike-mode configuration
    spike_mode = configuration['spikeMode']
    attack_defend_info = {
        "attackingTeam": spike_mode['attackingTeam']['value'],
        "defendingTeam": spike_mode['defendingTeam']['value'],
    }

    return temp_players, attack_defend_info

In [ ]:
def player_died_event(event, attack_defend_info, temp_players, mappingData):
    """Apply a 'playerDied' event to the current game records in temp_players.

    Increments the death counter of the deceased, the kill counter of the
    killer and the assist counter of each assistant on the most recent game
    record (``['Games'][-1]``) of the corresponding player.

    Args:
        event: Game event dict containing 'platformGameId' and 'playerDied'.
        attack_defend_info: Dict with 'attackingTeam' / 'defendingTeam', or
            None when no side information is available.
        temp_players: Dict keyed by global player id; updated in place.
        mappingData: Dict keyed by platformGameId whose values provide
            'participantMapping' (stringified in-game id -> global id).

    Returns:
        ``temp_players``. It is returned untouched when the game is missing
        from ``mappingData``, when the killer or the deceased has no global
        id, or when ``attack_defend_info`` is None/empty. Players without an
        entry (or without any game record) in ``temp_players`` are skipped
        instead of raising KeyError/IndexError.
    """
    platform_game_id = event['platformGameId']

    if platform_game_id not in mappingData:
        print(f"Platform game ID {platform_game_id} not found in mapping data")
        return temp_players

    participant_mapping = mappingData[platform_game_id]['participantMapping']
    death = event['playerDied']

    # Use participantMapping to map in-game ids to global player IDs
    global_deceased_id = participant_mapping.get(str(death['deceasedId']['value']))
    global_killer_id = participant_mapping.get(str(death['killerId']['value']))

    # If no valid mappings are found, return
    if not global_deceased_id or not global_killer_id:
        return temp_players

    # Without side information the kill/death cannot be attributed
    if not attack_defend_info:
        return temp_players

    attacking_team = attack_defend_info['attackingTeam']

    def side_of(global_player_id):
        # NOTE(review): this reverse lookup yields a participantMapping key,
        # i.e. a stringified in-game *player* id (the same keys looked up with
        # str(...) above), and compares it with the attacking *team* id. The
        # two are different kinds of id, so the comparison presumably never
        # matches and every stat lands in the 'Defending' bucket. A real fix
        # needs the player -> team membership from the configuration event,
        # which this function does not receive; behaviour is kept as-is.
        local_id = next(
            (key for key, mapped in participant_mapping.items() if mapped == global_player_id),
            None,
        )
        return 'Attacking' if local_id == attacking_team else 'Defending'

    def bump(global_player_id, stat):
        # Increment `stat` on the player's latest game record, if there is one
        games = temp_players.get(global_player_id, {}).get('Games')
        if games:
            games[-1][stat] += 1

    bump(global_deceased_id, f"{side_of(global_deceased_id)} Deaths")
    bump(global_killer_id, f"{side_of(global_killer_id)} Kills")

    # Assists go into one combined counter regardless of side, so no side
    # lookup is needed for assistants.
    for assistant in death.get('assistants') or []:
        global_assistant_id = participant_mapping.get(str(assistant['assistantId']['value']))
        if not global_assistant_id:
            continue
        bump(global_assistant_id, 'Assists (attacking/defending)')

    return temp_players

In [14]:
def parse_game_data(gameData, players, mappingData):
    """Parse one game's event list and merge the per-player results into players.

    Args:
        gameData: Iterable of event dicts for a single game.
        players: Main players dictionary keyed by player id; updated in place.
        mappingData: Dict keyed by platformGameId with the id mappings.

    Returns:
        The updated ``players`` dictionary (the same object that was passed in).
    """
    temp_players = {}
    # Side information produced by the configuration event; death events need it
    attack_defend_info = None

    for event in gameData:
        if 'configuration' in event:
            # config_event takes (event, mappingData, temp_players) and returns
            # (temp_players, attack_defend_info)
            temp_players, new_info = config_event(event, mappingData, temp_players)
            if new_info is not None:
                attack_defend_info = new_info
        elif 'playerDied' in event:
            if attack_defend_info is None:
                # No configuration seen yet: the death cannot be attributed
                continue
            temp_players = player_died_event(event, attack_defend_info, temp_players, mappingData)

    # Merge temp_players into the main players dictionary
    for player_id, player_data in temp_players.items():
        if player_id in players:
            # Existing entries may not have a 'Games' list yet (e.g. roster
            # entries without any parsed game), so create it on demand
            players[player_id].setdefault('Games', []).extend(player_data['Games'])
        else:
            # Add new player entry if not in players dictionary
            players[player_id] = player_data

    return players

    

In [ ]:
# parse_game_data requires the mapping data as its third argument
parse_game_data(gameData, players, mappingData)
print(len(players))