In [18]:
import boto3
from botocore.exceptions import NoCredentialsError, PartialCredentialsError
import gzip
import io
import json

# Create the S3 client that getUnzippedData uses for every download.
# Credentials are not passed here, so boto3 has to find them on its own.
s3 = boto3.client('s3')

# Initial bucket name and object key (a single 2022 game file).
# NOTE(review): both variables are reassigned in later cells before the first
# visible call to getUnzippedData, so these values look like leftovers — confirm.
bucket_name = 'vcthackathonbucket1'
object_key = 'game-changers/games/2022/val:0429689c-85fa-4448-bbfb-10a099864809.json.gz'

In [19]:
def getUnzippedData(bucket_name, object_key, client=None):
    """Download a gzip-compressed JSON object from S3 and return it decoded.

    Args:
        bucket_name: Name of the S3 bucket that holds the object.
        object_key: Key of the gzipped JSON object inside the bucket.
        client: Optional object exposing ``get_object(Bucket=..., Key=...)``
            whose response has a readable, closable ``'Body'``. Defaults to
            the notebook-level ``s3`` client, so existing two-argument calls
            behave exactly as before.

    Returns:
        Whatever ``json.loads`` produces for the decompressed payload, or
        ``None`` if anything went wrong. Failures are reported with a printed
        message instead of an exception, so callers that iterate over the
        result should check for ``None`` first.
    """
    try:
        if client is None:
            # Looked up at call time so the notebook-level client is used.
            client = s3

        # Retrieve the object
        response = client.get_object(Bucket=bucket_name, Key=object_key)

        # Read the whole payload into memory, and always release the
        # underlying stream, even if the read fails part-way through.
        body = response['Body']
        try:
            data = body.read()
        finally:
            body.close()

        # Decompress and decode in one go.
        return json.loads(gzip.decompress(data))

    except NoCredentialsError:
        print("Credentials not available.")
    except PartialCredentialsError:
        print("Incomplete credentials provided.")
    except Exception as e:
        # Best-effort by design: report the problem and fall through.
        print(f"An error occurred: {e}")

    # Reached only after one of the handlers above has printed its message.
    return None

In [20]:
# Single dictionary meant to hold all the information we want to feed into the
# LLM. It starts out empty here; the "PARSE PLAYERS" cell later rebinds
# `players` to a new dict before filling it.
players = {}

PARSE LEAGUES

In [22]:
# Point the shared bucket_name / object_key variables at the gzipped leagues
# file (this overwrites whatever they held before).
bucket_name = 'vcthackathon-data'
object_key = 'game-changers/esports-data/leagues.json.gz'

# `leagues` starts empty and is filled by the following cell.
# getUnzippedData prints a message and returns None on failure, so
# leaguesJSON may be None if the download did not succeed.
leagues = {}
leaguesJSON = getUnzippedData(bucket_name, object_key)

In [25]:
# Index the league records by league_id, keeping only the name and region.
# (The loop variable needs no prior initialisation.)
for league in leaguesJSON:
    leagues[league['league_id']] = {'name': league['name'], 'region': league['region']}

print(len(leagues))

11


PARSE TEAMS

In [26]:
# Where the gzipped teams file lives in S3
bucket_name = 'vcthackathon-data'
object_key = 'game-changers/esports-data/teams.json.gz'

# Lookup table: team id -> the subset of team fields we keep
teams = {}
teamsJSON = getUnzippedData(bucket_name, object_key)
for team in teamsJSON:
    teams[team['id']] = {
        field: team[field] for field in ('name', 'acronym', 'home_league_id')
    }
print(len(teams))

283


PARSE TOURNAMENTS

In [27]:
# Where the gzipped tournaments file lives in S3
bucket_name = 'vcthackathon-data'
object_key = 'game-changers/esports-data/tournaments.json.gz'

# Lookup table: tournament id -> the subset of tournament fields we keep
tournaments = {}
tournamentsJSON = getUnzippedData(bucket_name, object_key)
for tournament in tournamentsJSON:
    tournaments[tournament['id']] = {
        field: tournament[field] for field in ('league_id', 'name')
    }

print(len(tournaments))

57


PARSE PLAYERS

In [29]:
from datetime import datetime

bucket_name = 'vcthackathon-data'
object_key = 'game-changers/esports-data/players.json.gz'

playersJSON = getUnzippedData(bucket_name, object_key)

# Pass 1: for every player id remember only the most recently updated record,
# whatever its status. Deciding "archived or not" while still scanning makes
# the result depend on record order: an older active record seen after a newer
# archived one would bring the player back.
latestRecords = {}
for player in playersJSON:
    currentDate = datetime.strptime(player['updated_at'], "%Y-%m-%dT%H:%M:%SZ")
    seen = latestRecords.get(player['id'])
    # Strict '>' keeps the first record when two share the same timestamp.
    if seen is None or currentDate > seen[0]:
        latestRecords[player['id']] = (currentDate, player)

# Pass 2: drop players whose most recent record is archived and keep only the
# fields we need from the rest.
# NOTE(review): the printed count can differ from a previously saved output if
# the file contains the record orderings described above.
players = {
    playerId: {
        'handle': record['handle'],
        'updated_at': updatedAt,
        'first_name': record['first_name'],
        'last_name': record['last_name'],
        'home_team_id': record['home_team_id'],
    }
    for playerId, (updatedAt, record) in latestRecords.items()
    if record['status'] != 'archived'
}

print(len(players))

# of players in JSON: 2999
1498


PARSE MAPPING DATA
*Note: the mapping data contains one entry per game, keyed by `platformGameId`.*

In [30]:
# Lookup table: platformGameId -> the ids and mappings recorded for that game
mappingData = {}

bucket_name = 'vcthackathon-data'
object_key = 'game-changers/esports-data/mapping_data_v2.json.gz'

mappingDataJSON = getUnzippedData(bucket_name, object_key)
# The loop variable is deliberately not called `map`: at notebook top level
# that would replace the builtin map() for the rest of the kernel session.
for mapping in mappingDataJSON:
    mappingData[mapping['platformGameId']] = {
        'esportsGameId': mapping['esportsGameId'],
        #'matchId': mapping['matchId'],
        'tournamentId': mapping['tournamentId'],
        'teamMapping': mapping['teamMapping'],
        'participantMapping': mapping['participantMapping']
    }

print(len(mappingData))

1140


TODO: write a function that parses a single game file and updates the `players` dictionary.
TODO: call that function for every game file of a given year.

In [ ]:
# TODO: Change this to be all game files instead of just one
# For now a single 2024 game file is loaded.
bucket_name = 'vcthackathon-data'
object_key = 'game-changers/games/2024/val:001949ff-e8a0-40fb-8ba5-65e123df8e1d.json.gz'

# getUnzippedData prints a message and returns None on failure, so gameData
# may be None if the download did not succeed.
gameData = getUnzippedData(bucket_name, object_key)

In [14]:
def parse_game_data(gameData, players):
    """Parse one game data file and fold the parts we want into ``players``.

    The parsing itself is not implemented yet. Planned steps, taken from the
    original notes:

    1. Go through the first config event to get the platform game id (used to
       link to the mapping data), plus the game team id, game player id,
       agent, color and map.
    2. Establish the link between the mapping data and the game's own ids
       (the original note was left unfinished — TODO confirm the target).

    Args:
        gameData: Contents of one game data file.
        players: The players dictionary to update.

    Returns:
        The ``players`` dictionary. Until the steps above are implemented it
        is returned unchanged, so ``players = parse_game_data(...)`` is safe.
    """
    # TODO: implement the steps listed in the docstring.
    return players
    

In [ ]:
# Run the parser on the single game loaded above. Its return value is not used
# here; only the number of entries in `players` is printed afterwards.
parse_game_data(gameData, players)
print(len(players))