## Games Data

In [2]:
import requests
import json
import gzip
import shutil
import time
import os
from io import BytesIO
import pandas as pd
import datetime as dt
#TO_DO: add packages related to AWS later to upload stuff there

Here we filter out certain events in our game json data, because they don't seem to be relevant/ or will biase our model

In [3]:
def filter_out_events(game_event):

    event_filter = ['stats_update','item_purchased','item_destroyed',
                    'item_undo','item_sold','skill_level_up' ,'ward_placed', 'champion_level_up']

    #No event found: either it's messy data, or there's nothing to record.
    if game_event.get('eventType',None) is None:
        return False
    return (game_event.get('eventType',None) not in event_filter)

Here we create a function that takes in input a dictionnary of a game event and gives in output the keys that we'll need in our model

In [4]:
def GameEvent_Processing(game_event):
    """
    Process a game event dictionary to extract and format relevant data for a model.

    The function examines the 'eventType' within the game event data to structure
    the output according to the event type, such as game information, turret destruction,
    jungle monster kills, champion kills, stats updates, and game end events. Unnecessary keys
    are removed, and team IDs are converted to team colors. The function outputs a dictionary
    tailored to include only the keys needed for the model's input.

    Parameters:
    game_event (dict): A dictionary representing the game event data.

    Returns:
    dict: A processed dictionary with structured data for model input or the original
          game event data if no processing is done.
    """
    processed_output=None
    eventtype=game_event.get('eventType')
    def convert_team_id(teamID):
        if teamID == 200:
            return 'red'
        elif teamID == 100:
            return 'blue'

        return None
    keys_to_remove = ['eventTime','eventType','platformGameId','gameTime',
                      'stageID','sequenceIndex','gameName','playbackID']
    for key in keys_to_remove:
        game_event.pop(key,None)


    # first eventtype, always game_info :
    #"eventType": "game_info", "platformGameId": "ESPORTSTMNT01:3363718", "participants": [{"keystoneID": 8021, "hashedIP": "3VoTUXLOxMy1OmdTUitHpqfdYIc=", "teamID": 100, "participantID": 1, "championName": "Graves", "accountID": 2799125876114848, "abGroup": "", "perks": [{"perkIds": [8021, 8009, 9103, 8299, 8242, 8473, 5005, 5008, 5002], "perkStyle": 8000, "perkSubStyle": 8400}], "summonerName": "ECG Nate", "summonerLevel": 30}

    if eventtype =='game_info':
        blue_team=[]
        red_team=[]
        for player_dict in game_event.get('participants',None):
            player_info={
                'participantID': player_dict.get('participantID', None),
                'summonerName': player_dict.get('summonerName',None),
                'championName': player_dict.get('championName',None)
      }
            team_side = player_dict.get('teamID',None)
            if team_side == 100:
                blue_team.append(player_info)
            elif team_side == 200:
                red_team.append(player_info)
        processed_output = {
        'blue': blue_team,
        'red': red_team
    }
    # plating
    if eventtype == 'turret_plate_destroyed':
        processed_output = {
        'team': convert_team_id(game_event.get('teamID')),
        'lane': game_event.get('lane')
    }
        
    # turret destroyed   
    if eventtype == 'building_destroyed':
        processed_output = {
        'team': convert_team_id(game_event.get('teamID')),
        'lane': game_event.get('lane'),
        'buildingType': game_event.get('buildingType')
    }
    if game_event.get('turretTier',None):
      processed_output.update({'turretTier': game_event.get('turretTier')})

  #Jungle monster kills:
    if eventtype == 'epic_monster_kill':
      processed_output = {
        'monsterType': game_event.get('monsterType'),
        'killer': game_event.get('killer'),
        'team': convert_team_id(game_event.get('killerteamID')),
        'inEnemyJungle': game_event.get('inEnemyJungle'),
    }


  #Deliberate champion kills
    if eventtype == 'champion_kill':
      processed_output = {
        'killerTeam': convert_team_id(game_event.get('killerTeamID',None)),
        'victimTeam': convert_team_id(game_event.get('victimTeamID',None)),
        'killer': game_event.get('killer'),
        'assistants': game_event.get('assistants'),
        'position': game_event.get('position')
    }


  #Stats updates
    if eventtype == 'stats_update':

      tracked_stats = ['TOTAL_DAMAGE_DEALT_TO_CHAMPIONS', 'TOTAL_DAMAGE_TAKEN',
                     'TIME_CCING_OTHERS', 'VISION_SCORE',
                     'NEUTRAL_MINIONS_KILLED',
                     'NEUTRAL_MINIONS_KILLED_YOUR_JUNGLE',
                     'NEUTRAL_MINIONS_KILLED_ENEMY_JUNGLE']

      info_dump = []

      for participant in game_event.get('participants'):
      #All participants have specific data points that we want to acquire
        participant_data = {
          'participantID': participant.get('participantID'),
          'XP': participant.get('XP'),
          'totalGold': participant.get('totalGold'),
      }

      #We also want specific stats to appear in our data log:
        for stat_category in participant['stats']:

          if stat_category.get('name') in tracked_stats:
            participant_data.update({stat_category.get('name'):
                                  stat_category.get('value')})

        info_dump.append(participant_data)

      blue_status = None
      red_status = None
      team_status = game_event.get('teams',None)

      for side in team_status:
        indicator = side.pop('teamID')
        if indicator == 100:
          blue_status = side
        elif indicator == 200:
          red_status = side
      
          

    #Afterwards, we ship it alongside the teams info (which might be redundant)
      processed_output = {
      'participants':info_dump,
      'blue': blue_status,
      'red': red_status
    }

  #Endgame event contains the winner:
    if eventtype == 'game_end':
      processed_output = {'winningTeam':
                        convert_team_id(game_event.get('winningTeam',None))}


    if processed_output:
      return processed_output


    return game_event


In [5]:
def process_game(game_json):
  """
    Process a JSON array of game events and compile a list of essential events for game analysis.

    The function starts by recording the initial game information such as timestamp, platform ID, 
    and patch version. It then iterates over each game event, converting timestamps and filtering
    events based on specific criteria. Key game moments such as 10 and 15-minute marks, as well as 
    endgame stats, are specifically tracked and recorded. The output is an event list that contains 
    structured data for each relevant game event, providing a timeline of the game for subsequent analysis.

    Parameters:
    game_json (list of dicts): A JSON array where each element is a dictionary representing a game event.

    Returns:
    list: A list of dictionaries, each containing structured data for a key game event, including 
          time of the event, event type, and processed event data.
    """
  event_list = []

  #Take data that describes our game at the moment:
  initial_timestamp = game_json[0].get('eventTime')
  platform_id = game_json[0].get('platformGameId')
  patch_info = game_json[0].get('gameVersion')
  event_list.append({
      'gameDate': initial_timestamp,
      'esportsPlatformId': platform_id,
      'gameVersion': patch_info
  })

  #Convert our timestamp for use in the upcoming game log
  initial_timestamp = dt.datetime.fromisoformat(initial_timestamp.replace('Z','+00:00'))

  #Which stat updates do we want to record?
  stat_update_obtained = {
      600:False, #10-minute updates
      900:False, #15-minute updates
      'Endgame':False #Final game update
  }

  for game_event in game_json:

      event_timestamp = game_event.get('eventTime')
      event_timestamp = dt.datetime.fromisoformat(event_timestamp.replace('Z','+00:00'))
      game_timer = (event_timestamp - initial_timestamp) / dt.timedelta(seconds=1)

      def build_event_dict(my_timestamp,current_event,label=None):
        event_label =  label if label is not None else current_event.get('eventType',None)
        processed_event = GameEvent_Processing(current_event)
        output_dict = {
            'gameTime': my_timestamp,
            'eventType': event_label
        }
        output_dict.update(processed_event)
        return output_dict

      if filter_out_events(game_event):
        event_list.append(build_event_dict(game_timer,game_event))

      elif game_event.get('eventType',None) == 'stats_update':

          #10-minute mark stat update
          if not stat_update_obtained[600] and (game_timer>=600):
            event_list.append(build_event_dict(game_timer,game_event,
                                              'game_state_10mn'))
            stat_update_obtained[600]=True

          #15-minute mark stat update
          if not stat_update_obtained[900] and (game_timer>=900):
            event_list.append(build_event_dict(game_timer,game_event,
                                                'game_state_15mn'))
            stat_update_obtained[900]=True

          #Endgame stat update
          if not stat_update_obtained['Endgame'] and game_event.get('gameOver',False):
            event_list.append(build_event_dict(game_timer,game_event,
                                                'game_state_end'))
            stat_update_obtained['Endgame']=True
  return event_list

In [72]:
from json import JSONDecodeError
from tqdm import tqdm
from itertools import islice

dossier_games = 'games'
dossier_esports = 'esports-data'

with open("./esports-data/mapping_data.json", 'r') as mapping_file:
    mappings_data_list = json.load(mapping_file)

mappings_data = {item['platformGameId']: item for item in mappings_data_list}

# List only files with the `.json` extension without building intermediary lists
games = (f for f in os.listdir(dossier_games) if os.path.isfile(os.path.join(dossier_games, f)) and f.endswith('.json') and os.path.getsize(os.path.join(dossier_games, f)) > 1048576) 
games_to_process = islice(games, 203, None)
for game in tqdm(games):

    chemin_complet = os.path.join(dossier_games, game)
    try :
        with open(chemin_complet, 'r') as f:
            game_json = json.load(f)
    except JSONDecodeError as e:
        print(f"File {game} got unexpected error : {e}")

    try :
        game_json_filtered=process_game(game_json)
        with open(chemin_complet, 'w') as f:
            json.dump(game_json_filtered,f)
    
        # Vous pouvez choisir d'ignorer ce fichier, de le sauvegarder ailleurs, etc.
    except AttributeError:
        print(f'The game {game} has been already modified')
    


1it [00:00,  1.48it/s]

File ESPORTSTMNT02_3170169.json got unexpected error : Unterminated string starting at: line 1 column 30408700 (char 30408699)
The game ESPORTSTMNT02_3170169.json has been already modified


2it [00:02,  1.38s/it]

File ESPORTSTMNT02_3170174.json got unexpected error : Expecting ':' delimiter: line 1 column 103809025 (char 103809024)
The game ESPORTSTMNT02_3170174.json has been already modified


2it [00:02,  1.41s/it]


## Leagues Data 

In [7]:
#Alternatively:
def filter_bad_tournaments():
    #If we haven't pre-cleaned things:
    if not os.path.isfile('esports-data/leagues-cleaned.json'):
        leagues_df = pd.read_json('esports-data/leagues.json')
        #Clear TFT and All-Star tournaments from our leagues
        try:
            leagues_df.drop(axis=0,labels=[37,17],inplace=True)
        except:
            print('We have already cleared the rows related to All-Star and TFT.')
        #Drop extraneous columns
        try:
            leagues_df.drop(axis=1, columns=['displayPriority','image','sport',
                                            'lightImage','darkImage','slug'],
                            inplace=True)
        except:
            print('We have already cleared the columns that we did not need.')
        leagues_df.to_json('esports-data/leagues-cleaned.json')

In [15]:
leagues_df=pd.read_json('./esports-data/leagues-cleaned.json')
leages_df

Unnamed: 0,id,name,region,priority,tournaments
0,98767991299243168,LCS,NORTH AMERICA,1,"[{'id': '110303581083678395'}, {'id': '1095170..."
1,109511549831443328,LCS Challengers,NORTH AMERICA,1000,"[{'id': '110574243270525539'}, {'id': '1095186..."
2,109518549825754240,LCS Challengers Qualifiers,NORTH AMERICA,1000,"[{'id': '110894544221659776'}, {'id': '1107338..."
3,107898214974993344,College Championship,NORTH AMERICA,1000,[{'id': '107898708099217418'}]
4,98767991332355504,CBLOL,BRAZIL,204,"[{'id': '110413046183015975'}, {'id': '1095236..."
5,98767991310872064,LCK,KOREA,4,"[{'id': '110909366079472439'}, {'id': '1103715..."
6,98767991355908944,LCL,COMMONWEALTH OF INDEPENDENT STATES,209,"[{'id': '107417471555810057'}, {'id': '1062697..."
7,105709090213554608,LCO,OCEANIA,207,"[{'id': '109675608880518030'}, {'id': '1096756..."
8,98767991302996016,LEC,EMEA,3,"[{'id': '110848560874526298'}, {'id': '1104293..."
9,98767991349978720,LJL,JAPAN,208,"[{'id': '110395308894756789'}, {'id': '1097102..."


## Tournaments Data 

In [39]:
tournaments_df=pd.read_json('./esports-data/tournaments.json')
new_leagues_df=leagues_df[['id','name']].rename(columns={'id':'leagueId','name':'leagueName'})
new_tournaments=tournaments_df.merge(new_leagues_df,on='leagueId').sort_values(by='startDate',ascending=False)


In [52]:
new_tournaments.to_json('esports-data/tournaments-cleaned.json')

## Mapping data 

In [68]:
mapping_test=[]
with open('./esports-data/mapping_data.json') as f :
    mapping_data=json.load(f)

for entry in mapping_data:
    game_id=entry.get('esportsGameId')
    platformgameid=entry.get('platformGameId')
    teammapping=entry.get("teamMapping")
    red_team=teammapping.get('200')
    blue_team=teammapping.get('100')
    participantmapping=entry.get('participantMapping')
    dict_mapping={'esportsGameId': game_id ,'PlatformGameId': platformgameid, 'Red' : red_team, 'Blue' : blue_team}
    for participant_id, participant_info in participantmapping.items():
        dict_mapping[participant_id] = participant_info


    mapping_test.append(dict_mapping)
mapping_test_df=pd.DataFrame(mapping_test)



In [70]:
mapping_test_df.to_json('./esports-data/mapping_data_cleaned.json')

## Stages 

In [49]:
stage_value=new_tournaments.at[9,'stages']

The history saving thread hit an unexpected error (OperationalError('database or disk is full')).History will not be written to the database.


In [50]:
#So, let's get every tournament's games and participants:
list_of_games = []
column_names = ['stageName','id','Red','Blue','winner']
for tour_stage in stage_value:
    #Iterating on both regular season and playoffs
    stage_name = tour_stage.get('name',None)
    stage_sections = tour_stage.get('sections',None)

    for stage_section in stage_sections:
        stage_matches = stage_section.get('matches',None)

        #If we have something and it isn't downright empty:
        for stage_match in stage_matches:
            stage_games = stage_match.get('games',None)

            if stage_games:
                for stage_game in stage_games:
                    #Only completed games matter
                    if stage_game.get('state',None) == 'completed':
                        game_id = stage_game.get('id',None)
                        game_red = None
                        game_blue = None
                        game_winner = None
                        for team in stage_game.get('teams',None):
                            game_red = team.get('id') if team.get('side') == 'red' else game_red
                            game_blue = team.get('id') if team.get('side') == 'blue' else game_blue
                            game_winner = team.get('side') if team.get('result').get('outcome') == 'win' else game_winner
                        list_of_games.append([stage_name,game_id,game_red,game_blue,game_winner])

stages_df = pd.DataFrame(list_of_games,columns=column_names)

[{'name': 'Regular Season',
  'type': None,
  'slug': 'regular_season',
  'sections': [{'name': 'Regular Season',
    'matches': [{'id': '109467311740709831',
      'type': 'normal',
      'state': 'completed',
      'mode': 'classic',
      'strategy': {'type': 'bestOf', 'count': 1},
      'teams': [{'id': '105521706535388095',
        'side': 'blue',
        'record': {'wins': 13, 'losses': 1, 'ties': 0},
        'result': {'outcome': 'win', 'gameWins': 1},
        'players': [{'id': '102787200035816046', 'role': 'bottom'},
         {'id': '105554098262562524', 'role': 'support'},
         {'id': '105515354716470535', 'role': 'top'},
         {'id': '102418332506104195', 'role': 'jungle'},
         {'id': '107614656149746484', 'role': 'mid'}]},
       {'id': '105521637552587612',
        'side': 'red',
        'record': {'wins': 9, 'losses': 5, 'ties': 0},
        'result': {'outcome': 'loss', 'gameWins': 0},
        'players': [{'id': '104738067226752196', 'role': 'top'},
         {