In [35]:
import os
import re
from typing import List

#### Loading from Game_log file

The next block defines the functions needed to perform minimal pre-processing on the log file, turning an encoded log file into a list of game actions. The cleaning is not perfect, and some traces of the encoding characters still persist.

In [38]:
def load_log_from_file(log_path:str)-> str:
    """Read the raw game log stored at ``log_path`` and return its whole text.

    The file is opened in text mode and decoded as latin-1.
    """
    with open(log_path, mode='r', encoding="latin-1") as handle:
        return handle.read()

def find_and_mask_players(log : str) -> str:
    """Replace the players' names in ``log`` with ``<P1>`` and ``<P2>``.

    Player names are taken from the ``@P<name> rolled`` entries, in order of
    first appearance: the first distinct name becomes ``<P1>`` and the second
    distinct name becomes ``<P2>``. Every occurrence of each name in the log
    is replaced.

    Names are substituted literally, so regex metacharacters inside a name
    (``.``, ``+``, ``(`` ...) can neither over-match other text nor raise.
    If fewer than two distinct names are found, only the ones found are
    masked and the rest of the log is returned unchanged.

    Args:
        log: Text of a game log.

    Returns:
        The log with the detected player names masked.
    """
    # ``\S+`` (rather than a nested ``([^\s]+)*``) cannot capture an empty
    # name and cannot backtrack exponentially on long non-space runs.
    rolled_names = re.findall(r"@P(\S+) rolled", log)

    # Keep distinct names in first-seen order, so a repeated roll by the
    # same player does not consume the second mask.
    players = []
    for name in rolled_names:
        if name not in players:
            players.append(name)

    # zip() stops after two masks, so any further names are left untouched.
    for player, mask in zip(players, ("<P1>", "<P2>")):
        # Literal replacement: the name is data, not a pattern.
        log = log.replace(player, mask)
    return log

def clean_action(action : str) -> str:
    """Normalise one raw game action into a single clean line.

    Steps, applied in order:
      1. Every run of whitespace is collapsed into a single space.
      2. Everything after the first period is dropped (the period is kept).
      3. If the action starts with ``Turn``, everything after the first
         ``>`` is dropped (the ``>`` is kept).

    Args:
        action: One chunk of the log, as produced by splitting on ``@P``.

    Returns:
        The cleaned action; may be an empty string.
    """
    # Patterns are raw strings: "\s" and "\." are invalid escape sequences
    # in ordinary literals and trigger a SyntaxWarning on recent Pythons.
    action = re.sub(r"\s+", " ", action)
    # Strip the trailing residue after the sentence. The search is leftmost
    # and ``.*`` is greedy, so the cut happens at the FIRST period.
    # NOTE(review): an action containing an earlier period (e.g. inside a
    # name) is truncated there - confirm this is acceptable.
    action = re.sub(r"\..*$", ".", action)
    if action.startswith("Turn"):
        # Turn headers carry no period; cut the residue after the first ">".
        action = re.sub(r">.*$", ">", action)
    return action

def dump_game_actions_to_file(actions:List[str], output_file:str, encoding:str = "utf-8"):
    """Write ``actions`` to ``output_file``, one action per line.

    The file is created or truncated. An explicit encoding is used because
    the cleaned actions can still contain non-ASCII residue from the raw
    log; relying on the platform default may fail with ``UnicodeEncodeError``
    on systems whose default encoding is not UTF-8.

    Args:
        actions: Cleaned game actions, without trailing newlines.
        output_file: Path of the text file to write.
        encoding: Text encoding of the output file (defaults to UTF-8).
    """
    with open(output_file, "w", encoding=encoding) as file:
        file.writelines(action + "\n" for action in actions)

def process_game_log(log_path: str, output_dir:str):
    """Clean one raw game log and write its actions to ``<output_dir>/<log_id>.txt``.

    The log id is the part of the file name between ``GameLog_`` and
    ``.dat``. The log is loaded, player names are masked, the text is split
    into actions on the ``@P`` marker, each action is cleaned, and the
    non-empty results are written one per line.

    Args:
        log_path: Path to a raw log file named ``...GameLog_<id>.dat``.
        output_dir: Directory that receives ``<id>.txt``; a trailing path
            separator is optional.

    Raises:
        ValueError: If the file name does not contain ``GameLog_<id>.dat``.
    """
    # Match against the file name only, so a directory that happens to
    # contain "GameLog_" cannot leak into the extracted id.
    id_match = re.search(r"GameLog_(.*)\.dat", os.path.basename(log_path))
    if id_match is None:
        raise ValueError(
            "Cannot extract a log id from %r: expected a file name like "
            "'...GameLog_<id>.dat'" % (log_path,)
        )
    log_id = id_match.group(1)

    masked_log = find_and_mask_players(load_log_from_file(log_path))

    # Split into game actions and ignore the first chunk, as it just
    # contains the game ids.
    raw_actions = masked_log.split("@P")[1:]

    # Clean every action and keep only the non-empty ones.
    actions = [cleaned for cleaned in map(clean_action, raw_actions) if cleaned]

    dump_game_actions_to_file(actions, os.path.join(output_dir, log_id + ".txt"))
    

In [40]:
# Clean the example match log and store the result next to the other cleaned logs.
process_game_log(
    output_dir='../data/examples/game_logs_clean/',
    log_path='../data/examples/game_logs_raw/Match_GameLog_30033c5b-953f-4761-a205-b6bc59d454f9.dat',
)


#### Mask Cards

Just as we masked the players, we also want to mask the names of the cards (as they carry semantic connotations), replacing each name with an id. We will use the same ids that Magic Online uses.
Relevant endpoint: https://scryfall.com/docs/api/cards/mtgo

In [33]:
def find_card_names(action:str) -> List[str]:
    """Return the names of all cards referenced in ``action``.

    A card reference starts with ``@[`` and its name runs up to the next
    ``@:``. One name is returned per reference, in order of appearance;
    the list is empty when the action references no card.

    Example:
        oÛK%@PAje8 plays @[Forest@:163744,544:@] -> ["Forest"]
    """
    # Non-greedy match: with a greedy ``.*`` an action that references
    # several cards would collapse into one bogus name spanning from the
    # first ``@[`` to the last ``@:``.
    return re.findall(r"@\[(.*?)@:", action)

def find_card_ids(action:str) -> List[str]:
    """Return the ids of all cards referenced in ``action``.

    Inside a card reference the id is the text between ``@:`` and the
    following comma. One id is returned per reference, in order of
    appearance; the list is empty when the action references no card.

    Example:
        oÛK%@PAje8 plays @[Forest@:163744,544:@] -> ["163744"]
    """
    # Non-greedy match: a greedy ``.*`` would run up to the LAST comma in
    # the action, swallowing later references and any trailing text.
    return re.findall(r"@:(.*?),", action)