In [14]:
from game import Chessboard
import hashlib
import game
from tqdm import tqdm
import json
import traceback
from glob import glob
import pickle

In [2]:
# Load expert list
with open("experts.txt", "r") as f:
    expert_ids = f.readlines()
    expert_ids = set(map(lambda x: x.strip(), expert_ids))

In [6]:
# match: (board, move, value)
def process_match(match, all_matches: dict):
    chessboard = Chessboard()
    player_ids = (match["players"][0]["bot"], match["players"][1]["bot"])

    last_round = match["log"][-2]
    winner = int(next(iter(last_round.keys())))
    both_experts = player_ids[0] in expert_ids and player_ids[1] in expert_ids

    for i, round in enumerate(match["log"]):
        player, round_value = next(iter(round.items()))

        # Filter out system log
        if player != "0" and player != "1":
            continue

        player = int(player)
        res = round_value["response"]
        move = [res["x0"], res["y0"], res["x1"], res["y1"], res["x2"], res["y2"]]

        # Save only expert rounds
        if player_ids[player] in expert_ids:
            # if this is the last round, check whether the move is correct
            if i == len(match["log"]) - 2 and not game.is_valid_act(
                chessboard.board, player + 1, *move
            ):
                raise Exception("Invalid move")

            key = hashlib.md5(chessboard.board.tobytes()).hexdigest()
            value = 0
            if both_experts:
                value = 1 if player == winner else -1

            if key not in all_matches:
                all_matches[key] = (chessboard.board.copy(), move, value)

        chessboard.act(" ".join(map(str, move)))

In [11]:
all_matches = dict()
# Process data
with open("data/raw/extracted/output/expert_matches.txt", "r") as f:
    matches = f.readlines()
    error_count = 0
    for i, match_str in enumerate(tqdm(matches, unit="match"), 1):
        try:
            process_match(json.loads(match_str), all_matches)
        except Exception as e:
            # tqdm.write(f"Error processing match {i}")
            # traceback.print_exception(type(e), e, e.__traceback__)
            error_count += 1
            continue
print(f"Total matches: {len(all_matches)}")
print(f"Erorr matches: {error_count}")

100%|██████████| 39487/39487 [00:30<00:00, 1287.46match/s]


In [15]:
# Load existing data
init_input_paths = glob("data/init/*.pickle", recursive=False)
init_inputs = {}
for path in tqdm(init_input_paths, unit="file"):
    init_inputs = {**init_inputs, **pickle.load(open(path, "rb"))}

100%|██████████| 5/5 [00:02<00:00,  2.35file/s]


In [18]:
# Merge data
wrong = 0
for key, value in tqdm(init_inputs.items(), unit="match"):
    (board, answer) = value
    # check answer is correct
    if not game.is_valid_act(board, board[answer[1],answer[0]], *answer):
        print("Invalid answer")
        wrong += 1
        continue
    else:
        key = hashlib.md5(board.tobytes()).hexdigest()
        all_matches[key] = (board, answer, 0)

print(f"Load: {len(init_inputs)}")
print(f"Wrong: {wrong}")

100%|██████████| 477310/477310 [00:11<00:00, 42144.46match/s]

Load: 477310
Wrong: 0





In [19]:
# Statistics for all matches
total = len(all_matches)
has_value = len(list(filter(lambda x: x[2] != 0, all_matches.values())))
print(f"Total matches: {total}")
print(f"Matches with value: {has_value}")

Total matches: 790951
Erorr matches: 9623
Matches with value: 37647


In [20]:
# Save data
pickle.dump(all_matches, open("data/all_matches.pickle", "wb"))

: 

In [None]:
# Data augmentation