# Data Parsing

## import data:

In [2]:
import json
import numpy as np 

PARSE = True
TEST_PARSE = True

In [3]:
# Load the 9x9 game samples from disk.

with open("../data/samples-9x9.json") as json_file:
    data = json.loads(json_file.read())

# data [list]: every element is a dict with the keys
# ['depth', 'list_of_moves', 'black_stones', 'white_stones', 'rollouts',
#  'black_wins', 'black_points', 'white_wins', 'white_points']

## utils:

In [4]:
def name_to_coord(s):
    """Translate a move name such as ``'E6'`` into a zero-based ``(col, lin)`` pair.

    The first character is looked up among the column letters ``A``-``J``
    (``I`` is skipped); the remaining characters are read as a 1-based row
    number and shifted to start at 0. The special name ``'PASS'`` maps to
    ``(-1, -1)``.

    Raises:
        KeyError: if the first character is not a known column letter.
        ValueError: if the rest of the name is not an integer.
    """
    if s == 'PASS':
        return (-1, -1)

    # Column letters in board order; the letter 'I' is not used.
    column_of = {letter: idx for idx, letter in enumerate('ABCDEFGHJ')}
    return (column_of[s[0]], int(s[1:]) - 1)

def flatten(coord):
    """Collapse a ``(col, lin)`` pair into a single index on a 9-wide board.

    The index is ``9 * lin + col``. The pass marker ``(-1, -1)`` is mapped
    to ``-1``.
    """
    if coord != (-1, -1):
        row = coord[1]
        col = coord[0]
        return 9 * row + col
    return -1

def name_to_flat(s):
    """Convert a move name (e.g. ``'E6'`` or ``'PASS'``) directly to its flat index.

    This is ``flatten`` applied to the result of ``name_to_coord``.
    """
    coord = name_to_coord(s)
    return flatten(coord)

## Data Parsing

In [5]:
import sys
sys.path.append('../bibli/go_starter_pack/')
from Goban import Board

def parse_game_record(record): # TODO integrate goban
    """Replay one game record on a fresh ``Board`` and collect training samples.

    For every replayed move the sample consists of the board as it was
    *before* the move, the flat index of the move, and a win ratio taken from
    the record's rollout statistics.

    Args:
        record: dict with the keys 'depth', 'list_of_moves', 'black_stones',
            'white_stones', 'rollouts', 'black_wins' and 'white_wins'
            (other keys are ignored here).

    Returns:
        ``(board_hist, plabels, vlabels, depth)``.

        * On success ``board_hist`` has shape ``(depth, 9, 9)`` and
          ``plabels`` / ``vlabels`` have shape ``(depth, 1)``.
        * On failure the three arrays are empty (``np.array([])``) and
          ``depth`` is the number of moves replayed before stopping.

        Parsing fails when ``Board.push`` raises ``AssertionError``, when a
        move appears in neither stone list, when the number of replayed moves
        differs from ``record['depth']``, or when no move was replayed at all.

    Raises:
        ZeroDivisionError: if ``record['rollouts']`` is 0.
    """
    # Win ratios of each colour over all rollouts of this record.
    rolls = record['rollouts']
    b_wp = record['black_wins'] / rolls
    w_wp = record['white_wins'] / rolls
    record_depth = record['depth']

    gboard = Board()
    # Samples are collected in plain lists and converted once at the end;
    # growing arrays with np.vstack on every move re-copies the whole history.
    # Each snapshot is copied because the board may be updated in place by push.
    # assumes Board._board is a flat sequence of 81 cells — TODO confirm
    boards = [np.array(gboard._board)]
    policies = []
    values = []

    flats = [name_to_flat(m) for m in record['list_of_moves']]
    # Sets give constant-time membership tests inside the replay loop.
    b_flats = {name_to_flat(m) for m in record['black_stones']}
    w_flats = {name_to_flat(m) for m in record['white_stones']}

    depth = 0
    for move in flats:
        try:
            gboard.push(move)
        except AssertionError:
            # push rejected the move (presumably illegal — confirm in Goban).
            break

        # The colour of the move is inferred from the stone lists.
        # NOTE(review): a black move is labelled with the *white* win ratio
        # and vice versa — confirm this perspective is the intended one.
        # NOTE(review): the stone lists presumably describe the final
        # position, so a move whose stone is missing there aborts the parse.
        if move in b_flats:
            wp = w_wp
        elif move in w_flats:
            wp = b_wp
        else:
            break

        policies.append([move])
        values.append([wp])

        # The position after the last move has no move to pair with, so it
        # is not stored.
        if depth != record_depth - 1:
            boards.append(np.array(gboard._board))

        depth += 1

    # A record that replays no move at all yields no usable sample; previously
    # it returned one board together with None labels.
    if depth != record_depth or depth == 0:
        return np.array([]), np.array([]), np.array([]), depth

    board_hist = np.array(boards).reshape((-1, 9, 9))
    plabels = np.array(policies)
    vlabels = np.array(values)
    return board_hist, plabels, vlabels, depth


# record = data[420]
# Smoke test of parse_game_record on the last record of the data set.
record = data[-1]
print(record)
if TEST_PARSE:
    board_hist, plabels, vlabels, depth = parse_game_record(record)
    print('board hist shape', board_hist.shape, np.any(board_hist))
    print('plabels shape', plabels.shape)
    print('vlabels shape', vlabels.shape)
    # An empty board history is how parse_game_record reports a failed parse.
    print('parsing successful' if len(board_hist) > 0 else 'parsing aborted')
    print('expected depth', depth, 'actual depth:', board_hist.shape[0])
        

{'depth': 29, 'list_of_moves': ['E6', 'F4', 'C5', 'C3', 'G7', 'E4', 'D6', 'G5', 'H6', 'H5', 'F7', 'G6', 'J7', 'B4', 'J5', 'B6', 'J4', 'B5', 'F5', 'H3', 'D8', 'B8', 'J3', 'H4', 'D4', 'D3', 'H2', 'C4', 'G2'], 'black_stones': ['D8', 'F7', 'G7', 'J7', 'D6', 'E6', 'H6', 'C5', 'F5', 'J5', 'D4', 'J4', 'J3', 'G2', 'H2'], 'white_stones': ['B8', 'B6', 'G6', 'B5', 'G5', 'H5', 'B4', 'C4', 'E4', 'F4', 'H4', 'C3', 'D3', 'H3'], 'rollouts': 100, 'black_wins': 63, 'black_points': 381.0, 'white_wins': 37, 'white_points': 229.0}
board hist shape (29, 9, 9) True
plabels shape (29, 1)
vlabels shape (29, 1)
parsing successful
expected depth 29 actual depth: 29


In [6]:
from tqdm import tqdm

def parse_data(data, view_bar=True): # TODO correct bug in board.push
    """Parse every game record and stack the successful ones into three arrays.

    Args:
        data: sequence of game-record dicts accepted by ``parse_game_record``.
        view_bar: when true, iterate through ``tqdm`` to show a progress bar.

    Returns:
        ``(board_data, plabels, vlabels, count)`` where the three arrays are
        the per-record results stacked along the first axis and ``count`` is
        the number of records that were parsed successfully. When no record
        could be parsed (including empty ``data``) the three arrays are empty
        and ``count`` is 0.
    """
    # Per-record results are gathered in lists and stacked once at the end:
    # calling np.vstack inside the loop re-copies everything parsed so far.
    board_chunks = []
    policy_chunks = []
    value_chunks = []

    # Every record, including the first one, goes through the same success
    # check; the first record used to be counted and used unconditionally.
    iterator = tqdm(data) if view_bar else data
    for record in iterator:
        board_hist, r_plabels, r_vlabels, _ = parse_game_record(record)
        if len(board_hist) == 0:
            continue
        # Guard against a record that yields boards but no labels, which
        # would silently turn the stacked labels into an object array.
        if r_plabels is None or r_vlabels is None:
            continue
        board_chunks.append(board_hist)
        policy_chunks.append(r_plabels)
        value_chunks.append(r_vlabels)

    count = len(board_chunks)
    if count == 0:
        return np.array([]), np.array([]), np.array([]), count

    board_data = np.vstack(board_chunks)
    plabels = np.vstack(policy_chunks)
    vlabels = np.vstack(value_chunks)
    return board_data, plabels, vlabels, count

# TODO check if output is in correct format
# Run the full parse only when the PARSE flag is set; `count` is the fourth
# value returned by parse_data.
if PARSE:
    board_data, plabels, vlabels, count = parse_data(data)
    print(count)

100%|██████████| 41562/41562 [03:19<00:00, 208.21it/s]

26781





In [7]:
liste_board = []

# Show the third stored board with every cell equal to 2 replaced by 0, so
# only the remaining (non-2) values are visible.
print(np.where(board_data[2] == 2, 0, board_data[2]))


[[0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0]
 [0 0 1 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0]]


In [77]:
from tqdm import tqdm
import time 

def separate_board(board):
    """Split a board into a black plane and a white plane.

    Cells are expected to hold 0, 1 or 2. In the black plane every 2 becomes
    0 and all other values are kept; in the white plane every 1 becomes 0 and
    all other values ``x`` become ``int(x / 2)`` (so 2 becomes 1).

    Args:
        board: array-like of any shape (an empty array is accepted).

    Returns:
        ``(black, white)``: two arrays with the same shape as ``board``.
    """
    board = np.asarray(board)
    # Whole-array operations replace np.vectorize, which loops in Python,
    # rejects size-0 input and guesses the output dtype from the first cell.
    black = np.where(board == 2, 0, board)
    # astype(int) truncates toward zero, exactly like int(x / 2).
    white = np.where(board == 1, 0, (board / 2).astype(int))
    return black, white

# Alternating player ids 0, 1, 0, 1, ... — one pair for every two of the
# first (at most) ten boards.
list_players = []
for i in range(len(board_data[:10]) // 2):
    list_players.extend((0, 1))


def boards_to_inputs(board_data, players_list, n_channels=7):
    """Build the stacked-plane network input for every board.

    For board ``i`` the output holds, for ``j = 0 .. n_channels // 2 - 1``,
    the black plane of board ``i - j`` in channel ``2 * j`` and its white
    plane in channel ``2 * j + 1`` (planes stay all-zero when ``i - j`` is
    negative). The last channel is the player plane: all ones when
    ``players_list[i]`` is non-zero, all zeros otherwise.

    NOTE(review): history planes are read from the preceding rows of
    ``board_data`` regardless of which game they belong to — confirm this is
    acceptable when several games are stacked.

    Args:
        board_data: sequence of boards, each reshapeable to 9x9, with cell
            values understood by ``separate_board``.
        players_list: one entry per board; 0 selects an all-zero player plane.
        n_channels: total number of channels; must be odd.

    Returns:
        Float array of shape ``(len(board_data), n_channels, 9, 9)``.

    Raises:
        AssertionError: if ``n_channels`` is even.
        ValueError: if ``players_list`` has fewer entries than ``board_data``.
    """
    assert n_channels%2 == 1
    n_boards = n_channels//2
    print("n_boards", n_boards)

    # Use the argument, not the notebook-level `list_players` global.
    if len(players_list) < len(board_data):
        raise ValueError("players_list must have one entry per board")

    inputs = np.zeros((len(board_data), n_channels, 9, 9))
    for i in range(len(board_data)):
        for j in range(n_boards):
            if i - j < 0:
                # No earlier board available: the remaining planes stay zero.
                break
            # black is one, white is 2
            black, white = separate_board(board_data[i - j])
            inputs[i, 2 * j] = black.reshape((9, 9))
            inputs[i, 2 * j + 1] = white.reshape((9, 9))

        # The player plane is always the last channel; it used to be
        # hard-coded to channel 6, which is only right for n_channels == 7.
        if players_list[i] != 0:
            inputs[i, n_channels - 1] = 1
    return inputs
# Build the inputs for the first four boards and show the last channel
# (index 6) of the second sample.
result = boards_to_inputs(board_data[:4], players_list=list_players, n_channels=7)
print("result data 0", result[1, 6])


n_boards 3
result data 0 [[1. 1. 1. 1. 1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1. 1. 1. 1. 1.]]


In [59]:
# Print the whole stacked board array; NumPy abbreviates large arrays with "...".
print("board data", board_data)

board data [[[0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  ...
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]]

 [[0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  ...
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]]

 [[0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 2 0 0]
  ...
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]]

 ...

 [[0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 0 2 ... 0 2 1]
  ...
  [0 0 0 ... 1 0 1]
  [0 2 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]]

 [[0 0 0 ... 0 0 0]
  [0 0 0 ... 0 1 0]
  [0 0 2 ... 0 2 1]
  ...
  [0 0 0 ... 1 0 1]
  [0 2 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]]

 [[0 0 0 ... 0 0 0]
  [0 0 0 ... 0 1 0]
  [0 0 2 ... 0 2 1]
  ...
  [0 0 0 ... 1 0 1]
  [0 2 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]]]


In [None]:
# Scratch check: np.any is False for an all-zero array, so `not np.any(a)`
# is True here.
a = np.array([0, 0, 0, 0, 0])

print(not np.any(a))

True


In [None]:
import torch
print('Saving')
board_save, plabels_save, vlabels_save = board_data, plabels, vlabels

# Convert each array to a float32 tensor.
b_t = torch.tensor(board_save, dtype=torch.float)
p_t = torch.tensor(plabels_save, dtype=torch.float)
v_t = torch.tensor(vlabels_save, dtype=torch.float)

print(b_t.dtype, p_t.dtype, v_t.dtype)

# The three files are written relative to the current working directory.
torch.save(b_t, 'board_data.pt')
torch.save(p_t, 'plabels.pt')
torch.save(v_t, 'vlabels.pt')

Saving
torch.float32 torch.float32 torch.float32


In [None]:
# Re-check the dtypes of the three tensors.
# NOTE(review): the output recorded for this cell (int32 for the label
# tensors) disagrees with the float conversion in the saving cell — it
# probably comes from an earlier kernel state; re-run to confirm.
print(b_t.dtype, p_t.dtype, v_t.dtype)


torch.float32 torch.int32 torch.int32
