# Preprocess
<b>Neural Network Architecture</b> The input to the neural network is a 19 × 19 × 17 image stack
comprising 17 binary feature planes. 8 feature planes $X_t$ consist of binary values indicating the
presence of the current player’s stones ($X_t^i = 1$ if intersection $i$ contains a stone of the player’s
colour at time-step $t$; $0$ if the intersection is empty, contains an opponent stone, or if $t < 0$). A
further 8 feature planes, $Y_t$ , represent the corresponding features for the opponent’s stones. The
final feature plane, $C$, represents the colour to play, and has a constant value of either 1 if black
is to play or 0 if white is to play. These planes are concatenated together to give input features
$s_t = [X_t, Y_t, X_{t-1}, Y_{t-1}, \ldots, X_{t-7}, Y_{t-7}, C]$.

In [1]:
from preprocessing import Preprocess
from go import GameState, BLACK, WHITE, EMPTY
import os, sgf
import numpy as np

# Common path prefixes: everything lives under ~/python
_PYTHON_ROOT = os.path.join(os.path.expanduser('~'), 'python')
_SGF_ROOT = os.path.join(_PYTHON_ROOT, 'tutorial_files')
_ZERO_DATA_ROOT = os.path.join(_PYTHON_ROOT, 'tutorial_data', 'zero')

# Input: SGF files waiting to be processed
FILE_FOLDER = os.path.join(_SGF_ROOT, 'go')
# Output: archive folders the SGF files are moved to once handled
SUCCEED_FOLDER = os.path.join(_SGF_ROOT, 'succeed')
FAIL_FOLDER = os.path.join(_SGF_ROOT, 'fail')
# Output: databases
TRAIN_DATA = os.path.join(_ZERO_DATA_ROOT, 'train_data')
TEST_DATA = os.path.join(_ZERO_DATA_ROOT, 'test_data')

# Point this at TRAIN_DATA or TEST_DATA to choose which database is written
DATA_FOLDER = TEST_DATA

# SGF coordinate letters, one per row (or column) of a 19x19 board.
# BOARD_POSITION.index(letter) converts a,b,c,d... to 0,1,2,3...
# SGF writes a PASS as [tt] or []; 't' is deliberately not part of this string.
BOARD_POSITION = 'abcdefghijklmnopqrs'

# Feature names handed to Preprocess for AlphaGo Zero.
# The parsing loop reads planes 0 and 1 of the resulting tensor as
# player / opponent stones and plane 3 as the current player colour.
DEFAULT_FEATURES = ["board", "color"]

# Index permutation that swaps player and opponent in the 14 history planes:
# 0,2,4,6... are player, 1,3,5,7... are opponent, so each pair (2k, 2k+1)
# is exchanged, giving [1,0,3,2,...,13,12].
OPPONENT_INDEX = [plane ^ 1 for plane in range(14)]

## Define DB output
  [LevelDB](http://leveldb.org/) is the preferred database because it automatically uses [Snappy](https://github.com/google/snappy) to compress the data.

In [2]:
from caffe2.python import core, utils
from caffe2.proto import caffe2_pb2

def write_db(db_type, db_name, base_name, features, labels, rewards):
    """Store one record per sample in a Caffe2 database.

    Record ``i`` is a serialized ``TensorProtos`` holding three tensors, in
    order: ``features[i]``, ``labels[i]`` and ``rewards[i]``. Its key is
    ``base_name`` followed by ``_`` and the sample index zero-padded to at
    least three digits.

    Args:
        db_type: database type passed to ``core.C.create_db`` (e.g. 'leveldb').
        db_name: database name/path passed to ``core.C.create_db``.
        base_name: key prefix shared by all records of this call.
        features: array whose first axis enumerates the samples.
        labels: per-sample labels, indexed like ``features``.
        rewards: per-sample rewards, indexed like ``features``.
    """
    database = core.C.create_db(db_type, db_name, core.C.Mode.write)
    txn = database.new_transaction()
    sample_count = features.shape[0]
    for index in range(sample_count):
        record = caffe2_pb2.TensorProtos()
        tensors = [
            utils.NumpyArrayToCaffe2Tensor(column[index])
            for column in (features, labels, rewards)
        ]
        record.protos.extend(tensors)
        key = '{}_{:0=3}'.format(base_name, index)
        txn.put(key, record.SerializeToString())
    # Close the transaction first, and only then the db.
    del txn
    del database



## Parse SGF game file
  Parse the SGF files. SGF uses the characters a to s to represent lines 1 to 19. We convert each SGF game to Caffe2 tensors and write them to the database one game at a time.

In [3]:
#%%capture output
# Walk FILE_FOLDER and convert every qualifying SGF game into training samples:
#   features - (N,17,19,19) int8 stack of the position before each move
#   labels   - (N,) int32 move index x * 19 + y
#   rewards  - (N,) float32, 1.0 when the game result starts with 'B+' else 0.0
# The samples of one game are written to the database at DATA_FOLDER, then the
# SGF file is moved to SUCCEED_FOLDER or FAIL_FOLDER so that an interrupted run
# can resume without reprocessing files.
# NOTE(review): the file is moved once per game, so an SGF file holding more
# than one game would raise on the second os.rename - confirm inputs are
# single-game files.
p = Preprocess(DEFAULT_FEATURES)
for dirname, subDirList, fileList in os.walk(FILE_FOLDER):
    for filename in fileList:
        with open(os.path.join(dirname, filename)) as f:
            collection = sgf.parse(f.read())
            for game in collection:
                root = game.nodes[0].properties # nodes[0] carries the game properties
                # Size of the Board should only be 19x19, Komi should be 7.5 according to Chinese rule.
                # .get() makes a file that lacks one of these properties count as
                # unqualified instead of aborting the whole run with a KeyError.
                if (root.get('SZ') == ['19']
                    and root.get('RU') == ['Chinese']
                    and root.get('KM') == ['7.50']):
                    try:
                        state = GameState() # Initialize GameState
                        # Samples are collected in lists and turned into arrays once
                        # per game; growing arrays with np.append copies them every move.
                        sample_features = []
                        sample_labels = []
                        feature_history = np.zeros(shape=(1,17,19,19), dtype=np.int8)
                        # Property values are lists of strings, so the prefix test must
                        # look at the first value (e.g. 'B+R', 'B+1.5'), not at the list.
                        reward = 1.0 if root['RE'][0].startswith('B+') else 0.0
                        for node in game.nodes[1:]: # Except nodes[0] for game properties
                            feature_current = p.state_to_tensor(state).astype(np.int8) # Player/Opponent/Empty/Color
                            # New stack = current stones, then the previous 14 planes with
                            # player/opponent swapped, then the colour plane.
                            feature_history = np.concatenate((feature_current[0:1,0:2], # Xt, Yt
                                                              feature_history[0:1,OPPONENT_INDEX],
                                                              feature_current[0:1,3:4]), # Color
                                                            axis=1)
                            if 'B' in node.properties and len(node.properties['B'][0]) == 2: # Black move
                                coords, color = node.properties['B'][0], BLACK
                            elif 'W' in node.properties and len(node.properties['W'][0]) == 2: # White move
                                coords, color = node.properties['W'][0], WHITE
                            else:
                                # Pass ([]) or a node without a move: there is no board
                                # point to label, so no sample is recorded. Previously the
                                # coordinates of the preceding move were reused as the label.
                                continue
                            # A letter outside BOARD_POSITION (e.g. a [tt] pass) raises
                            # ValueError here and the game is reported as failed.
                            x = BOARD_POSITION.index(coords[0])
                            y = BOARD_POSITION.index(coords[1])
                            state.do_move(action=(x,y),color = color)
                            sample_features.append(feature_history)
                            sample_labels.append(x * 19 + y)
                        if sample_features:
                            features = np.concatenate(sample_features, axis=0)
                        else:
                            features = np.empty(shape=(0,17,19,19), dtype=np.int8)
                        labels = np.asarray(sample_labels, dtype=np.int32)
                        rewards = np.full(shape=(len(sample_labels),), fill_value=reward, dtype=np.float32)
                        write_db(
                            db_type = 'leveldb',
                            db_name = DATA_FOLDER, # replace this with TRAIN_DATA or TEST_DATA if you want to separate the dataset
                            base_name = os.path.basename(filename),
                            features = features,
                            labels = labels,
                            rewards = rewards
                        )
                        os.rename(f.name,os.path.join(SUCCEED_FOLDER,filename)) # move the file to SUCCEED_FOLDER, so Preprocess can resume after interrupted
                        print('{} succeeded'.format(filename))
                    except Exception as e:
                        os.rename(f.name,os.path.join(FAIL_FOLDER,filename)) # move the file to FAIL_FOLDER, so Preprocess can resume after interrupted
                        print('{} failed dues to {}'.format(filename, e))
                else:
                    os.rename(f.name,os.path.join(FAIL_FOLDER,filename)) # move the file to FAIL_FOLDER, so Preprocess can resume after interrupted
                    print('{} unqualified dues to Size, Rule or Komi'.format(filename))

2017-05-10-25.sgf unqualified dues to Size, Rule or Komi
2017-05-07-3.sgf unqualified dues to Size, Rule or Komi
2017-05-15-10.sgf unqualified dues to Size, Rule or Komi
2017-05-16-20.sgf unqualified dues to Size, Rule or Komi
2017-05-26-28.sgf succeeded
2017-05-29-27.sgf unqualified dues to Size, Rule or Komi
2017-05-27-21.sgf unqualified dues to Size, Rule or Komi
2017-05-07-28.sgf succeeded
2017-05-27-11.sgf unqualified dues to Size, Rule or Komi
2017-05-17-50.sgf succeeded
2017-05-04-5.sgf unqualified dues to Size, Rule or Komi
2017-05-10-9.sgf succeeded
2017-05-10-21.sgf unqualified dues to Size, Rule or Komi
2017-05-28-12.sgf unqualified dues to Size, Rule or Komi
2017-05-13-3.sgf unqualified dues to Size, Rule or Komi
2017-05-17-13.sgf unqualified dues to Size, Rule or Komi
2017-05-12-25.sgf unqualified dues to Size, Rule or Komi
2017-05-03-6.sgf unqualified dues to Size, Rule or Komi
2017-05-05-5.sgf unqualified dues to Size, Rule or Komi
2017-05-02-2.sgf unqualified dues to Si

2017-05-16-26.sgf succeeded
2017-05-05-7.sgf unqualified dues to Size, Rule or Komi
2017-05-07-9.sgf unqualified dues to Size, Rule or Komi
2017-05-07-8.sgf unqualified dues to Size, Rule or Komi
2017-05-08-19.sgf succeeded
2017-05-26-6.sgf unqualified dues to Size, Rule or Komi
2017-05-16-14.sgf unqualified dues to Size, Rule or Komi
2017-05-19-9.sgf unqualified dues to Size, Rule or Komi
2017-05-29-25.sgf unqualified dues to Size, Rule or Komi
2017-05-25-10.sgf unqualified dues to Size, Rule or Komi
2017-05-29-16.sgf unqualified dues to Size, Rule or Komi
2017-05-12-8.sgf unqualified dues to Size, Rule or Komi
2017-05-15-11.sgf unqualified dues to Size, Rule or Komi
2017-05-28-15.sgf unqualified dues to Size, Rule or Komi
2017-05-31-15.sgf unqualified dues to Size, Rule or Komi
2017-05-03-1.sgf unqualified dues to Size, Rule or Komi
2017-05-18-21.sgf unqualified dues to Size, Rule or Komi
2017-05-28-7.sgf succeeded
2017-05-12-24.sgf unqualified dues to Size, Rule or Komi
2017-05-17-3

2017-05-14-4.sgf succeeded
2017-05-31-5.sgf unqualified dues to Size, Rule or Komi
2017-05-04-6.sgf unqualified dues to Size, Rule or Komi
2017-05-24-18.sgf unqualified dues to Size, Rule or Komi
2017-05-30-20.sgf unqualified dues to Size, Rule or Komi
2017-05-13-31.sgf unqualified dues to Size, Rule or Komi
2017-05-07-4.sgf unqualified dues to Size, Rule or Komi
2017-05-25-8.sgf succeeded
2017-05-19-17.sgf unqualified dues to Size, Rule or Komi
2017-05-17-54.sgf unqualified dues to Size, Rule or Komi
2017-05-09-5.sgf succeeded
2017-05-06-39.sgf unqualified dues to Size, Rule or Komi
2017-05-10-17.sgf succeeded
2017-05-17-6.sgf unqualified dues to Size, Rule or Komi
2017-05-12-13.sgf unqualified dues to Size, Rule or Komi
2017-05-30-1.sgf unqualified dues to Size, Rule or Komi
2017-05-05-37.sgf unqualified dues to Size, Rule or Komi
2017-05-14-7.sgf succeeded
2017-05-05-20.sgf unqualified dues to Size, Rule or Komi
2017-05-07-22.sgf succeeded
2017-05-04-20.sgf unqualified dues to Size,

2017-05-13-29.sgf succeeded
2017-05-08-16.sgf unqualified dues to Size, Rule or Komi
2017-05-18-4.sgf unqualified dues to Size, Rule or Komi
2017-05-07-15.sgf succeeded
2017-05-15-16.sgf unqualified dues to Size, Rule or Komi
2017-05-22-25.sgf unqualified dues to Size, Rule or Komi
2017-05-13-12.sgf unqualified dues to Size, Rule or Komi
2017-05-04-12.sgf unqualified dues to Size, Rule or Komi
2017-05-30-2.sgf succeeded
2017-05-26-21.sgf succeeded
2017-05-02-18.sgf unqualified dues to Size, Rule or Komi
2017-05-09-10.sgf unqualified dues to Size, Rule or Komi
2017-05-17-5.sgf unqualified dues to Size, Rule or Komi
2017-05-17-2.sgf unqualified dues to Size, Rule or Komi
2017-05-31-1.sgf unqualified dues to Size, Rule or Komi
2017-05-22-5.sgf unqualified dues to Size, Rule or Komi
2017-05-13-17.sgf unqualified dues to Size, Rule or Komi
2017-05-30-11.sgf unqualified dues to Size, Rule or Komi
2017-05-05-39.sgf unqualified dues to Size, Rule or Komi
2017-05-29-19.sgf succeeded
2017-05-30-