<a href="https://colab.research.google.com/github/chosh84/omok-ai/blob/master/create_dataset.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
URL = 'https://raw.githubusercontent.com/chosh84/omok-ai/blob/master/game_result/gomocup2020results.zip'
FILE = ['gomocup2020results.zip', 'gomocup2021results.zip']
FOLDER = ['gomocup2020results','gomocup2021results']


In [2]:
from zipfile import ZipFile
print('Unzipping zipfile...')
for filename, foldername in zip(FILE, FOLDER):
  with ZipFile(filename) as zip_data:
    zip_data.extractall(foldername)


Unzipping zipfile...


In [3]:
import numpy as np
import os
from glob import glob
from tqdm import tqdm

'''
Dataset from https://gomocup.org/results/
'''
game_rule = 'Freestyle' # Freestyle, Fastgame, Standard, Renju
base_path = '/content/gomocup20*'
output_path = '/content/dataset/'
os.makedirs(output_path, exist_ok=True)
file_list = glob(os.path.join(base_path, '%s*/*.psq' % (game_rule, )))

print(file_list)

['/content/gomocup2021results/Freestyle1/10_1_9_2.psq', '/content/gomocup2021results/Freestyle1/9_2_11_2.psq', '/content/gomocup2021results/Freestyle1/1_6_7_2.psq', '/content/gomocup2021results/Freestyle1/7_0_6_1.psq', '/content/gomocup2021results/Freestyle1/6_3_9_1.psq', '/content/gomocup2021results/Freestyle1/0_9_3_1.psq', '/content/gomocup2021results/Freestyle1/10_2_5_2.psq', '/content/gomocup2021results/Freestyle1/2_1_9_1.psq', '/content/gomocup2021results/Freestyle1/3_11_8_1.psq', '/content/gomocup2021results/Freestyle1/11_11_1_1.psq', '/content/gomocup2021results/Freestyle1/6_11_1_2.psq', '/content/gomocup2021results/Freestyle1/1_0_2_1.psq', '/content/gomocup2021results/Freestyle1/9_8_3_2.psq', '/content/gomocup2021results/Freestyle1/5_11_0_2.psq', '/content/gomocup2021results/Freestyle1/5_0_4_1.psq', '/content/gomocup2021results/Freestyle1/2_10_1_1.psq', '/content/gomocup2021results/Freestyle1/8_11_10_1.psq', '/content/gomocup2021results/Freestyle1/8_1_2_2.psq', '/content/gomocu

In [4]:
for index, file_path in enumerate(tqdm(file_list)):
    with open(file_path, 'r') as f:
        lines = f.read().splitlines() 

    w, h = lines[0].split(' ')[1].strip(',').split('x')
    w, h = int(w), int(h)

    lines = lines[1:]

    inputs, outputs = [], []
    board = np.zeros([h, w], dtype=np.int8)

    for i, line in enumerate(lines):
        if ',' not in line:
            break

        x, y, t = np.array(line.split(','), np.int8)

        if i % 2 == 0:
            player = 1
        else:
            player = 2

        input = board.copy().astype(np.int8)
        input[(input != player) & (input != 0)] = -1
        input[(input == player) & (input != 0)] = 1

        output = np.zeros([h, w], dtype=np.int8)
        output[y-1, x-1] = 1

        # augmentation
        # rotate 4 x flip 3 = 12
        for k in range(4):
            input_rot = np.rot90(input, k=k)
            output_rot = np.rot90(output, k=k)

            inputs.append(input_rot)
            outputs.append(output_rot)

            inputs.append(np.fliplr(input_rot))
            outputs.append(np.fliplr(output_rot))

            inputs.append(np.flipud(input_rot))
            outputs.append(np.flipud(output_rot))

        # update board
        board[y-1, x-1] = player

    # save dataset
    np.savez_compressed(os.path.join(output_path, '%s.npz' % (str(index).zfill(5))), inputs=inputs, outputs=outputs)

100%|██████████| 22536/22536 [05:15<00:00, 71.37it/s]


In [5]:
from zipfile import ZipFile
import os
from os.path import basename

# create a ZipFile object
with ZipFile('dataset.zip', 'w') as zipObj:
  # Iterate over all the files in directory
  for folderName, subfolders, filenames in os.walk('/content/dataset/'):
    for filename in filenames:
      #create complete filepath of file in directory
      filePath = os.path.join(folderName, filename)
      # Add file to zip
      zipObj.write(filePath, basename(filePath))