Pre-processing the data to get it into NumPy arrays (saved as an HDF5 dataset)

Mount Google Drive and clone the tetris-ai repository

In [1]:
# Mount Google Drive into the Colab filesystem at /content/drive.
from google.colab import drive
drive.mount('/content/drive')

# Clone the tetris-ai repository into the current working directory.
!git clone https://github.com/bbenip/tetris-ai.git
# NOTE: every `!` line runs in its own subshell, so this `cd` does not change
# the notebook's working directory. The later cells in this notebook use
# absolute /content/tetris-ai/... paths.
!cd tetris-ai/data

Mounted at /content/drive
Cloning into 'tetris-ai'...
remote: Enumerating objects: 105, done.[K
remote: Counting objects: 100% (12/12), done.[K
remote: Compressing objects: 100% (9/9), done.[K
remote: Total 105 (delta 3), reused 9 (delta 1), pack-reused 93[K
Receiving objects: 100% (105/105), 36.35 MiB | 33.33 MiB/s, done.
Resolving deltas: 100% (35/35), done.


Install dependencies and decompress the game files

In [None]:
# @title Decompress game files
games_password = '' # @param {type:"string"}

# NOTE(review): this installs the `unrar` package from PyPI; confirm that it is
# what provides the `unrar` shell command used below.
!pip install unrar
# Extract games.rar into the games folder (`x` keeps the archive's paths).
# `{games_password}` is substituted by IPython before the shell runs and is
# not quoted, so a password containing spaces or shell metacharacters will
# break (or alter) the command.
!unrar x -p{games_password} /content/tetris-ai/data/games/games.rar /content/tetris-ai/data/games

In [4]:
!pip install h5py



In [5]:
import os
import json
import numpy as np

In [7]:
# Folder that holds the extracted game recordings (*.json files).
# dirname() drops the trailing slash from the literal below.
MOVES_FOLDER_PATH = os.path.dirname(
    '/content/tetris-ai/data/games/'
)
print(MOVES_FOLDER_PATH)

/content/tetris-ai/data/games


In [8]:
# Maps the move name stored in a record's "move" field to its integer label.
# Labels are assigned by position in the tuple, so the order must not change.
MOVES_TABLE = {
    move_name: label
    for label, move_name in enumerate((
        "MOVE_LEFT",
        "MOVE_RIGHT",
        "ROTATE_LEFT",
        "ROTATE_RIGHT",
        "ROTATE_180",
        "SOFT_DROP",
        "HARD_DROP",
        "DAS_LEFT",
        "DAS_RIGHT",
        "HOLD_BLOCK",
    ))
}

In [28]:
# Load moves from JSON file
def loadMoves(filepath):
    """Read one recorded game and convert its moves into NumPy arrays.

    The file at ``filepath`` must contain a JSON list of move records. From
    each record this reads ``heldBlock`` (may be null), ``activeBlock``
    (``id``, ``pos.x``, ``pos.y``, ``rot``), ``oldBoard`` and ``move``.

    Values are collected in Python lists and converted to arrays once at the
    end. Growing an array with ``np.append``/``np.vstack`` for every move
    copies the whole array each time, which is quadratic in the move count.

    Returns:
        Tuple ``(heldBlocks, activeBlocks, blockPositions, blockRotations,
        movesGrids, movesY)`` of integer arrays, for ``n`` moves:

        * heldBlocks: shape (n,), held block id + 1, or 0 when nothing is held.
        * activeBlocks: shape (n,), active block id + 1.
        * blockPositions: shape (n, 2), ``[x, y]`` of the active block.
        * blockRotations: shape (n,).
        * movesGrids: shape (rows, 10); the boards of all moves stacked
          vertically, so each board contributes as many rows as it has.
        * movesY: shape (n,), ``MOVES_TABLE`` label of the move.

        An empty move list yields zero-length arrays of the same ranks
        (``(0,)``, ``(0, 2)`` and ``(0, 10)``).

    Raises:
        KeyError: a record lacks one of the fields above or names a move that
            is not in ``MOVES_TABLE``.
        ValueError: a board is not 10 columns wide.
    """
    heldBlocks = []
    activeBlocks = []
    blockPositions = []
    blockRotations = []
    # Seeding with an empty (0, 10) array keeps the 10-column shape when there
    # are no moves and makes np.vstack reject boards of any other width.
    grids = [np.empty((0, 10), dtype=int)]
    movesY = []

    with open(filepath, 'rb') as f:
        data = json.load(f)

    for move in data:
        activeBlock = move["activeBlock"]
        #Shift id values up by 1 to allow null
        heldBlocks.append(move["heldBlock"]["id"] + 1 if move["heldBlock"] else 0)
        #Just so the block ids match up
        activeBlocks.append(activeBlock["id"] + 1)
        blockPositions.append([activeBlock["pos"]["x"], activeBlock["pos"]["y"]])
        blockRotations.append(activeBlock["rot"])
        grids.append(np.array(move["oldBoard"], dtype=int)) #(20,10)
        movesY.append(MOVES_TABLE[move["move"]])

    return (
        np.array(heldBlocks, dtype=int),
        np.array(activeBlocks, dtype=int),
        # reshape keeps the (0, 2) shape when there are no moves
        np.array(blockPositions, dtype=int).reshape(-1, 2),
        np.array(blockRotations, dtype=int),
        np.vstack(grids),
        np.array(movesY, dtype=int),
    )

In [10]:
import h5py
# Where the HDF5 dataset is written: <data folder>/<file name>.
DATASET_FILE_NAME = "data.hdf5"
DATA_FOLDER_PATH = "/content/tetris-ai/data/"
DATASET_FILE_PATH = os.path.join(DATA_FOLDER_PATH, DATASET_FILE_NAME)

In [30]:
def createDatasets(filepath, heldBlocks, activeBlocks, blockPositions, blockRotations, movesGrids, movesY):
  """Create the HDF5 file at ``filepath`` and store the initial arrays in it.

  The file is opened in "w" mode, so an existing file at that path is
  replaced. One dataset is created per argument: X_held_block,
  X_active_block, X_block_pos, X_block_rot, X_grid and Y.
  """
  # (dataset name, initial data, maxshape). The None in each maxshape leaves
  # the first axis unlimited so that more rows can be appended later.
  dataset_specs = (
      ("X_held_block", heldBlocks, (None,)),
      ("X_active_block", activeBlocks, (None,)),
      ("X_block_pos", blockPositions, (None, 2)),
      ("X_block_rot", blockRotations, (None,)),
      ("X_grid", movesGrids, (None, 10)),
      ("Y", movesY, (None,)),
  )
  with h5py.File(filepath, "w") as f:
    for dataset_name, initial_data, max_shape in dataset_specs:
      f.create_dataset(dataset_name, data=initial_data, maxshape=max_shape)

In [12]:
def appendToDataset(filepath, datasetName, arr):
  """Append the rows of ``arr`` to the end of an existing HDF5 dataset.

  Args:
    filepath: Path of the HDF5 file; opened in "a" (read/write) mode.
    datasetName: Name of the dataset inside the file. It must already exist
      and be resizable along axis 0.
    arr: Array whose first axis holds the rows to append.

  Nothing is written (and the file is not opened) when ``arr`` has no rows.
  Without that guard the tail slice ``[-0:]`` would address the whole
  dataset instead of an empty range.
  """
  new_rows = arr.shape[0]
  if new_rows == 0:
    return

  with h5py.File(filepath, "a") as hf:
    dset = hf[datasetName]
    old_rows = dset.shape[0]
    dset.resize(old_rows + new_rows, axis=0)
    # Write from the old end onward; an explicit start index is unambiguous.
    dset[old_rows:] = arr

def appendAllToDataset(filepath, heldBlocks, activeBlocks, blockPositions, blockRotations, movesGrids, movesY):
  """Append one game's arrays to their datasets in the file at ``filepath``.

  Each array goes to the dataset of the matching name (X_held_block,
  X_active_block, X_block_pos, X_block_rot, X_grid, Y), one
  ``appendToDataset`` call per dataset.
  """
  rows_by_dataset = (
      ('X_held_block', heldBlocks),
      ('X_active_block', activeBlocks),
      ('X_block_pos', blockPositions),
      ('X_block_rot', blockRotations),
      ('X_grid', movesGrids),
      ('Y', movesY),
  )
  for dataset_name, rows in rows_by_dataset:
    appendToDataset(filepath, dataset_name, rows)

In [13]:
def printHDF5File(filepath):
  """Print every top-level entry of the HDF5 file at ``filepath``.

  The file is opened read-only; one line is printed per key.
  """
  with h5py.File(filepath, 'r') as hdf_file:
    for entry_name in hdf_file.keys():
      entry = hdf_file[entry_name]
      print(entry)

In [32]:
# Build the dataset from every game file: the first file creates the HDF5
# file, each later one is appended to it.
# os.listdir() returns names in arbitrary order, so the paths are sorted to
# make the row order of the resulting dataset reproducible between runs.
files = sorted(
    os.path.join(MOVES_FOLDER_PATH, f)
    for f in os.listdir(MOVES_FOLDER_PATH)
    if f.endswith('.json')
)
for i,f in enumerate(files):
  move_vars = loadMoves(f)

  if i == 0:
    createDatasets(DATASET_FILE_PATH, *move_vars)
  else:
    appendAllToDataset(DATASET_FILE_PATH, *move_vars)

# Summarise what was written (one line per dataset).
printHDF5File(DATASET_FILE_PATH)

<HDF5 dataset "X_active_block": shape (2847137,), type "<i8">
<HDF5 dataset "X_block_pos": shape (2847137, 2), type "<i8">
<HDF5 dataset "X_block_rot": shape (2847137,), type "<i8">
<HDF5 dataset "X_grid": shape (56942740, 10), type "<i8">
<HDF5 dataset "X_held_block": shape (2847137,), type "<i8">
<HDF5 dataset "Y": shape (2847137,), type "<i8">
