Pre-processing the data to get it into NumPy arrays

Mount Google Drive and clone the repository

In [None]:
# Colab-only: mount Google Drive at /content/drive.
from google.colab import drive
drive.mount('/content/drive')

# Clone the tetris-ai repository into the current working directory.
!git clone https://github.com/bbenip/tetris-ai.git
# NOTE(review): every `!` line runs in its own subshell, so this `cd` does not
# change the notebook's working directory for later cells (`%cd` would).
# Later cells read games through an absolute path, so confirm whether changing
# directory is actually wanted before switching to `%cd`.
!cd tetris-ai/modify-data

Install dependencies

In [None]:
!pip install h5py

In [41]:
import os
import json
import numpy as np

In [137]:
# Folder that is scanned for the recorded-move JSON files.
# os.path.dirname() on a path ending in "/" simply drops that trailing slash.
MOVES_FOLDER_PATH = os.path.dirname(
    '/content/tetris-ai/modify-data/games/'
)
print(MOVES_FOLDER_PATH)

/content/tetris-ai/modify-data/games


In [48]:
# Maps each move name found in the recorded JSON to its integer label.
# A name's label is its position in this tuple, so the order must not change.
MOVES_TABLE = {
    moveName: label
    for label, moveName in enumerate((
        "MOVE_LEFT",
        "MOVE_RIGHT",
        "ROTATE_LEFT",
        "ROTATE_RIGHT",
        "ROTATE_180",
        "SOFT_DROP",
        "HARD_DROP",
        "DAS_LEFT",
        "DAS_RIGHT",
        "HOLD_BLOCK",
    ))
}

In [94]:
def loadMoves(filepath):
    """Load one recorded-moves JSON file into NumPy arrays.

    The file must contain a JSON list of move records. Each record is read for
    the keys "heldBlock", "activeBlock" (with "id", "pos" -> "x"/"y", "rot"),
    "oldBoard" and "move".

    Args:
        filepath: Path of the JSON file to read.

    Returns:
        A tuple (outGrids, outScalars, outY) of int arrays:
          outGrids   -- every "oldBoard" stacked row-wise, shape (rows, 10).
                        Boards are expected to be 20x10, in which case each
                        move contributes 20 consecutive rows.
          outScalars -- shape (moves, 5): held block id, active block id,
                        x position, y position, rotation.
          outY       -- shape (moves,): the MOVES_TABLE label of each "move".
        A file without moves yields shapes (0, 10), (0, 5) and (0,).

    Raises:
        KeyError: if a record lacks an expected key or names a move that is
            not in MOVES_TABLE.
    """
    gridChunks = []
    scalarRows = []
    labels = []

    with open(filepath, 'rb') as f:
        data = json.load(f)

    for move in data:
        activeInfo = move["activeBlock"]
        # Ids are shifted up by 1 so that 0 can stand for "no held block".
        heldBlock = move["heldBlock"]["id"] + 1 if move["heldBlock"] else 0
        # Same shift, so active and held block ids stay comparable.
        activeBlock = activeInfo["id"] + 1

        scalarRows.append([
            heldBlock,
            activeBlock,
            activeInfo["pos"]["x"],
            activeInfo["pos"]["y"],
            activeInfo["rot"],
        ])
        gridChunks.append(np.array(move["oldBoard"], dtype=int))
        labels.append(MOVES_TABLE[move["move"]])

    # Convert once at the end: stacking inside the loop re-copies everything
    # accumulated so far on every move, which is quadratic in the file size.
    # The leading empty (0, 10) block keeps the result well-formed for a file
    # without moves and still rejects boards that are not 10 columns wide.
    outGrids = np.vstack([np.empty((0, 10), dtype=int)] + gridChunks)
    outScalars = np.array(scalarRows, dtype=int).reshape(-1, 5)
    # Always 1-D, including for an empty file, to match the 1-D "Y" dataset.
    outY = np.array(labels, dtype=int)
    return (outGrids, outScalars, outY)

In [155]:
import h5py

# Name of the HDF5 file the datasets are written to. It is a relative path,
# so it is resolved against the notebook's current working directory.
DATASET_FILE_NAME = "data.hdf5"

In [146]:
def createDatasets(filepath, movesGrids, movesScalars, movesY):
  """Create the HDF5 file and its three datasets from the first batch of moves.

  The file is opened in "w" mode, so an existing file at `filepath` is
  replaced. Each dataset is given an unlimited first axis (maxshape None).

  Args:
    filepath: Path of the HDF5 file to create.
    movesGrids: Initial contents of "X_grids" (second axis limited to 10).
    movesScalars: Initial contents of "X_scalars" (second axis limited to 5).
    movesY: Initial contents of the one-dimensional "Y" dataset.
  """
  layout = (
      ("X_grids", movesGrids, (None, 10)),
      ("X_scalars", movesScalars, (None, 5)),
      ("Y", movesY, (None,)),
  )
  with h5py.File(filepath, "w") as f:
    for datasetName, initialData, limits in layout:
      f.create_dataset(datasetName, data=initialData, maxshape=limits)

In [151]:
def appendToDataset(filepath, datasetName, arr):
  """Append the rows of `arr` to the end of an existing dataset.

  The dataset is grown along axis 0 by `arr.shape[0]` and the new tail is
  filled with `arr`. Appending an array with zero rows leaves the dataset
  unchanged.

  Args:
    filepath: Path of the HDF5 file; it is opened in "a" mode.
    datasetName: Name of the dataset inside the file to extend.
    arr: Array whose first axis holds the rows to append.

  Raises:
    KeyError: if the file has no dataset called `datasetName`.
  """
  newRows = arr.shape[0]
  with h5py.File(filepath, "a") as hf:
    dset = hf[datasetName]
    if newRows == 0:
      # Without this guard the slice below would be [-0:], which selects the
      # WHOLE dataset, and assigning the empty array to it fails.
      return
    dset.resize(dset.shape[0] + newRows, axis=0)
    dset[-newRows:] = arr

def appendAllToDataset(filepath, movesGrids, movesScalars, movesY):
  """Append one batch of moves to the three datasets of an existing file.

  Args:
    filepath: Path of the HDF5 file to extend.
    movesGrids: Rows appended to "X_grids".
    movesScalars: Rows appended to "X_scalars".
    movesY: Values appended to "Y".
  """
  batches = (
      ('X_grids', movesGrids),
      ('X_scalars', movesScalars),
      ('Y', movesY),
  )
  for datasetName, rows in batches:
    appendToDataset(filepath, datasetName, rows)

In [None]:
def printHDF5File(filepath):
  """Print every top-level object stored in an HDF5 file, one per line.

  Args:
    filepath: Path of an existing HDF5 file; it is opened read-only.
  """
  with h5py.File(filepath, 'r') as f1:
    for dset in f1.keys():
      print(f1[dset])

# When the notebook is run top to bottom on a fresh kernel, this cell executes
# before the dataset file has been written, and opening a missing file in 'r'
# mode raises. Only print the contents when the file already exists.
if os.path.exists(DATASET_FILE_NAME):
  printHDF5File(DATASET_FILE_NAME)

In [None]:
# Build the HDF5 dataset from every .json file in MOVES_FOLDER_PATH.
# os.listdir() returns names in arbitrary order, so the list is sorted to make
# the row order of the resulting dataset reproducible from run to run.
files = sorted(
    os.path.join(MOVES_FOLDER_PATH, name)
    for name in os.listdir(MOVES_FOLDER_PATH)
    if name.endswith('.json')
)
print(files)
for i, f in enumerate(files):
  movesGrids, movesScalars, movesY = loadMoves(f)

  # The first file (re)creates the HDF5 file; every later one extends it.
  if i == 0:
    createDatasets(DATASET_FILE_NAME, movesGrids, movesScalars, movesY)
  else:
    appendAllToDataset(DATASET_FILE_NAME, movesGrids, movesScalars, movesY)

  # Progress report: shapes of this batch, then the dataset sizes so far.
  print("FILE: {} {}".format(i, f))
  print(movesGrids.shape, movesScalars.shape, movesY.shape)
  printHDF5File(DATASET_FILE_NAME)
