<a href="https://colab.research.google.com/github/epeay/tetris-ml/blob/main/tetris.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [5]:
def auto_pip(libraries):
    """
    Installs the given libraries with pip only when an import fails.

    libraries: iterable of importable module names. Each name is tried with
        importlib.import_module; on the first ImportError ALL names in the
        list are passed to a single `pip install` invocation.

    NOTE: the install line uses the IPython `!` shell escape, so this
    function only works inside a notebook/IPython session.
    """
    import importlib
    try:
        for library in libraries:
            importlib.import_module(library)
    except ImportError:
        # Module names double as the pip package names here.
        !pip install {" ".join(libraries)}
# avoids invoking pip unless we need it
auto_pip(["gymnasium"])

import gymnasium as gym
from gym import spaces
import numpy as np
import pdb

import tetrislib

import sys

# Debug aid: show where Python looks for modules (e.g. for `tetrislib`).
print(sys.path)

# NOTE(review): sys.exit() raises SystemExit, so nothing below this line in
# the cell is executed. Looks like leftover debugging - confirm before
# relying on the definitions that follow.
sys.exit()

"""
Episode = One tetris game
"""

class ActionFeedback:
    """Result flags for a single attempted action."""

    def __init__(self, valid_action=False):
        # Stored exactly as given; defaults to False when not supplied.
        self.valid_action = valid_action



class TetrominoPiece:
    """
    One tetromino plus its current rotation.

    A pattern is a 2D grid of 0/1 cells whose first row is the TOP of the
    piece. `pattern_list` holds one pattern per rotation step and `rot` is the
    index of the pattern currently in use.
    """

    BLOCK = '▆'

    def __init__(self, shape:int, patterns):
        """
        shape: integer shape id (see the constants on Tetrominos)
        patterns: sequence of patterns indexed by rotation step. rotate()
            indexes it with 0..3, so four entries are needed to rotate freely.
        """
        self.shape:int = shape
        self.pattern_list = patterns
        self.pattern = patterns[0]
        self.rot = 0

    def __str__(self) -> str:
        return f"TetrominoPiece(shape={Tetrominos.shape_name(self.shape)}, rot={self.rot*90}, pattern= {self.printable_pattern(oneline=True)})"

    def printable_pattern(self, oneline=False):
        """
        Renders the current pattern as text, filled cells drawn with BLOCK and
        empty cells with '_'.

        oneline: when True rows are joined with " / "; otherwise every row
            (including the last) is terminated with a newline.
        """
        rows = [" ".join(str(c) for c in row) for row in self.get_pattern()]
        if oneline:
            text = " / ".join(rows)
        else:
            text = "".join(f"{row}\n" for row in rows)
        return text.replace('1', TetrominoPiece.BLOCK).replace('0', '_')

    def to_dict(self):
        """Returns the shape id and the CURRENT pattern as a plain dict."""
        return {
            "shape": self.shape,
            "pattern": self.pattern
        }

    def get_pattern(self):
        """Returns the pattern for the current rotation."""
        return self.pattern

    def rotate(self):
        """Rotates IN PLACE by one step, and returns the new pattern"""
        self.rot = (self.rot + 1) % 4
        self.pattern = self.pattern_list[self.rot]
        return self.pattern

    def get_height(self):
        """Number of rows in the current pattern."""
        return len(self.get_pattern())

    def get_width(self):
        """Length of the longest row in the current pattern."""
        return max([len(x) for x in self.get_pattern()])

    def get_bottom_offsets(self):
        """
        For each column in the shape, returns the gap between the bottom of
        the shape (across all columns) and the bottom of the shape in that
        column.

        Returned values in the list would expect to contain at least one 0, and
        no values higher than the height of the shape.

        For example, an S piece:
        _ X X
        X X _

        Would have offsets [0, 0, 1] in this current rotation. This method is
        used in determining if a piece will fit at a certain position
        in the board.

        Raises ValueError if some column of the pattern has no filled cell.
        """
        pattern = self.get_pattern()
        height = len(pattern)
        # height+1 is a sentinel for "no filled cell seen in this column yet".
        ret = [height + 1] * len(pattern[0])
        # Rows are visited top-down, so the LOWEST filled cell of each column
        # is the one whose offset survives.
        for ri, row in enumerate(pattern):
            offset = height - ri - 1
            for ci, cell in enumerate(row):
                if cell == 1:
                    ret[ci] = offset

        # A surviving sentinel means a column was completely empty.
        if max(ret) >= height:
            print("Pattern:")
            print(pattern)
            print(f"Bottom Offsets: {ret}")
            print(f"Shape: {self.shape}")
            raise ValueError("Tetromino pattern has incomplete bottom offsets")

        return ret

    def get_top_offsets(self):
        """
        Returns the height of the shape at each column, measured from the
        bottom row of the pattern up to the topmost filled cell.

        For example, an S piece:
        _ X X
        X X _

        Would have offsets [1, 2, 2] in this current rotation. This provides
        guidance on how to update the headroom list. A column without any
        filled cell reports 0.

        Ideally we should cache this.
        """
        pattern = self.get_pattern()
        height = len(pattern)
        ret = [0] * len(pattern[0])
        for ri, row in enumerate(pattern):
            # Row 0 is the top of the pattern, so it sits `height` cells up.
            cell_height = height - ri
            for ci, cell in enumerate(row):
                if cell == 1:
                    ret[ci] = max(ret[ci], cell_height)
        return ret


class Tetrominos:
    """
    Catalogue of the seven tetromino shapes and a factory for pieces.

    Shape ids are the integer class constants below. `base_patterns` holds the
    unrotated pattern of every shape (first row = top of the piece).
    """
    O = 1
    I = 2
    S = 3
    Z = 4
    T = 5
    J = 6
    L = 7

    base_patterns = {
        # X X
        # X X
        O: np.array([[1, 1], [1, 1]]),

        # X X X X
        I: np.array([[1, 1, 1, 1]]),

        # _ X X
        # X X _
        S: np.array([[0, 1, 1], [1, 1, 0]]),
        Z: np.array([[1, 1, 0], [0, 1, 1]]),
        T: np.array([[1, 1, 1], [0, 1, 0]]),
        J: np.array([[1, 0, 0], [1, 1, 1]]),
        L: np.array([[0, 0, 1], [1, 1, 1]])
    }

    # Display letter for every shape id
    _names = {O: "O", I: "I", S: "S", Z: "Z", T: "T", J: "J", L: "L"}

    # Stores patterns for each tetromino, at each rotation
    # (filled lazily by the first successful make() call)
    cache = {}

    @staticmethod
    def num_tetrominos():
        """Number of distinct shapes."""
        return len(Tetrominos.base_patterns)

    @staticmethod
    def shape_name(shape):
        """
        Returns the one-letter name for a shape id.

        Raises ValueError("Invalid shape") for anything that is not a known id.
        """
        try:
            return Tetrominos._names[shape]
        except (KeyError, TypeError):
            # TypeError covers unhashable arguments, which the previous
            # equality chain also reported as an invalid shape.
            raise ValueError("Invalid shape") from None

    @staticmethod
    def make(shape):
        """
        Builds a new TetrominoPiece for the requested shape.

        shape: one of the shape id constants on this class

        Raises ValueError("Invalid shape") if the id is unknown.
        """
        # Validate first so a bad id is rejected even on the very first call.
        if shape not in Tetrominos.base_patterns:
            raise ValueError("Invalid shape")

        if not Tetrominos.cache:
            # The loop variable must not be called `shape`: reusing that name
            # would overwrite the argument and make the first call always
            # return the last shape in the dict.
            for shape_id, pattern in Tetrominos.base_patterns.items():
                Tetrominos.cache[shape_id] = [
                    pattern,
                    np.rot90(pattern),
                    np.rot90(pattern, 2),
                    np.rot90(pattern, 3)
                ]

        return TetrominoPiece(shape, Tetrominos.cache[shape])

class TetrisBoard:
    """
    The playing field: a (height, width) integer grid of 0/1 cells.

    Array row 0 is the BOTTOM of the board (render() prints rows in reverse).
    "Logical" coordinates used by the placement methods are 1-based:
    logical row 1 / column 1 is array cell [0][0].
    """

    BLOCK = '▆'

    def __init__(self, height, width):
        self.height = height
        self.width = width
        self.reset()

    def reset(self):
        """Clears the board and re-initialises the per-board bookkeeping."""
        self.board = np.zeros((self.height, self.width), dtype=int)
        # NOTE: headroom and piece are only ever set here; no method of this
        # class updates them afterwards.
        self.headroom = [self.height for _ in range(self.width)]
        self.piece = None

    def remove_tetris(self):
        """
        Removes every completely filled row, lets the rows above drop down and
        pads the top of the board with empty rows.

        Returns the number of rows removed.
        """
        to_delete = [r for r, row in enumerate(self.board) if sum(row) == self.width]

        if to_delete:
            remaining = np.delete(self.board, to_delete, axis=0)
            # Build the padded board explicitly instead of resizing in place:
            # ndarray.resize refuses to run when the array is referenced
            # elsewhere.
            padding = np.zeros((len(to_delete), self.width), dtype=self.board.dtype)
            self.board = np.vstack([remaining, padding])

        return len(to_delete)

    def place_piece(self, piece:"TetrominoPiece", logical_coords):
        """
        Writes the piece's filled cells onto the board at the given position.
        No fit check is done here; use find_logical_BL_placement first.

        piece: a TetrominoPiece object
        logical_coords: The logical row and column for the bottom left
            of the piece's pattern
        """
        pattern = piece.get_pattern()
        lrow, lcol = logical_coords

        for r in range(len(pattern)):
            # Pattern rows are stored top-first, board rows bottom-first
            pattern_row = pattern[len(pattern)-1-r]
            board_row = self.board[lrow-1+r]

            for i, c in enumerate(pattern_row):
                # Iff c is 1, push it to the board
                board_row[lcol-1+i] |= c

    def find_logical_BL_placement(self, piece:"TetrominoPiece", col):
        """
        Returns the logical row and column of the bottom left corner of the
        pattern, such that when placed, the piece will sit flush against existing
        tower parts, and not exceed the max board height.

        Given:
        BOARD       PIECE
        5 _ _ _ _
        4 _ _ _ X
        3 _ _ X X   X X X X
        2 _ X X _
        1 X X X X

        Returns (5, 1)

        Given:
        BOARD       PIECE    COL
        5 _ _ _ _
        4 _ _ _ X
        3 _ _ X X   X X X    1 (lcol 2)
        2 _ X X _     X
        1 X X X X

        Returns (4, 2): the stem rests on the column of height 3, the arms
        end up on row 5.

        piece: a TetrominoPiece object
        col: zero-index column to place the 0th column of the piece.

        Raises ValueError when the piece would stick out of the board, either
        sideways or above the top row.
        """
        p_height = piece.get_height()
        p_width = piece.get_width()

        # Reject horizontal overhang up front. Without this the height slice
        # below is silently shortened, and a one-column slice would even
        # broadcast against the offsets and "succeed".
        if col < 0 or col + p_width > self.width:
            raise ValueError(f"Piece failed to be placed at lcolumn {col+1}")

        bottom_offsets = np.array(piece.get_bottom_offsets())
        # TODO don't calculate all bottoms because we don't need them all
        board_heights = np.array(self.get_tops()[col:col+p_width])

        # With tops [1,2,3,4] and a flat I (bottom offsets [0,0,0,0]) the rows
        # 2, 3 and 4 are rejected because some column would overlap the
        # tower; row 5 is the first one where every cell clears its column.
        #
        # TODO Pick better min test height
        # If there's a very narrow, tall tower, and you're placing a flat I
        # just to the left of it, you'll likely test placement for each level of
        # the tower until the piece clears it.
        lowest = int(board_heights.min())
        highest = int(board_heights.max())
        for place_row in range(lowest+1, highest+2):
            # Does this placement not interfere with existing board pieces?
            if not np.all((bottom_offsets + place_row) > board_heights):
                continue

            # Check the final height
            if place_row-1 + p_height > self.height:
                raise ValueError(f"Requested placement at col {col+1} would require rows {place_row}-{place_row-1 + p_height}. Piece {piece}")

            return (place_row, col+1)

        raise ValueError(f"Piece failed to be placed at lcolumn {col+1}")

    @staticmethod
    def _print_rows(board, highlight=None):
        """
        Prints the board top row first, skipping the empty rows above the
        highest occupied one. Cells equal to 2 are drawn with `highlight`
        when it is given, and as empty otherwise.

        Returns True if at least one row was printed.
        """
        printed = False
        for row in board[::-1]:
            if not printed and sum(row) == 0:
                continue
            printed = True

            cells = []
            for cell in row:
                if cell == 1:
                    cells.append(TetrisBoard.BLOCK)
                elif cell == 2 and highlight is not None:
                    cells.append(highlight)
                else:
                    cells.append('_')
            # Every cell is followed by a single space, then the newline
            print(" ".join(cells), end=" \n")
        return printed

    @staticmethod
    def render_state(board, pattern, bl_coords, color=True):
        """
        Prints a board snapshot with the cells of the last played piece
        highlighted. The passed board is not modified.

        board: 2D grid as stored in TetrisBoard.board
        pattern: pattern of the piece that was placed
        bl_coords: logical (row, col) of the pattern's bottom left corner
        color: draw the highlighted cells in cyan (ANSI escape); when False
            they are drawn as ordinary blocks
        """
        board = board.copy()

        # Highlight tiles where the last piece was played
        lrow, lcol = bl_coords

        for r in range(len(pattern)):
            pattern_row = pattern[len(pattern)-1-r]
            board_row = board[lrow-1+r]

            for i, c in enumerate(pattern_row):
                if c == 1:
                    board_row[lcol-1+i] = 2

        if color:
            highlight = f"\033[36m{TetrisBoard.BLOCK}\033[0m"
        else:
            highlight = TetrisBoard.BLOCK
        TetrisBoard._print_rows(board, highlight)

    def render(self):
        """Prints the current board, or a marker line when it is empty."""
        if not TetrisBoard._print_rows(self.board):
            print("<<EMPTY BOARD>>")

    def get_tops(self):
        """
        Gets the height of each column on the board.
        This is gonna be inefficient for now.

        A board with only an I at the left side would return [4, 0, 0, ...]
        """
        tops = [0] * self.width
        for r, row in enumerate(self.board):
            # Scanning stops at the first completely empty row
            if sum(row) == 0:
                break

            for col, val in enumerate(row):
                if val == 1:
                    tops[col] = r+1

        return tops



class TetrisGameRecord:
    """
    Running log of a single game (episode).

    Holds a few counters plus several lists that grow by one entry per
    recorded move.
    """

    def __init__(self):
        # Counters
        self.moves = 0
        self.lines_cleared = 0
        # How often 1, 2, 3 or 4 rows were removed at once
        self.cleared_by_size = {size: 0 for size in (1, 2, 3, 4)}

        # Per-move history
        self.boards = list()
        self.pieces = list()
        # Logical coords of BL corner of piece pattern
        self.placements = list()
        self.rewards = list()
        self.outcome = list()
        self.success = list()

        self.cumulative_reward = 0

class TetrisEnv(gym.Env):
    """
    Tetris as a placement game: each step picks a column and a rotation for
    the current piece, which is then dropped straight down.

    Observation: the flattened board followed by a one-hot encoding of the
    current piece's shape.
    Action: (column, rotation) with a zero-based column and rotation in 0..3.

    reset() returns only the observation and step() returns the 4-tuple
    (observation, reward, done, info).
    """

    def __init__(self):
        # Imported here so the class does not depend on a later notebook cell
        # having imported deque already.
        from collections import deque

        super().__init__()
        self.board_height = 20
        self.board_width = 10
        self.board = TetrisBoard(self.board_height, self.board_width)
        self.current_piece = None
        self.pieces = Tetrominos()
        self.reward_history = deque(maxlen=10)
        self.record = TetrisGameRecord()

        # Action space: tuple (column, rotation)
        # TODO Limit action width properly
        self.action_space = spaces.MultiDiscrete([self.board_width, 4])

        # Observation space: the board state
        self.observation_space = spaces.Box(
            low=0,
            high=1,
            shape=(self.board_height * self.board_width + Tetrominos.num_tetrominos(),),
            dtype=int
            )

        self.reset()

    def reset(self):
        """Starts a new game with a fresh record and returns the observation."""
        self.board.reset()
        self.current_piece = self._get_random_piece()
        self.record = TetrisGameRecord()
        return self._get_board_state()

    def step(self, action):
        """
        Plays one action.

        action: (col, rotation) - zero-based column for the piece's leftmost
            column, and the rotation step (0..3) the piece should have.

        Returns (observation, reward, done, info). Three outcomes exist:
        - the piece would stick out sideways: nothing is placed, the same
          piece stays current, half reward, done=False (recorded as "REDO")
        - no room for the piece: half reward, done=True ("OVERFLOW")
        - otherwise the piece is placed, full rows are removed and a new
          random piece becomes current
        """
        col, rotation = action

        # Rotate the piece to the desired rotation. The piece object survives
        # a REDO step, so rotate relative to its current state rather than
        # blindly `rotation` more times.
        for _ in range((rotation - self.current_piece.rot) % 4):
            self.current_piece.rotate() # Rotates IN PLACE

        if not self._is_valid_action(self.current_piece, col+1):
            # We may resolve this with a "redo" instead of stopping the episode
            # in the future.
            reward = self._calculate_reward() * 0.5
            self._record_move(reward, "REDO", None, False)
            return self._get_board_state(), reward, False, {}

        try:
            # Find where the piece would sit on the board
            lcoords = self.board.find_logical_BL_placement(self.current_piece, col)
        except ValueError:
            # TODO Account for a fatal placement
            reward = self._calculate_reward() * 0.5
            self._record_move(reward, "OVERFLOW", None, False)
            return self._get_board_state(), reward, True, {}

        self.board.place_piece(self.current_piece, lcoords)
        # The reward is taken BEFORE full rows are removed so that completed
        # lines still count towards it.
        reward = self._calculate_reward()
        self.reward_history.append(reward)
        done = self._is_done()
        self._record_move(reward, None, lcoords, True)

        # Huzzah!
        lines_gone = self.board.remove_tetris()

        if lines_gone > 0:
            # Count rows, not clearing events (those are in cleared_by_size)
            self.record.lines_cleared += lines_gone
            self.record.cleared_by_size[lines_gone] += 1

        # Prep for next move
        self.current_piece = self._get_random_piece()
        next_state = self._get_board_state()
        return next_state, reward, done, {}

    def _record_move(self, reward, outcome, placement, success):
        """Appends one entry for the current move to every list of the record."""
        record = self.record
        record.moves += 1
        record.boards.append(self.board.board.copy())
        record.pieces.append(self.current_piece.to_dict())
        record.rewards.append(reward)
        record.outcome.append(outcome)
        record.placements.append(placement)
        record.success.append(success)
        record.cumulative_reward += reward

    def render(self):
        self.board.render()

    def _get_random_piece(self):
        """Returns a new piece of a uniformly random shape (ids 1..N)."""
        return self.pieces.make(np.random.randint(1, Tetrominos.num_tetrominos() + 1))

    def _is_valid_action(self, piece, lcol):
        """
        True when `piece`, with its leftmost column on logical column `lcol`,
        lies completely within the board's width.
        """
        if lcol < 1 or lcol > self.board_width:
            return False

        # An O on col 1 would take up cols 1-2
        if lcol + piece.get_width() -1 > self.board_width:
            return False
        return True

    def _calculate_reward(self):
        """
        Scores how densely the occupied rows are packed.

        The score is the number of filled tiles divided by the capacity of all
        non-empty rows; every completely filled row counts as 5 extra tiles.
        An empty board scores 0.
        """
        # Packed lines produces a higher score
        # Big narrow tower would produce a low score
        active_lines = 0
        board_tiles = 0
        lines_cleared = 0
        for row in self.board.board:
            row_sum = sum(row)
            if row_sum == 0:
                continue

            board_tiles += row_sum
            active_lines += 1
            if row_sum == self.board.width:
                lines_cleared += 1

        if active_lines == 0:
            return 0

        # Simulating an extra 5 packed tiles per line cleared
        return (board_tiles+(5*lines_cleared)) / float(self.board_width * active_lines)

    def _is_done(self):
        # A successful placement never ends the game; only an OVERFLOW does.
        return False

    def _get_board_state(self):
        """Returns the flattened board concatenated with the piece one-hot."""
        # Get the current board state
        board_state = self.board.board.flatten()

        # Create a one-hot encoding for the current piece (shape ids start at 1)
        piece_one_hot = np.zeros(Tetrominos.num_tetrominos())
        piece_one_hot[self.current_piece.shape - 1] = 1

        # Concatenate the board state and the one-hot encoding
        return np.concatenate([board_state, piece_one_hot])



def main():
    """
    Demo: plays at most ten randomly sampled actions on a fresh environment,
    printing the board and the reward after every step.
    """
    env = TetrisEnv()
    env.reset()

    max_steps = 10
    for _ in range(max_steps):
        # Random action for demonstration
        random_action = env.action_space.sample()
        _, reward, done, _ = env.step(random_action)
        env.board.render()
        print(f"Reward: {reward}, Done: {done}")
        if done:
            break




['/content', '/env/python', '/usr/lib/python310.zip', '/usr/lib/python3.10', '/usr/lib/python3.10/lib-dynload', '', '/usr/local/lib/python3.10/dist-packages', '/usr/lib/python3/dist-packages', '/usr/local/lib/python3.10/dist-packages/IPython/extensions', '/root/.ipython']


SystemExit: 

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim

class DQN(nn.Module):
    """
    Fully connected network with two hidden layers of 128 units, mapping a
    state vector to one output value per action.
    """

    def __init__(self, state_dim, action_dim):
        super().__init__()
        hidden_units = 128
        self.fc1 = nn.Linear(state_dim, hidden_units)
        self.fc2 = nn.Linear(hidden_units, hidden_units)
        self.fc3 = nn.Linear(hidden_units, action_dim)

    def forward(self, x):
        # ReLU after both hidden layers, none on the output layer
        hidden = x
        for layer in (self.fc1, self.fc2):
            hidden = torch.relu(layer(hidden))
        return self.fc3(hidden)

import random
from collections import deque

class DQNAgent:
    """
    Epsilon-greedy DQN agent with a replay buffer and a target network.

    Actions are (column, rotation) pairs. Inside the network they are
    flattened to a single index: column * num_rotations + rotation.
    """

    def __init__(self, state_dim, action_dim, learning_rate=0.001, discount_factor=0.99, exploration_rate=1.0, exploration_decay=0.995, min_exploration_rate=0.01, replay_buffer_size=10000, batch_size=64):
        """
        state_dim: length of the state vector
        action_dim: pair (number of columns, number of rotations)
        exploration_rate: initial probability of taking a random action; it is
            multiplied by exploration_decay after every training episode and
            never drops below min_exploration_rate
        """
        self.state_dim = state_dim
        self.num_columns = int(action_dim[0])
        self.num_rotations = int(action_dim[1])
        self.action_dim = self.num_columns * self.num_rotations  # Total number of actions
        self.discount_factor = discount_factor
        self.exploration_rate = exploration_rate
        self.exploration_decay = exploration_decay
        self.min_exploration_rate = min_exploration_rate
        self.replay_buffer = deque(maxlen=replay_buffer_size)
        self.batch_size = batch_size

        self.model = DQN(state_dim, self.action_dim)
        self.target_model = DQN(state_dim, self.action_dim)
        self.update_target_model()

        self.optimizer = optim.Adam(self.model.parameters(), lr=learning_rate)
        self.loss_fn = nn.MSELoss()
        self.game_records = []

    def update_target_model(self):
        """Copies the online network's weights into the target network."""
        self.target_model.load_state_dict(self.model.state_dict())

    def remember(self, state, action, reward, next_state, done):
        """Stores one transition in the replay buffer."""
        self.replay_buffer.append((state, action, reward, next_state, done))

    def guess(self, state):
        """Returns a uniformly random (column, rotation); `state` is unused."""
        return (random.randint(0, self.num_columns - 1), random.randint(0, self.num_rotations - 1))

    def predict(self, state):
        """Returns the (column, rotation) with the highest predicted value."""
        state = torch.FloatTensor(state).unsqueeze(0)
        # Pure inference - no graph needed
        with torch.no_grad():
            q_values = self.model(state)
        action_index = torch.argmax(q_values).item()
        return divmod(action_index, self.num_rotations)

    def choose_action(self, state):
        """Epsilon-greedy choice between guess() and predict()."""
        if random.uniform(0, 1) < self.exploration_rate:
            return self.guess(state)
        else:
            return self.predict(state)

    def evaluate(self, env, num_episodes=10, max_steps=None):
        """
        Plays episodes with the learned policy only (no exploration, no
        learning) and returns the list of total rewards per episode.

        max_steps: optional cap on the steps per episode. The policy is
            deterministic, so if an action leaves the state unchanged
            without ending the episode the same action is chosen again
            forever; pass a limit to guard against that. None (the default)
            means no limit.
        """
        total_rewards = []
        for _ in range(num_episodes):
            # Archive the game that is about to be wiped by reset(), but only
            # if something was actually played (same rule as train()).
            if env.record.moves > 0:
                self.game_records.append(env.record)

            state = env.reset()
            total_reward = 0
            steps = 0
            done = False
            while not done and (max_steps is None or steps < max_steps):
                action = self.predict(state)  # Always use the learned policy
                state, reward, done, _ = env.step(action)
                total_reward += reward
                steps += 1
            total_rewards.append(total_reward)
        return total_rewards

    def train(self, env, num_episodes=10):
        """
        Runs training episodes and returns the list of total rewards.

        Every step is stored in the replay buffer and followed by one replay()
        update. The exploration rate decays once per episode and the target
        network is refreshed every 10th episode (counted within this call).
        """
        total_rewards = []
        target_update_interval = 10

        for episode in range(num_episodes):
            # Archive the previous game before reset() replaces its record
            if env.record.moves > 0:
                self.game_records.append(env.record)
            state = env.reset().flatten()  # Flatten the state to fit the input of the network
            total_reward = 0
            done = False

            while not done:
                action = self.choose_action(state)
                next_state, reward, done, _ = env.step(action)
                next_state = next_state.flatten()

                self.remember(state, action, reward, next_state, done)
                self.replay()
                state = next_state
                total_reward += reward

            self.decay_exploration_rate()
            total_rewards.append(total_reward)

            if episode % target_update_interval == 0:
                self.update_target_model()

        return total_rewards

    def replay(self):
        """
        Performs one optimisation step on a random batch from the replay
        buffer. Does nothing until the buffer holds at least batch_size
        transitions.
        """
        if len(self.replay_buffer) < self.batch_size:
            return

        batch = random.sample(self.replay_buffer, self.batch_size)
        states, actions, rewards, next_states, dones = zip(*batch)

        # Stack per-sample tensors instead of handing torch a tuple of arrays
        states = torch.stack([torch.as_tensor(s, dtype=torch.float32) for s in states])
        next_states = torch.stack([torch.as_tensor(s, dtype=torch.float32) for s in next_states])
        actions = torch.LongTensor([a[0] * self.num_rotations + a[1] for a in actions])
        rewards = torch.FloatTensor(rewards)
        dones = torch.FloatTensor(dones)

        # Calculate current Q values
        q_values = self.model(states).gather(1, actions.unsqueeze(1)).squeeze(1)

        # Calculate next Q values using the target model. The target is a
        # constant for the loss, so no gradients are tracked for it.
        with torch.no_grad():
            next_q_values = self.target_model(next_states).max(1)[0]
            # (1 - dones) drops the bootstrap term for terminal transitions
            target_q_values = rewards + self.discount_factor * next_q_values * (1 - dones)

        # Compute the loss
        loss = self.loss_fn(q_values, target_q_values)

        # Perform the optimization step
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()

    def decay_exploration_rate(self):
        """Multiplies the exploration rate by the decay, respecting the floor."""
        self.exploration_rate = max(self.min_exploration_rate, self.exploration_rate * self.exploration_decay)


In [None]:
# Initialize Tetris environment
env = TetrisEnv()
# Length of the flat observation vector
state_dim = env.observation_space.shape[0]
# Sizes of the two action components; DQNAgent reads them as
# action_dim[0] and action_dim[1]
action_dim = env.action_space.nvec

agent = DQNAgent(state_dim, action_dim)

# NOTE(review): these two are not read by any code visible in this file -
# the training cell passes its own episode count and DQNAgent.train() uses
# its own local interval.
num_episodes = 10
target_update_interval = 10

# Collects ("TRAIN" | "EVALUATE", rewards) tuples across training cells
training_tracker = []



[4.738097784568373, 8.831328671328672, 9.555963664818155, 10.728552830038899, 5.8938041112654105, 9.188277944106117, 8.33229436713957, 5.171808018263126, 6.651104901290662, 4.682355977355977, 4.31929292929293, 11.149641947319967, 8.267410951621478, 8.228891346012091, 6.6988095238095235, 11.132971472971475, 10.719028560019273, 6.155971593765714, 10.388941497099395, 6.3597186147186155, 3.5374560080442436, 7.147582757799477, 15.643380055616898, 7.411849816849819, 4.4664127702363, 2.397044329939067, 8.156083524318822, 10.245952380952383, 10.789433811802233, 6.909817324058809, 8.556541996015682, 7.633655367680137, 7.739112554112555, 6.357315462315462, 15.589264069264066, 8.30405760905761, 2.914761904761905, 9.814126984126986, 8.467515262515263, 9.82294144108386, 4.999796380090497, 6.8310901645653965, 5.256492210919456, 12.230522875816996, 6.908549314462627, 14.784156431803488, 8.315559922533609, 8.301485152274628, 6.601884226884226, 6.837871017871018, 8.111375944317121, 8.862429967693128, 9

In [None]:
# Do/Continue training without resetting the env
# (the agent and training_tracker from the setup cell are reused, so running
# this cell again continues from the current weights)

# 5 rounds of 100 training episodes each
for i in range(5):
    rewards = agent.train(env, 100)
    training_tracker.append(("TRAIN", rewards))
    print(rewards)
    print(f"Rewards avg/min/max: {np.average(rewards)}/{np.min(rewards)}/{np.max(rewards)}")

# Alternating evaluate/train rounds. range(0) means this loop body never
# runs as written; raise the count to enable it.
for i in range(0):
    rewards = agent.evaluate(env, 100)
    print(rewards)
    training_tracker.append(("EVALUATE", rewards))
    print(f"Rewards avg/min/max: {np.average(rewards)}/{np.min(rewards)}/{np.max(rewards)}")
    rewards = agent.train(env, 100)
    print(rewards)
    training_tracker.append(("TRAIN", rewards))
    print(f"Rewards avg/min/max: {np.average(rewards)}/{np.min(rewards)}/{np.max(rewards)}")


print("Training completed.")

[16.72458995081287, 8.751499582289055, 15.712357212790643, 8.953342670401492, 12.853992673992671, 22.26077036396699, 19.183218623481785, 10.555093606084318, 10.365026291218246, 6.326855301561184, 17.369021150847768, 14.290117271789084, 13.103591189512242, 10.994536340852134, 29.491935520619688, 7.561241830065358, 25.586553773024374, 14.159929719403392, 30.417298326028995, 27.53943381180223, 12.754666076957117, 6.8909690893901425, 15.404833003254076, 28.174920634920618, 8.336217834453128, 20.098377561550926, 18.305422551304886, 10.617656887316329, 20.34892572339941, 8.063832375055286, 13.651904761904758, 34.467282658002546, 11.841904761904766, 10.308587645584549, 11.024632414369247, 16.475946275946296, 11.783684298157972, 12.469765590446698, 43.88453449800961, 28.479028944911267, 32.85742815148385, 9.101430849851907, 11.31189865689865, 8.352054601744996, 13.718137254901952, 21.130476370035204, 39.12432900432908, 25.488947368420984, 9.622907647907644, 12.084926184926175, 25.0466904200030

In [None]:

import sys

# Game records archived by the agent during train()/evaluate()
records = agent.game_records

# Tag every record with its index and with the number of moves that actually
# placed a piece (moves without a placement are stored as None).
for i, g in enumerate(records):
    g.id = i

    real_outcomes = len(list(filter(lambda x: (x is not None), g.placements)))
    # print(f"Game {i} placements {len(g.placements)}, REAL placements {real_outcomes} diff {real_outcomes-len(g.placements)}")
    g.real_moves = real_outcomes



# for i in range(10):
#     print(f"Moves: {records[i].moves}")
#     print(f"Lines cleared: {records[i].lines_cleared}")
#     print(f"Cumulative reward: {records[i].cumulative_reward}")
#     print("----------------------")


# Games ordered by number of real placements, most first
s_games = sorted(records, key=lambda x: x.real_moves, reverse=True)

# The game with the most placements. Raises IndexError if no game has been
# recorded yet.
show_me = s_games[0]

print(f"Moves: {show_me.moves}")
print(f"Lines cleared: {show_me.lines_cleared}")
print(f"Boards Length: {len(show_me.boards)}")
print(f"Rewards Length: {len(show_me.rewards)}")

print(show_me.placements)

print(show_me.lines_cleared)
print(show_me.cleared_by_size)
# Replay the game move by move; moves without a placement are skipped
for i in range(show_me.moves):
    board = show_me.boards[i]
    piece = show_me.pieces[i]
    placement = show_me.placements[i]

    if piece and placement:
        TetrisBoard.render_state(show_me.boards[i], show_me.pieces[i]['pattern'], show_me.placements[i])
        print(f"Reward: {show_me.rewards[i]}")
        print("----------------------")




Moves: 193
Lines cleared: 2
Boards Length: 193
Rewards Length: 193
[(1, 1), (3, 1), (1, 9), (1, 6), (5, 1), (3, 7), (5, 7), (1, 4), (3, 4), (5, 4), (7, 1), (8, 4), (6, 7), (9, 8), (9, 9), (7, 2), (11, 1), (12, 9), (15, 9), (15, 1), (9, 3), None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, (12, 2), None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, (19, 1), (8, 6), None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, (9, 7), (12, 6), (15, 7), (17, 7), (9, 4), (11, 4), (14, 4), (17, 4), None, None, None, (19, 5), None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, N

In [None]:
def render_alt(board, pattern, bl_coords, color=True):
    """
    Debugging variant of the board renderer: prints the placement coordinates
    and, for every pattern row, the pattern row and the board row it is
    written to, before printing the board with the piece highlighted in cyan.

    board: 2D grid; it is copied, the original is left untouched
    pattern: pattern of the piece that was placed (first row = top)
    bl_coords: logical (row, col) of the pattern's bottom left corner
    color: accepted but not used
    """
    canvas = board.copy()

    # Highlight tiles where the last piece was played
    base_row, base_col = bl_coords

    print(bl_coords)

    n_rows = len(pattern)
    for offset in range(n_rows):
        # Walk the pattern bottom-up while moving up the board
        pattern_row = pattern[n_rows - 1 - offset]
        target_index = base_row - 1 + offset
        board_row = canvas[target_index]

        print(pattern_row)
        print(f"Row: {target_index}, {board_row}")

        for i, c in enumerate(pattern_row):
            # Iff c is 1, mark the cell as part of the last piece
            if c == 1:
                board_row[base_col - 1 + i] = 2

    # Print top row first, skipping the empty rows above the tower
    started = False
    for row in reversed(canvas):
        if not started and sum(row) == 0:
            continue
        started = True

        for cell in row:
            if cell == 2:
                glyph = f"\033[36m{TetrisBoard.BLOCK}\033[0m"
            elif cell == 1:
                glyph = TetrisBoard.BLOCK
            else:
                glyph = '_'
            print(glyph, end=' ')
        print()




# Replays one recorded game with the verbose renderer above.
# NOTE(review): `game0` is not assigned anywhere in the visible source -
# presumably a record such as agent.game_records[0] left over from an earlier
# notebook session; confirm before running.
for i in range(game0.moves):
    board = game0.boards[i]
    piece = game0.pieces[i]
    placement = game0.placements[i]

    # Moves without a placement are stored as None and skipped
    if piece and placement:
        render_alt(game0.boards[i], game0.pieces[i]['pattern'], game0.placements[i])
        print("----------------------")