In [1]:
#Necessary Dependencies:
#!pip install gym_tetris
#!pip install nes-py
#!pip install import-ipynb

In [2]:
#Necessary imports from external libraries:
from nes_py.wrappers import JoypadSpace
import gym_tetris
from gym_tetris.actions import MOVEMENT
import numpy as np
import matplotlib.pyplot as plt
import time
import copy

#Necessary imports from other files:
#from A_star import TetrisGameState
#from A_star import TetrisNode
from A_star import AStarTetrisSolver
from UCT import UCTTetrisSolver
from UCT import CollisionDetection
import tetronimoes

In [3]:
#AStar Check
#astartetris = TetrisGameState("insert board here")
#node = TetrisNode("insert state here", "insert position here", "insert rotation here")
#solver = AStarTetrisSolver("insert start game state here")

In [4]:
#UCT Check
#uct = UCTTetrisSolver("insert board here")

In [5]:
#From here on out, this is the actual code of the Tetris program.
#It has been split into different blocks, as putting it all in a single block would cause severe lag.

In [6]:
#Necessary constant
#This implementation uses the NTSC version of tetris, which has slightly different frames for the falling pieces than the PAL version.
#Source for fall frames: https://listfist.com/list-of-tetris-levels-by-speed-nes-ntsc-vs-pal

#This is what corresponds to the piece location/rotation, which is found via self.env.ram[0x0042].
#It's not particularly useful for our purposes because info["current_piece"] is just outright better, but it is still noteworthy.
PieceOrientation = {
    "T" : [0, 1, 2, 3],
    "J" : [4, 5, 6, 7],
    "Z" : [8, 9],
    "O" : [10],
    "S" : [11, 12],
    "L" : [13, 14, 15, 16],
    "I" : [17, 18]
    
}


In [7]:
print(MOVEMENT)

[['NOOP'], ['A'], ['B'], ['right'], ['right', 'A'], ['right', 'B'], ['left'], ['left', 'A'], ['left', 'B'], ['down'], ['down', 'A'], ['down', 'B']]


In [8]:
AStarTestBoard = ((0, 0, 0, 0, 0, 0, 0, 0, 0, 0), 
                  (0, 0, 0, 0, 0, 0, 0, 0, 0, 0), 
                  (0, 0, 0, 0, 0, 0, 0, 0, 0, 0), 
                  (0, 0, 0, 0, 0, 0, 0, 0, 0, 0), 
                  (0, 0, 0, 0, 0, 0, 0, 0, 0, 0), 
                  (0, 0, 0, 0, 0, 0, 0, 0, 0, 0), 
                  (0, 0, 0, 0, 0, 0, 0, 0, 0, 0), 
                  (0, 0, 0, 0, 0, 0, 0, 0, 0, 0), 
                  (0, 0, 0, 0, 0, 0, 0, 0, 0, 0), 
                  (0, 0, 0, 0, 0, 0, 0, 0, 0, 0), 
                  (0, 0, 0, 0, 0, 0, 0, 0, 0, 0), 
                  (0, 0, 0, 0, 0, 0, 0, 0, 0, 0), 
                  (0, 0, 0, 0, 0, 0, 0, 0, 0, 0), 
                  (0, 0, 0, 0, 0, 0, 0, 0, 0, 0), 
                  (0, 0, 0, 0, 0, 0, 0, 0, 0, 0), 
                  (0, 0, 0, 0, 0, 0, 0, 0, 0, 0), 
                  (0, 0, 0, 0, 0, 0, 0, 0, 0, 0), 
                  (0, 0, 0, 0, 0, 0, 0, 0, 0, 0), 
                  (0, 0, 0, 0, 0, 0, 0, 0, 0, 0), 
                  (0, 0, 0, 0, 0, 0, 0, 0, 0, 0))
#Also starting point of all pieces
AStarStartLocation = (5,0)
AStarStartPiece = "Ld"
AStarGoalLocation = (3,17)
AStarGoalPiece = "Ld"


In [9]:
UCTTestBoard =   ((0, 0, 0, 0, 0, 0, 0, 0, 0, 0), 
                  (0, 0, 0, 0, 0, 0, 0, 0, 0, 0), 
                  (0, 0, 0, 0, 0, 0, 0, 0, 0, 0), 
                  (0, 0, 0, 0, 0, 0, 0, 0, 0, 0), 
                  (0, 0, 0, 0, 0, 0, 0, 0, 0, 0), 
                  (0, 0, 0, 0, 0, 0, 0, 0, 0, 0), 
                  (0, 0, 0, 0, 0, 0, 0, 0, 0, 0), 
                  (0, 0, 0, 0, 0, 0, 0, 0, 0, 0), 
                  (0, 0, 0, 0, 0, 0, 0, 0, 0, 0), 
                  (0, 0, 0, 0, 0, 0, 0, 0, 0, 0), 
                  (0, 0, 0, 0, 0, 0, 0, 0, 0, 0), 
                  (0, 0, 0, 0, 0, 0, 0, 0, 0, 0), 
                  (0, 1, 0, 1, 0, 0, 1, 0, 1, 0), 
                  (0, 1, 0, 1, 0, 0, 1, 0, 1, 0), 
                  (0, 1, 0, 1, 0, 0, 1, 0, 1, 0), 
                  (0, 1, 0, 1, 0, 0, 1, 0, 1, 0), 
                  (0, 1, 0, 1, 0, 0, 1, 0, 1, 0), 
                  (1, 1, 1, 1, 0, 0, 1, 1, 1, 1), 
                  (1, 1, 1, 1, 0, 1, 1, 1, 1, 1), 
                  (1, 1, 1, 1, 0, 1, 1, 1, 1, 1))
UCTPiece = "Jl"
UCTNextPiece = "Iv"
UCTGoalLocation = (4,18) #Jr

In [10]:
#Takes the board ripped out from the NES RAM and converts it to a simpler board to use
#Is mostly necessary because the board in the NES RAM also takes into account the colors of the pieces, which is not necessary for the algorithm to know.
def ColorBoardtoSimpleBoard(board):
    simpleboard = []
    for row in board:
        simplerow = []
        for cell in row:
            if cell == 239:
                simplerow.append(0)
            else:
                simplerow.append(1)
        simpleboard.append(tuple(simplerow))
    return tuple(simpleboard)


In [11]:
#To use this class, simply run the following line in Agent's action function:
#MicroState(ColorBoardtoSimpleBoard(self.env.ram[0x0400:0x04C8].reshape((20, 10)).copy()), self.info, tuple(self.env.ram[0x0040:0x0042]), self.env.ram[0x0044], "NOOP")
#A* Algorithm can then be run via running takeAction function 
class MicroState:
    movement = [['NOOP'], ['A'], ['B'], ['right'], ['right', 'A'], ['right', 'B'], ['left'], ['left', 'A'], ['left', 'B'], ['down'], ['down', 'A'], ['down', 'B']]
    SpeedtoFallFrames = [48, 43, 38, 28, 23, 18, 13, 8, 6, 5, 5, 5, 4, 4, 4, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1]
    rot2num = {"v": 0, "h": 1, "u": 0, "r": 1, "d": 2, "l": 3}
    
    def __init__(self, boardstate, currentpiece, piecelocation, goalstate=None, speed=0, previousaction="NOOP", parent=None, g=0):
        self.boardstate = boardstate
        self.currentpiece = currentpiece
        self.piecelocation = list(piecelocation)
        self.goalstate = goalstate
        self.speed = speed
        self.fallframes = self.SpeedtoFallFrames[min(speed, 29)] - 1
        self.softdrop = 0
        self.previousaction = previousaction
        self.parent = parent
        self.g_score = g
        self.h_score = self.heuristic(goalstate)
        self.f_score = self.g_score + self.h_score
    
    def takeAction(self, action):
        bs = copy.deepcopy(self.boardstate)
        cp = copy.deepcopy(self.currentpiece)
        #pl = copy.deepcopy(self.piecelocation)
        
        ff = copy.deepcopy(self.fallframes)
        sd = copy.deepcopy(self.softdrop)
        
        
        
        if "A" in action and "A" not in self.previousaction:
            if cp[0] != "O":
                if cp[1] == "h":
                    cp = cp[0] + "v"
                elif cp[1] == "v":
                    cp = cp[0] + "h"
                elif cp[1] == "u":
                    cp = cp[0] + "r"
                elif cp[1] == "r":
                    cp = cp[0] + "d"
                elif cp[1] == "d":
                    cp = cp[0] + "l"
                elif cp[1] == "l":
                    cp = cp[0] + "u"
        
        if "B" in action and "B" not in self.previousaction:
            if cp[0] != "O":
                if cp[1] == "h":
                    cp = cp[0] + "v"
                elif cp[1] == "v":
                    cp = cp[0] + "h"
                elif cp[1] == "u":
                    cp = cp[0] + "l"
                elif cp[1] == "l":
                    cp = cp[0] + "d"
                elif cp[1] == "d":
                    cp = cp[0] + "r"
                elif cp[1] == "r":
                    cp = cp[0] + "u"
        
        
        
        pl = [tetronimoes.TETRO_TRANS[cp][0] + self.piecelocation[0], tetronimoes.TETRO_TRANS[cp][1] + self.piecelocation[1]]
        
        if "down" in action:
            if sd == 2 and ff > 1:
                pl[1] += 1
                sd = 1
            elif sd == 0 or sd == 1:
                sd += 1
        else:
            sd = 0
            
        
        if "left" in action and "left" not in self.previousaction:
            pl[0] -= 1
            if not CollisionDetection(bs, self.currentpiece, pl):
                pl[0] += 1
        if "right" in action and "right" not in self.previousaction:
            pl[0] += 1
            if not CollisionDetection(bs, self.currentpiece, pl):
                pl[0] -= 1
        
        pl = (pl[0] - tetronimoes.TETRO_TRANS[cp][0], pl[1] - tetronimoes.TETRO_TRANS[cp][1])
        
        
        
        ff -= 1
        if ff <= 0:
            pl[1] += 1
        #print(bs, cp, pl, self.goalstate, self.speed, action, copy.deepcopy(self), self.g_score + 1)
        
        m = MicroState(bs, cp, pl, copy.deepcopy(self.goalstate), copy.deepcopy(self.speed), action, copy.deepcopy(self), copy.deepcopy(self.g_score) + 1)
        #print(m)
        #print(self)
        m.fallframes = ff
        m.softdrop = sd
        return m
    
    def heuristic(self, goal):
        if goal is None:
            return 0
        if len(self.currentpiece) == 1:
            currot = 0
        else:
            currot = self.rot2num[self.currentpiece[1]]
        
        if len(goal.currentpiece) == 1:
            goalrot = 0
        else:
            goalrot = self.rot2num[goal.currentpiece[1]]
        rotcost = min(abs(currot - goalrot), abs(((currot + 1) % 4) - ((goalrot + 1) % 4)))
        movcostx = 2 * abs(self.piecelocation[0] - goal.piecelocation[0])
        
        #Since you can't move left/right twice consecutively, this rule has to be added to properly convey the rules
        if not ((self.piecelocation[0] - goal.piecelocation[0] < 0 and "left" in self.previousaction) or (self.piecelocation[0] - goal.piecelocation[0] > 0 and "right" in self.previousaction)):
            movcostx = max(0, movcostx - 1)
            
        movcosty = (2 * abs(self.piecelocation[1] - goal.piecelocation[1])) 
        if movcosty > 0:
            movcosty -= (self.softdrop - 1)
        movcost = movcostx + movcosty
        if goal.piecelocation[1] < self.piecelocation[1]:
            #If goal is above the current piece, it's impossible to reach and should therefore not be added
            movcost = 9999
        return rotcost + movcost
    
    def is_goal(self, goal):
        return (self.currentpiece == goal.currentpiece and self.piecelocation == goal.piecelocation)
    
    def generate_neighbors(self):
        return tuple([self.takeAction(action) for action in self.movement])
    
    def __eq__(self,other):
        #print(other)
        try:
            if self.currentpiece == other.currentpiece and tuple(self.piecelocation) == tuple(other.piecelocation) and tuple(self.previousaction) == tuple(other.previousaction) and self.softdrop == other.softdrop:
                return True
            return False
        except:
            if self.currentpiece == other[0] and tuple(self.piecelocation) == tuple(other[1]) and tuple(self.previousaction) == tuple(other[2]) and self.softdrop == other[3]:
                return True
            return False
        
    
    def __lt__(self,other):
        if self.f_score < other.f_score:
            return True
        if self.f_score == other.f_score:
            if self.h_score < other.h_score:
                return True
        return False
    
    def __gt__(self,other):
        if self.f_score > other.f_score:
            return True
        if self.f_score == other.f_score:
            if self.h_score > other.h_score:
                return True
        return False
    
    def __str__(self):
        return str([self.currentpiece, self.piecelocation, self.previousaction, self.f_score, self.h_score])
    
    def __hash__(self):
        #print(self.currentpiece)
        #print(self.piecelocation)
        #print(self.previousaction)
        return hash((self.currentpiece, tuple(self.piecelocation), tuple(self.previousaction), self.softdrop))
        

In [12]:
#STOP

In [13]:
AGoalState = MicroState(AStarTestBoard, AStarGoalPiece, AStarGoalLocation)
AStartState = MicroState(AStarTestBoard, AStarStartPiece, AStarStartLocation, AGoalState)
neighbors = AStartState.generate_neighbors()
for state in neighbors:
    print(state)
#print(solved)

['Ld', [5, 0], ['NOOP'], 39, 38]
['Ll', [5, 0], ['A'], 40, 39]
['Lr', [5, 0], ['B'], 40, 39]
['Ld', [6, 0], ['right'], 42, 41]
['Ll', [5, 0], ['right', 'A'], 41, 40]
['Lr', [5, 0], ['right', 'B'], 41, 40]
['Ld', [4, 0], ['left'], 37, 36]
['Ll', [5, 0], ['left', 'A'], 40, 39]
['Lr', [5, 0], ['left', 'B'], 40, 39]
['Ld', [5, 0], ['down'], 39, 38]
['Ll', [5, 0], ['down', 'A'], 40, 39]
['Lr', [5, 0], ['down', 'B'], 40, 39]


In [14]:
TestJd = MicroState(AStarTestBoard, "Jd", (5,0), AGoalState)
neighbors = AStartState.generate_neighbors()
for state in neighbors:
    print(state)

['Ld', [5, 0], ['NOOP'], 39, 38]
['Ll', [5, 0], ['A'], 40, 39]
['Lr', [5, 0], ['B'], 40, 39]
['Ld', [6, 0], ['right'], 42, 41]
['Ll', [5, 0], ['right', 'A'], 41, 40]
['Lr', [5, 0], ['right', 'B'], 41, 40]
['Ld', [4, 0], ['left'], 37, 36]
['Ll', [5, 0], ['left', 'A'], 40, 39]
['Lr', [5, 0], ['left', 'B'], 40, 39]
['Ld', [5, 0], ['down'], 39, 38]
['Ll', [5, 0], ['down', 'A'], 40, 39]
['Lr', [5, 0], ['down', 'B'], 40, 39]


In [15]:
AStar = AStarTetrisSolver(AStartState, AGoalState)
solved = AStar.solve()

In [16]:
for i in range(0,len(solved[0])):
    print((solved[0][i], solved[1][i]))

(['left'], [5, 0])
(['down'], [4, 0])
(['down'], [4, 0])
(['down'], [4, 0])
(['down'], [4, 1])
(['down'], [4, 1])
(['down'], [4, 2])
(['down'], [4, 2])
(['down'], [4, 3])
(['down'], [4, 3])
(['down'], [4, 4])
(['down'], [4, 4])
(['down'], [4, 5])
(['down'], [4, 5])
(['down'], [4, 6])
(['down'], [4, 6])
(['down'], [4, 7])
(['down'], [4, 7])
(['down'], [4, 8])
(['down'], [4, 8])
(['down'], [4, 9])
(['down'], [4, 9])
(['down'], [4, 10])
(['down'], [4, 10])
(['down'], [4, 11])
(['down'], [4, 11])
(['down'], [4, 12])
(['down'], [4, 12])
(['down'], [4, 13])
(['down'], [4, 13])
(['down'], [4, 14])
(['down'], [4, 14])
(['down'], [4, 15])
(['down'], [4, 15])
(['down'], [4, 16])
(['down'], [4, 16])
(['left'], [4, 17])
(['down'], [3, 17])
(['down'], [3, 17])
(['down'], [3, 17])
(['NOOP'], [3, 17])


In [17]:
print(len(solved[0]))
print(len(solved[1]))

41
41


In [18]:
#STOP

In [19]:
#NewNode = AStartState.takeAction("left")
#print(AStartState.h_score)
#print(AStartState)
#print(NewNode.parent)
#print(NewNode)

In [20]:
#solved.append(['down'])
#solved.append(['NOOP'])
#solved.append(['NOOP'])

In [21]:
#print(solved)

In [22]:
#Normal results:
#[(['left', 'A'], [5, 0]), 
#(['down'], [4, 0]), 
#(['left'], [4, 0]), 
#(['down'], [3, 0]), 
#(['left'], [3, 0]), 
#(['down'], [2, 0]), 
#(['down'], [2, 0]), 
#(['down'], [2, 0]), 
#(['down'], [2, 1]), 
#(['down'], [2, 1]), 
#(['down'], [2, 2]), 
#(['down'], [2, 2]), 
#(['down'], [2, 3]), 
#(['down'], [2, 3]), 
#(['down'], [2, 4]), 
#(['down'], [2, 4]), 
#(['down'], [2, 5]), 
#(['down'], [2, 5]), 
#(['down'], [2, 6]), 
#(['down'], [2, 6]), 
#(['down'], [2, 7]), 
#(['down'], [2, 7]), 
#(['down'], [2, 8]), 
#(['down'], [2, 8]), 
#(['down'], [2, 9]), 
#(['down'], [2, 9]), 
#(['down'], [2, 10]), 
#(['down'], [2, 10]), 
#(['down'], [2, 11]), 
#(['down'], [2, 11]), 
#(['down'], [2, 12]), 
#(['down'], [2, 12]), 
#(['down'], [2, 13]), 
#(['down'], [2, 13]), 
#(['down'], [2, 14]), 
#(['down'], [2, 14]), 
#(['down'], [2, 15]), 
#(['down'], [2, 15]), 
#(['down'], [2, 16]), 
#(['down'], [2, 16]), 
#(['down'], [2, 17]), 
#(['down'], [2, 17]), 
#(['left'], [2, 18])]

#Actual results:
#(['left', 'A'], [5, 0])
#(['down'], [5, 0])
#(['left'], [4, 0])
#(['down'], [4, 0])
#(['left'], [3, 0])
#(['down'], [3, 0])
#(['down'], [2, 0])
#(['down'], [2, 0])
#(['down'], [2, 0])
#(['down'], [2, 1])
#(['down'], [2, 1])
#(['down'], [2, 2])
#(['down'], [2, 2])
#(['down'], [2, 3])
#(['down'], [2, 3])
#(['down'], [2, 4])
#(['down'], [2, 4])
#(['down'], [2, 5])
#(['down'], [2, 5])
#(['down'], [2, 6])
#(['down'], [2, 6])
#(['down'], [2, 7])
#(['down'], [2, 7])
#(['down'], [2, 8])
#(['down'], [2, 8])
#(['down'], [2, 9])
#(['down'], [2, 9])
#(['down'], [2, 10])
#(['down'], [2, 10])
#(['down'], [2, 11])
#(['down'], [2, 11])
#(['down'], [2, 12])
#(['down'], [2, 12])
#(['down'], [2, 13])
#(['down'], [2, 13])
#(['down'], [2, 14])
#(['down'], [2, 14])
#(['down'], [2, 15])
#(['down'], [2, 15])
#(['down'], [2, 16])
#(['down'], [2, 16])
#(['down'], [2, 17])
#(['left'], [2, 17])

In [23]:
#STOP

In [24]:
#To use this class, simply run the following line in Agent's action function:
##MacroState(ColorBoardtoSimpleBoard(self.env.ram[0x0400:0x04C8].reshape((20, 10)).copy()), self.info, self.nextinfo)
class MacroState:
    
    def __init__(self, boardstate, currentpiece, nextpiece):
        self.boardstate = boardstate
        self.currentpiece = currentpiece
        self.next_piece = nextpiece


In [25]:
#UCTTest = MacroState(UCTTestBoard, UCTPiece, UCTNextPiece)

In [26]:
#uct = UCTTetrisSolver(UCTTestBoard, UCTTest)

In [27]:
#print(UCTPiece)

In [28]:
#print(uct.run(UCTPiece, UCTTestBoard))

In [29]:
#STOP

In [30]:
#Agent class itself
class Agent:
    
    def __init__(self, episodes=1, num_sims=10, num_pieces=2):
        self.env = gym_tetris.make('TetrisA-v2',deterministic = True)
        self.env = JoypadSpace(self.env, MOVEMENT)
        self.env.deterministic = True
        #Testing to see whether using the pixels for the state works better than just the board.
        #self.state = self.env.reset()
        self.env.reset()
        self.env.deterministic = True
        self.env.render()
        self.state = tuple([ColorBoardtoSimpleBoard(self.env.ram[0x0400:0x04C8].reshape((20, 10)).copy()), "", "", tuple(self.env.ram[0x0040:0x0042])])
        print(ColorBoardtoSimpleBoard(self.env.ram[0x0400:0x04C8].reshape((20, 10)).copy()))
        print(tuple(self.env.ram[0x0040:0x0042]))
        #print(self.env.ram[0x0044])
        self.info = "NONE"
        self.nextinfo = "NONE"
        self.prevaction = "NOOP"
        self.highscore = 0
        self.time = 0
        self.linestates = []
        self.listofhighscores = []
        self.listofhighscorerates = []
        self.listofsafetyscores = []
        
        
        self.actions = MOVEMENT
        self.state_actions = []  # state & action track

        self.episodes = episodes  # number of episodes going to play
        self.steps_per_episode = []
        
        self.num_sims = num_sims
        self.num_pieces = num_pieces
        
        self.actiontape = []
        self.loctape = []
        self.cptape = []
        
        
    def chooseAction(self):
        #action = 0
        
        #print(self.actions)
        if self.info == "NONE":
            action = np.random.choice(len(self.actions))
        else:
            #Algorithms go here
            Board = ColorBoardtoSimpleBoard(self.env.ram[0x0400:0x04C8].reshape((20, 10)).copy())
            #for row in Board:
            #    print(row)
            
            UCTTest = MacroState(Board, self.info, self.nextinfo)
            UCT = UCTTetrisSolver(Board, UCTTest)
            UCTResult = UCT.run(self.info, Board, self.num_sims, self.num_pieces)
            
            AStarGoalPiece = UCTResult[0]['piece']
            self.goallocation = UCTResult[0]['location']
            AStarGoalLocation = UCTResult[0]['location']
            AGoalState = MicroState(Board, AStarGoalPiece, AStarGoalLocation)
            AStartState = MicroState(Board, self.info, tuple(self.env.ram[0x0040:0x0042]), AGoalState, self.env.ram[0x0044], self.prevaction)
            #print(self.env.ram[0x0044])
            
            
            AStar = AStarTetrisSolver(AStartState, AGoalState)
            AStarResult = AStar.solve()
            print(UCTResult)
            print(self.info, tuple(self.env.ram[0x0040:0x0042]))
            print(AStarGoalPiece, AStarGoalLocation)
            
            #print(AStarResult)
            if AStarResult is None:
                #self.env.close()
                #print(AStarGoalPiece, AStarGoalLocation)
                raise IndexError
            
            self.actiontape = AStarResult[0]
            self.loctape = AStarResult[1]
            self.cptape = AStarResult[2]
            action = MOVEMENT.index(self.actiontape[0])
            print(self.actiontape)
            print(self.cptape)
            
            
            
        self.prevaction = MOVEMENT[action]

        return action
    
    def reset(self):
        self.env.close()
        self.env = gym_tetris.make('TetrisA-v2',deterministic = True)
        self.env = JoypadSpace(self.env, MOVEMENT)
        self.env.deterministic = True
        self.env.reset()
        self.env.deterministic = True
        self.env.render()
        self.state = tuple([ColorBoardtoSimpleBoard(self.env.ram[0x0400:0x04C8].reshape((20, 10)).copy()), "", "", tuple(self.env.ram[0x0040:0x0042])])
        self.info = "NONE"
        self.nextinfo = "NONE"
        self.prevaction = "NOOP"
        #Conversion of self.state to tuple for hashing purposes
        #self.state = tuple([tuple(x) for x in self.state])
        self.state_actions = []
        self.highscore = 0
        self.time = 0
        self.linestates = []
        
        self.actiontape = []


    def play(self):
        self.steps_per_episode = []  
        
        for ep in range(self.episodes):
            done = False
            once = True
            iterator = 1
            while not done:
                
                
                if self.env.ram[0x0048] != 1: #Player is not in control
                    action = 0 #NOOP
                    once = True
                    if self.goallocation != tuple(self.env.ram[0x0040:0x0042]) and errorthrow:
                        #print("Error: AStar search does not match actual results")
                        print(self.goallocation)
                        print(tuple(self.env.ram[0x0040:0x0042]))
                        errorthrow = False
                    
                    
                    if len(self.actiontape) > 0: #Contingency so that actiontape from one piece does not carry over to the next
                        print(self.actiontape)
                        self.actiontape = []
                elif once: #Necessary to NOOP at first frame of piece because otherwise desync can occur
                    once = False
                    action = 0 #NOOP
                
                elif iterator >= len(self.actiontape):
                    action = self.chooseAction()
                    iterator = 1
                    errorthrow = True
                else:
                    
                    
                    #print(self.loctape[0])
                    #print(tuple(self.env.ram[0x0040:0x0042]))
                    if tuple(self.loctape[iterator-1]) != tuple(self.env.ram[0x0040:0x0042]):
                        print("Error: AStar search does not match actual location")
                        print(self.loctape[iterator-1])
                        print(tuple(self.env.ram[0x0040:0x0042]))
                        print(iterator-1)
                        print(self.actiontape)
                        print(self.loctape)
                        #raise ValueError
                    
                    if self.cptape[iterator-1] != self.info:
                        print("Error: AStar search does not match actual rotation")
                        print(self.cptape[iterator-1])
                        print(self.info)
                        print(iterator-1)
                        print(self.actiontape)
                        print(self.cptape)
                        raise RuntimeError
                    
                    #if tuple(self.loctape[iterator]) != tuple(self.env.ram[0x0040:0x0042]):
                    #    raise ValueError
                    
                    action = MOVEMENT.index(self.actiontape[iterator])
                    iterator += 1
                    #del self.actiontape[0]
                    #del self.loctape[0]
                    #del self.cptape[0]
                    errorthrow = True
                #self.oops = False
                self.state_actions.append((self.state, action))
                try:
                    previnfo = info["current_piece"]
                except:
                    j = "j"

                #unusedstate, reward, done, info = self.env.step(self.env.action_space.sample())
                
                #print((MOVEMENT[action], list(self.env.ram[0x0040:0x0042])))
                #time.sleep(1)
                
                unusedstate, reward, done, info = self.env.step(action)
                
                #print(tuple(self.env.ram[0x0040:0x0042]))
                #time.sleep(1)
                
                #print(self.env.ram[0x0045])
                #try:
                #    if previnfo[0] != info["current_piece"][0]:
                        #print(previnfo)
                        #print(prevpos)
                        #Time assuming no action: 960
                        #Time assuming down: 49
                        #print(self.time)
                #        self.oops = True
                #except:
                #    j = "j"
                #print(info["current_piece"])
                #print(info["next_piece"])
                #print(self.env.ram[0x0042])
                #if self.env.ram[0x0048] != 1:
                #    print(self.time)
                
                self.env.render()
                self.state = tuple([ColorBoardtoSimpleBoard(self.env.ram[0x0400:0x04C8].reshape((20, 10)).copy()), info["current_piece"], info["next_piece"], tuple(self.env.ram[0x0040:0x0042])])
                self.info = info["current_piece"]
                self.nextinfo = info["next_piece"]
                
                #if info["current_piece"] == "Tu":
                    #print(info["current_piece"])
                    #time.sleep(10)
                
                #if CollisionDetection(self.state[0], self.state[1], self.state[3]) == False:
                    #print(self.state[1])
                    #print(self.state[3])
                    #time.sleep(10)
                
                self.highscore = info["score"]
                self.time += 1
                self.linestates.append(info["board_height"])

            # end of game
            #if ep % 10 == 0:
            self.listofhighscores.append(self.highscore)
            self.listofhighscorerates.append(self.highscore / self.time)
            self.listofsafetyscores.append(sum(self.linestates) / self.time)
            print("episode", ep)
            print("Highscore: " + str(self.highscore))
            print("Score rate: " + str(self.highscore / self.time))
            print("Safety score: " + str(sum(self.linestates) / self.time))
            self.steps_per_episode.append(len(self.state_actions))
            #print(ColorBoardtoSimpleBoard(self.env.ram[0x0400:0x04C8].reshape((20, 10)).copy()))
            self.reset()
        self.env.close()
if __name__ == "__main__":
    N_EPISODES = 1
    # comparison
    agent = Agent(episodes=N_EPISODES, num_sims=10, num_pieces = 2)
    agent.play()

    highscores = agent.listofhighscores
    highscorerates = agent.listofhighscorerates
    safetyscores = agent.listofsafetyscores

    plt.figure(figsize=[10, 6])
    plt.ylim(0, 50)
    plt.plot(range(N_EPISODES), highscores, label="high score")
    plt.legend()
        
    plt.figure(figsize=[10, 6])
    plt.ylim(0, 0.1)
    plt.plot(range(N_EPISODES), highscorerates, label="score rate")
    plt.legend()
    
    plt.figure(figsize=[10, 6])
    plt.ylim(4, 12)
    plt.plot(range(N_EPISODES), safetyscores, label="safety score")
    plt.legend()



  logger.warn(
  logger.warn(
  logger.warn(


((0, 0, 0, 0, 0, 0, 0, 0, 0, 0), (0, 0, 0, 0, 0, 0, 0, 0, 0, 0), (0, 0, 0, 0, 0, 0, 0, 0, 0, 0), (0, 0, 0, 0, 0, 0, 0, 0, 0, 0), (0, 0, 0, 0, 0, 0, 0, 0, 0, 0), (0, 0, 0, 0, 0, 0, 0, 0, 0, 0), (0, 0, 0, 0, 0, 0, 0, 0, 0, 0), (0, 0, 0, 0, 0, 0, 0, 0, 0, 0), (0, 0, 0, 0, 0, 0, 0, 0, 0, 0), (0, 0, 0, 0, 0, 0, 0, 0, 0, 0), (0, 0, 0, 0, 0, 0, 0, 0, 0, 0), (0, 0, 0, 0, 0, 0, 0, 0, 0, 0), (0, 0, 0, 0, 0, 0, 0, 0, 0, 0), (0, 0, 0, 0, 0, 0, 0, 0, 0, 0), (0, 0, 0, 0, 0, 0, 0, 0, 0, 0), (0, 0, 0, 0, 0, 0, 0, 0, 0, 0), (0, 0, 0, 0, 0, 0, 0, 0, 0, 0), (0, 0, 0, 0, 0, 0, 0, 0, 0, 0), (0, 0, 0, 0, 0, 0, 0, 0, 0, 0), (0, 0, 0, 0, 0, 0, 0, 0, 0, 0))
(5, 0)


  logger.deprecation(
  if not isinstance(done, (bool, np.bool8)):


[{'piece': 'Jr', 'location': (7, 18), 'score': -0.8}, {'piece': 'Jd', 'location': (2, 18), 'score': -4.9}, {'piece': 'Ju', 'location': (2, 19), 'score': -6.0}, {'piece': 'Ju', 'location': (1, 19), 'score': -7.6}, {'piece': 'Jd', 'location': (-1, -1), 'score': -1000}]
Jd (5, 0)
Jr (7, 18)
[['right'], ['down', 'B'], ['down'], ['down'], ['down'], ['down'], ['down'], ['down'], ['down'], ['down'], ['down'], ['down'], ['down'], ['down'], ['down'], ['down'], ['down'], ['down'], ['down'], ['down'], ['down'], ['down'], ['down'], ['down'], ['down'], ['down'], ['down'], ['down'], ['down'], ['down'], ['down'], ['down'], ['down'], ['down'], ['down'], ['down'], ['down'], ['down'], ['right'], ['down'], ['down'], ['down'], ['NOOP']]
['Jd', 'Jd', 'Jr', 'Jr', 'Jr', 'Jr', 'Jr', 'Jr', 'Jr', 'Jr', 'Jr', 'Jr', 'Jr', 'Jr', 'Jr', 'Jr', 'Jr', 'Jr', 'Jr', 'Jr', 'Jr', 'Jr', 'Jr', 'Jr', 'Jr', 'Jr', 'Jr', 'Jr', 'Jr', 'Jr', 'Jr', 'Jr', 'Jr', 'Jr', 'Jr', 'Jr', 'Jr', 'Jr', 'Jr', 'Jr', 'Jr', 'Jr']
[['right'], ['down',

RuntimeError: 