In [None]:
#Necessary Dependencies:
#!pip install gym_tetris
#!pip install nes-py

In [None]:
#Necessary Dependencies:
#pip install gym_tetris
from nes_py.wrappers import JoypadSpace
import gym_tetris
from gym_tetris.actions import MOVEMENT
import numpy as np
import matplotlib.pyplot as plt
import time

#Frames a piece takes to fall one row, indexed by level (index 0 = level 0).
#This implementation uses the NTSC version of tetris, which has slightly different frames for the falling pieces than the PAL version.
#Source for fall frames: https://listfist.com/list-of-tetris-levels-by-speed-nes-ntsc-vs-pal
#The table has 30 entries (levels 0-29); every level from 29 upwards shares the last entry,
#so callers should clamp the level to the last index before looking it up.
SpeedtoFallFrames = [
    48, 43, 38, 33, 28, 23, 18, 13, 8, 6,  # levels 0-9
    5, 5, 5,                               # levels 10-12
    4, 4, 4,                               # levels 13-15
    3, 3, 3,                               # levels 16-18
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2,          # levels 19-28
    1,                                     # level 29 and above
]

#Maps each piece letter to the orientation IDs it can take; the current ID is read from self.env.ram[0x0042].
#info["current_piece"] already carries the same information in a friendlier form, so this table is kept mostly for reference.
#Each piece owns a contiguous run of IDs, hence the ranges below.
PieceOrientation = dict(
    T=list(range(0, 4)),
    J=list(range(4, 8)),
    Z=list(range(8, 10)),
    O=list(range(10, 11)),
    S=list(range(11, 13)),
    L=list(range(13, 17)),
    I=list(range(17, 19)),
)



#Takes the board ripped out from the NES RAM and converts it to a simpler board to use
#Is mostly necessary because the board in the NES RAM also takes into account the colors of the pieces, which is not necessary for the algorithm to know.
def ColorBoardtoSimpleBoard(board):
    """Collapse a colour-coded board into a hashable occupancy grid.

    Args:
        board: iterable of rows, each an iterable of tile values. The value
            239 marks an empty cell; any other value is treated as filled.

    Returns:
        A tuple of row tuples holding 0 for empty cells and 1 for filled ones.
    """
    empty_tile = 239
    return tuple(
        tuple(0 if tile == empty_tile else 1 for tile in line)
        for line in board
    )
    

#Example construction (written for use inside Agent, once self.env.step() has returned an info dict):
#MicroState(ColorBoardtoSimpleBoard(self.env.ram[0x0400:0x04C8].reshape((20, 10)).copy()), info["current_piece"], tuple(self.env.ram[0x0040:0x0042]), self.env.ram[0x0044])
class MicroState:
    """Fine-grained game state: the board plus the falling piece's position and speed.

    Attributes are plain copies of the constructor arguments, plus
    ``fallframes``, the SpeedtoFallFrames entry for ``speed``.
    """

    def __init__(self, boardstate, currentpiece, piecelocation, speed, piecerotation=None):
        """Store the state components and look up the fall delay.

        Args:
            boardstate: simplified board, e.g. the output of ColorBoardtoSimpleBoard.
            currentpiece: identifier of the falling piece.
            piecelocation: location of the falling piece.
            speed: index into SpeedtoFallFrames; values past the end of the
                table are clamped to its last entry.
            piecerotation: optional rotation of the falling piece. It is
                optional and last so existing four-argument calls keep working;
                previously the attribute was assigned from an undefined name,
                which made every construction raise NameError.
        """
        self.boardstate = boardstate
        self.currentpiece = currentpiece
        self.piecelocation = piecelocation
        self.piecerotation = piecerotation
        self.speed = speed
        # Clamp against the real table length rather than a literal so the
        # lookup cannot run off the end of the list.
        self.fallframes = SpeedtoFallFrames[min(speed, len(SpeedtoFallFrames) - 1)]

#Example construction (written for use inside Agent, once self.env.step() has returned an info dict):
#MacroState(ColorBoardtoSimpleBoard(self.env.ram[0x0400:0x04C8].reshape((20, 10)).copy()), info["current_piece"], info["next_piece"])
class MacroState:
    """Coarse game state: the board together with the current and upcoming piece."""

    def __init__(self, boardstate, currentpiece, nextpiece):
        """Keep the three components as attributes of the same names.

        Args:
            boardstate: simplified board, e.g. the output of ColorBoardtoSimpleBoard.
            currentpiece: identifier of the piece currently in play.
            nextpiece: identifier of the piece that follows it.
        """
        (
            self.boardstate,
            self.currentpiece,
            self.nextpiece,
        ) = (boardstate, currentpiece, nextpiece)

class Agent:
    """Plays NES Tetris ('TetrisA-v2' from gym_tetris) and collects per-episode statistics.

    The observable state is a tuple
    ``(simple board, current piece, next piece, piece location)`` where the
    board is read from emulator RAM 0x0400-0x04C7 (20 rows x 10 columns) and
    the piece location from RAM 0x0040-0x0041.

    After ``play()`` the results are available in ``listofhighscores``,
    ``listofhighscorerates`` (score per step), ``listofsafetyscores`` (mean
    ``info["board_height"]`` per step) and ``steps_per_episode``.
    """

    def __init__(self, episodes=1):
        """Create the environment and the empty bookkeeping containers.

        Args:
            episodes: number of episodes ``play()`` will run.
        """
        self.env = self._makeEnv()
        # No info dict exists before the first step, so the piece fields start empty.
        self.state = self._readState("", "")

        self.highscore = 0
        self.time = 0
        self.linestates = []
        self.listofhighscores = []
        self.listofhighscorerates = []
        self.listofsafetyscores = []

        self.actions = MOVEMENT
        self.state_actions = []  # state & action track

        self.episodes = episodes  # number of episodes going to play
        self.steps_per_episode = []

    def _makeEnv(self):
        """Build a fresh, reset and rendered Tetris environment wrapped in JoypadSpace."""
        env = gym_tetris.make('TetrisA-v2', deterministic = True)
        env = JoypadSpace(env, MOVEMENT)
        env.reset()
        env.render()
        return env

    def _readState(self, currentpiece, nextpiece):
        """Snapshot the hashable state tuple from emulator RAM plus the given piece names."""
        board = ColorBoardtoSimpleBoard(self.env.ram[0x0400:0x04C8].reshape((20, 10)).copy())
        return tuple([board, currentpiece, nextpiece, tuple(self.env.ram[0x0040:0x0042])])

    def _actionIndex(self, action):
        """Return the position of ``action`` (a sequence of button names) in ``self.actions``.

        The wrapped environment is stepped with an integer index, whereas
        ``chooseAction`` returns the button combination itself.

        Raises:
            ValueError: if ``action`` is not one of ``self.actions``.
        """
        return self.actions.index(list(action))

    def chooseAction(self):
        """Pick the next action; returns one entry of ``self.actions``.

        Right now the agent just makes uniformly random moves until the
        2-stage (macro/micro) algorithm is in place.
        """
        #Insert macro/micro algorithm here:
        return self.actions[np.random.choice(len(self.actions))]

    def reset(self):
        """Discard the current environment, start a new one and clear per-episode state."""
        self.env.close()
        self.env = self._makeEnv()
        self.state = self._readState("", "")
        self.state_actions = []
        self.highscore = 0
        self.time = 0
        self.linestates = []

    def play(self):
        """Run ``self.episodes`` episodes, printing and recording statistics for each."""
        self.steps_per_episode = []

        for ep in range(self.episodes):
            # A finished environment cannot be stepped again, and the counters
            # must not leak into the next episode, so start every episode after
            # the first from a clean slate. The last episode's environment and
            # statistics are left in place for inspection.
            if ep > 0:
                self.reset()

            done = False
            while not done:
                action = tuple(self.chooseAction())
                self.state_actions.append((self.state, action))

                # Execute the action that was just recorded (previously an
                # unrelated random sample was played instead).
                unusedstate, reward, done, info = self.env.step(self._actionIndex(action))

                self.env.render()
                # Advance the tracked state so the next recorded pair reflects
                # what the agent actually saw.
                self.state = self._readState(info["current_piece"], info["next_piece"])
                self.highscore = info["score"]
                self.time += 1
                self.linestates.append(info["board_height"])

            # end of game
            scorerate = self.highscore / self.time
            safetyscore = sum(self.linestates) / self.time
            self.listofhighscores.append(self.highscore)
            self.listofhighscorerates.append(scorerate)
            self.listofsafetyscores.append(safetyscore)
            print("episode", ep)
            print("Highscore: " + str(self.highscore))
            print("Score rate: " + str(scorerate))
            print("Safety score: " + str(safetyscore))
            self.steps_per_episode.append(len(self.state_actions))
if __name__ == "__main__":
    # Run the agent for a fixed number of episodes, then chart each statistic
    # in its own figure.
    N_EPISODES = 1
    # comparison
    agent = Agent(episodes=N_EPISODES)
    agent.play()

    highscores = agent.listofhighscores
    highscorerates = agent.listofhighscorerates
    safetyscores = agent.listofsafetyscores

    # One entry per figure: (series, (y-axis lower, y-axis upper), legend label).
    charts = (
        (highscores, (0, 50), "high score"),
        (highscorerates, (0, 0.1), "score rate"),
        (safetyscores, (4, 12), "safety score"),
    )
    for series, (ylow, yhigh), legendlabel in charts:
        plt.figure(figsize=[10, 6])
        plt.ylim(ylow, yhigh)
        plt.plot(range(N_EPISODES), series, label=legendlabel)
        plt.legend()

