In [1]:
from colosseumrl.envs.tron import TronGridEnvironment, TronRender, TronRllibEnvironment, rllib
import numpy as np
from time import sleep
from random import random, choice
from tabulate import tabulate
from random import sample

In [2]:
class smartTronRL(TronRllibEnvironment):
    """Heuristic Tron agent together with a driver that plays rendered games.

    Player 0 is steered by a "freshness" heuristic: ``reward_table`` counts,
    per cell, how many decision steps have passed since the cell was last
    seen occupied. When the cell straight ahead is blocked, the agent turns
    towards the free side whose straight-line ray carries the larger summed
    reward. Every other player uses ``simpleAvoidAgent``.

    NOTE(review): the base-class initializer is not invoked (the original
    did not call it either), so only the attributes set in ``__init__`` are
    guaranteed to exist on an instance -- confirm whether ``step`` is ever
    expected to work on this class.
    NOTE(review): ``reward_table`` is never reset, so it carries over from
    one ``play()`` call to the next -- confirm this is intended.
    """

    def __init__(self, noise=0.1, board_size=25, num_players=4):
        """Store the configuration and allocate the per-cell reward table.

        Args:
            noise: Stored for callers; random exploration is currently
                disabled, so the value is not consulted by the agents.
            board_size: Side length used for the reward table and for the
                games created by ``play``.
            num_players: Number of players in games created by ``play``;
                also the highest cell value treated as "occupied" when the
                reward table is refreshed.
        """
        self.noise = noise
        self._board_size = board_size
        self._num_players = num_players
        self.reward_table = np.zeros((board_size, board_size))

    def step(self, action_dict):
        """Forward to the parent ``step`` and return its four results unchanged."""
        observation, reward, done, info = super().step(action_dict)
        return observation, reward, done, info

    def statesPlayersIn(self, state, player):
        """Return ``(row, col)`` for every cell of ``state[0]`` equal to ``player``."""
        return [
            (row_index, col_index)
            for row_index, row in enumerate(state[0])
            for col_index, cell in enumerate(row)
            if cell == player
        ]

    def simpleAvoidAgent(self, env, observation):
        """Go forward when possible, otherwise try a random side, then the other.

        Args:
            env: Object providing ``next_cell(x, y, direction, board_size)``.
            observation: Mapping with ``'board'``, ``'heads'`` and
                ``'directions'``; index 0 of the latter two is used as this
                agent's own head and heading.

        Returns:
            One of ``'forward'``, ``'right'`` or ``'left'``.
        """
        board = observation['board']
        head = observation['heads'][0]
        direction = observation['directions'][0]

        # The head is a flat index; unpack it into column (x) and row (y).
        board_size = board.shape[0]
        x, y = head % board_size, head // board_size

        # Straight ahead is preferred whenever it is free.
        nx, ny = env.next_cell(x, y, direction, board_size)
        if board[ny, nx] == 0:
            return 'forward'

        # Probe one randomly chosen side; fall back to the other one blindly.
        offset, action, backup = choice([(1, 'right', 'left'), (-1, 'left', 'right')])
        nx, ny = env.next_cell(x, y, (direction + offset) % 4, board_size)
        if board[ny, nx] == 0:
            return action
        return backup

    def occupiedStates(self, board):
        """Return ``(row, col)`` for every cell of ``board`` with a value above 0."""
        return [
            (row_index, col_index)
            for row_index, row in enumerate(board)
            for col_index, spot in enumerate(row)
            if spot > 0
        ]

    def rewardTheTable(self, occupiedStates):
        """Age the reward table by one step and zero every occupied cell.

        Free cells gain 1; cells listed in ``occupiedStates`` are reset to 0.
        Coordinates outside the table are ignored.
        """
        rows, cols = self.reward_table.shape
        self.reward_table += 1
        # A set removes duplicates and avoids a list scan per table cell.
        for row_index, col_index in set(occupiedStates):
            if 0 <= row_index < rows and 0 <= col_index < cols:
                self.reward_table[row_index, col_index] = 0

    def strDirection(self, direction):
        """Name a direction number (debug helper).

        NOTE(review): the number-to-name mapping is the original author's
        assumption and is not confirmed by anything in this file.
        """
        return {2: "up", 3: "right", 1: "left"}.get(direction, "down")

    def _ray_reward(self, x, y, dx, dy):
        """Sum ``reward_table`` from the cell after ``(x, y)`` to the table edge.

        ``(dx, dy)`` must be a non-zero step; the start cell is excluded.
        """
        rows, cols = self.reward_table.shape
        total = 0.0
        cx, cy = x + dx, y + dy
        while 0 <= cx < cols and 0 <= cy < rows:
            total += self.reward_table[cy, cx]
            cx += dx
            cy += dy
        return total

    def __call__(self, env, observation, player, state):
        """Choose the next action for ``player``.

        Players above 0 delegate to ``simpleAvoidAgent``. Player 0 refreshes
        the reward table, keeps going forward while that cell is free, and
        otherwise turns to the free side with the larger ray reward.

        Args:
            env: Object providing ``next_cell(x, y, direction, board_size)``.
            observation: Mapping with ``'board'``, ``'heads'``, ``'directions'``.
            player: Player index; only 0 uses the reward heuristic.
            state: Game state whose first element is the board grid.

        Returns:
            One of ``'forward'``, ``'right'`` or ``'left'``.
        """
        if player > 0:
            return self.simpleAvoidAgent(env, observation)

        board = observation['board']

        # Cells holding any value 1..num_players are treated as occupied
        # (presumably player trails -- TODO confirm against the environment).
        occupied = set()
        for player_id in range(1, self._num_players + 1):
            occupied.update(self.statesPlayersIn(state, player_id))
        self.rewardTheTable(occupied)

        head = observation['heads'][0]
        direction = observation['directions'][0]
        board_size = board.shape[0]
        x, y = head % board_size, head // board_size

        # Keep going straight for as long as the next cell is free.
        nx, ny = env.next_cell(x, y, direction, board_size)
        if board[ny, nx] == 0:
            return 'forward'

        # Side cells come from env.next_cell with (direction +/- 1) % 4, the
        # same convention simpleAvoidAgent uses, rather than from a
        # hard-coded direction-number -> compass table.
        options = []
        for offset, action in ((1, 'right'), (-1, 'left')):
            tx, ty = env.next_cell(x, y, (direction + offset) % 4, board_size)
            dx, dy = tx - x, ty - y
            if abs(dx) + abs(dy) != 1:
                continue  # next_cell did not move one cell: treat as blocked
            if not (0 <= tx < board_size and 0 <= ty < board_size):
                continue  # off the board
            if board[ty, tx] != 0:
                continue  # occupied
            # dx + dy is +1 or -1; it breaks reward ties towards +x / +y,
            # which is the preference order of the original branch chain.
            options.append((self._ray_reward(x, y, dx, dy), dx + dy, action))

        if not options:
            # Every neighbour is blocked; the choice no longer matters.
            return 'left'
        return max(options)[2]

    def play(self):
        """Play one rendered game (player 0 vs. avoid agents) and print the result."""
        env = self.create_env(board_size=self._board_size, num_players=self._num_players)
        renderer = TronRender(board_size=self._board_size, num_players=self._num_players)

        state, players = env.new_state()
        terminal = False

        renderer.close()
        renderer.render(state)

        while not terminal:
            actions = [
                self(env, env.state_to_observation(state, player), player, state)
                for player in players
            ]
            # Advance first, then draw, so the final frame is shown as well.
            state, players, rewards, terminal, winners = env.next_state(state, players, actions)
            renderer.render(state)
            sleep(0.05)

        if winners.size == 0:
            print(f"No single player won. Tie with rankings: {env.compute_ranking(state, players, winners)}")
        else:
            print(f"Player {winners[0]} wins.")

In [None]:
# Play five rendered games in a row, reusing one agent instance so its
# reward table is shared across the games.
ttrl = smartTronRL()
for game_number in range(5):
    ttrl.play()

No single player won. Tie with rankings: {0: 0, 2: 0, 1: 0, 3: 3}
Player 3 wins.
Player 1 wins.
