In [2]:
import numpy as np
from random import random, choice

from colosseumrl.envs.tron import TronGridEnvironment, TronRender
from time import sleep

## An Intelligent Agent

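Now we design an agent that avoids basic obstacles: it keeps moving forward while the cell ahead is free, otherwise it turns toward a randomly chosen side if that side is free and takes the opposite turn if it is not. With a small probability it ignores these rules and picks a random action for variation.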

![genius](code.png)

In [3]:
class SimpleAvoidAgent:
    """Reactive Tron agent that steers away from an occupied cell directly ahead.

    The agent is stateless apart from ``noise``; every decision is made from
    the observation handed to ``__call__``.

    Parameters
    ----------
    noise : float
        Threshold compared against a fresh ``random()`` draw on every call.
        When the draw is ``<= noise`` the agent returns a uniformly random
        action instead of following its avoidance rule.
    """

    def __init__(self, noise=0.1):
        self.noise = noise

    def __call__(self, env, observation):
        """Choose the next action: ``'forward'``, ``'right'`` or ``'left'``.

        Parameters
        ----------
        env
            Object providing ``next_cell(x, y, direction, board_size)``, which
            must return the ``(x, y)`` pair of the neighbouring cell.
        observation
            Mapping with the keys ``'board'`` (2-D array indexed ``[y, x]``),
            ``'heads'`` and ``'directions'``. Only entry 0 of ``'heads'`` and
            ``'directions'`` is read.
            NOTE(review): presumably entry 0 is the observing player --
            confirm against ``state_to_observation``.

        Returns
        -------
        str
            The chosen action name.
        """
        # Occasionally ignore the rule entirely so games do not repeat.
        explore = random() <= self.noise
        if explore:
            return choice(['forward', 'right', 'left'])

        grid = observation['board']
        head_index = observation['heads'][0]
        heading = observation['directions'][0]

        # The head is a flat index into the board: row = index // side,
        # column = index % side.
        side = grid.shape[0]
        row, col = divmod(head_index, side)

        def is_free(facing):
            """True when the cell one step away in ``facing`` holds a 0."""
            cx, cy = env.next_cell(col, row, facing, side)
            return grid[cy, cx] == 0

        # Rule 1: keep going straight while nothing is in the way.
        if is_free(heading):
            return 'forward'

        # Rule 2: probe one randomly chosen side; if it is occupied too,
        # fall back to the opposite turn without checking it.
        turn, preferred, fallback = choice([(1, 'right', 'left'), (-1, 'left', 'right')])
        return preferred if is_free((heading + turn) % 4) else fallback

## Test Performance

In [7]:
# Game configuration, defined once so the environment and the renderer
# cannot be given mismatched dimensions.
BOARD_SIZE = 25      # side length of the square grid
NUM_PLAYERS = 4      # number of players in the game
AGENT_NOISE = 0.05   # chance (5%) that the agent executes a random action

# Create a Tron environment and a renderer sharing the same board size and player count
env = TronGridEnvironment.create(board_size=BOARD_SIZE, num_players=NUM_PLAYERS)
renderer = TronRender(board_size=BOARD_SIZE, num_players=NUM_PLAYERS)

# Create our agent with the configured chance of executing a random action
agent = SimpleAvoidAgent(noise=AGENT_NOISE)

In [10]:
# Begin a fresh game; new_state() returns the initial state and the players to move
state, players = env.new_state()

# Close the renderer before drawing the opening board
# NOTE(review): presumably this clears a window left by an earlier run of this cell -- confirm
renderer.close()
renderer.render(state)

# Advance one simultaneous turn at a time until the environment reports the end
while True:
    # Each player gets its own observation and asks the agent for an action
    actions = [
        agent(env, env.state_to_observation(state, pid))
        for pid in players
    ]

    # Apply all actions at once and draw the resulting board
    state, players, rewards, terminal, winners = env.next_state(state, players, actions)
    renderer.render(state)

    # Short pause so the game can be followed by eye
    sleep(0.05)

    if terminal:
        break

# Report the outcome
if winners.size != 0:
    print(f"Player {winners[0]} wins.")
else:
    print(f"No single player won. Tie with rankings: {env.compute_ranking(state, players, winners)}")

Player 0 wins.
