# Environment for RoPaSci360 board game

In [3]:
from gym import Env
from gym.spaces import Discrete, Box, Dict

from ropasci_game import RoPaSci360

import tensorflow as tf
import numpy as np
import random

In [4]:
# Scalar reward values for the environment. None of them is referenced by the
# code in this section yet; the notes below are read off the names only
# (presumably the outcome each one is meant to score -- confirm when wiring
# them into step()).
WIN_REWARD = 100                               # presumably: game won
LOSS_REWARD = -WIN_REWARD                      # presumably: game lost
VALID_ACTION_REWARD = 10                       # presumably: legal move played
INVALID_ACTION_REWARD = -VALID_ACTION_REWARD   # presumably: illegal move tried
EAT_TOKEN = 10                                 # presumably: opponent token captured

In [56]:
class RoPaSci360_game(Env):
    """Gym environment skeleton around a ``RoPaSci360`` game.

    The environment is still a work in progress: ``step`` validates and
    unpacks the action but does not apply it to the game yet, so the reward
    it returns is always 0 and the episode never terminates.
    """

    def __init__(self,
                 player='upper',
                 opponent='random',
                 log='True'):
        """Create the game, the action/observation spaces and a fresh board.

        Args:
            player: Side controlled by the agent, ``'upper'`` or ``'lower'``.
            opponent: Identifier of the opponent policy. Only stored here.
            log: Debug-output switch. Stored unchanged in ``self.log``; bools
                and the strings ``'True'``/``'False'`` are both understood
                (the default is the *string* ``'True'`` for backward
                compatibility).

        Raises:
            ValueError: If ``player`` is neither ``'upper'`` nor ``'lower'``.
        """
        super().__init__()

        # Constants
        self.max_turns = 360
        self.log = log

        #
        # Observation + Action spaces
        # ---------------------------
        #  Observations: RoPaSci board containing 61 hexes, with at most
        #                9 tokens per player.
        #  Actions: a symbol (3 choices) plus a pair of board positions.
        #
        # Note: not every action is legal
        #
        # NOTE(review): the position Box keeps its original definition
        # (default float dtype, inclusive upper bound 61). Integer indices
        # 0..60 look like the intent for 61 hexes -- confirm before changing,
        # since it alters which actions pass ``contains``.
        self.action_space = Dict({"symbol": Discrete(3),
                                  "position": Box(0, 61, shape=(2,))})

        # The upper bound used to be np.int8(-1), i.e. -1, which lies below
        # the lower bound 0 and made the interval empty (no observation could
        # ever be contained). The int8 maximum is used as a permissive bound.
        # NOTE(review): tighten once the board encoding is settled.
        self.observation_space = Box(low=np.int8(0),
                                     high=np.iinfo(np.int8).max,
                                     shape=(9, 9),
                                     dtype=np.int8)

        self.player = player
        self.player_2 = self.get_other_player()
        self.opponent = opponent
        self.game = RoPaSci360()
        # reset() creates self.board, so the board is built exactly once.
        self.reset()

    def _log_enabled(self):
        """Return True when debug printing is switched on via ``self.log``."""
        if isinstance(self.log, str):
            # bool('False') is True, so string flags need explicit parsing.
            return self.log.strip().lower() not in ('false', '0', 'no', 'off', '')
        return bool(self.log)

    def seed(self, seed=None):
        """Seed the ``random`` and ``numpy.random`` generators.

        Args:
            seed: Integer seed, or ``None`` to leave the generators untouched.

        Returns:
            A one-element list holding the seed that was passed in.
        """
        if seed is not None:
            random.seed(seed)
            np.random.seed(seed)
        return [seed]

    def step(self, action):
        """Validate ``action`` and return a Gym-style transition tuple.

        Args:
            action: Mapping with a ``'symbol'`` entry and a two-element
                ``'position'`` entry; it must be contained in
                ``self.action_space``.

        Returns:
            ``(observation, reward, done, info)`` where the observation is
            the current ``self.board``. Until move handling is implemented
            the reward is 0 and ``done`` is False.

        Raises:
            AssertionError: If the action is not part of the action space.
        """
        assert self.action_space.contains(action), "ACTION ERROR {}".format(action)

        reward = 0
        done = False
        info = {'turn': self.game.upper_turns,
                'move_type': None,
                'player': self.player}

        symbol = action['symbol']
        pos1, pos2 = action['position']
        if self._log_enabled():
            print(symbol)
            print(pos1, pos2)

        # TODO: apply the move (symbol, pos1, pos2) to self.game and derive
        # reward / done / info['move_type'] from the outcome.
        return self.board, reward, done, info

    def reset(self):
        """Replace the board with a new one from the game and return it."""
        self.board = self.game.new_board()
        return self.board

    def render(self, mode='human'):
        """Rendering is not implemented; ``mode`` is accepted and ignored."""
        pass

    def get_other_player(self):
        """Return the side opposite to ``self.player``.

        Raises:
            ValueError: If ``self.player`` is neither ``'upper'`` nor
                ``'lower'`` (previously this silently yielded ``None``).
        """
        if self.player == 'upper':
            return 'lower'
        if self.player == 'lower':
            return 'upper'
        raise ValueError(
            "player must be 'upper' or 'lower', got {!r}".format(self.player))

    def random_agent(self):
        """Pick one of the opponent's possible moves uniformly at random.

        An index is drawn instead of calling ``np.random.choice`` on the
        moves, because ``choice`` only accepts 1-D input and therefore fails
        when each move is itself a sequence (e.g. a tuple).

        Raises:
            ValueError: If the game reports no possible moves.
        """
        possible_moves = list(self.game.possible_moves(player=self.player_2))
        if not possible_moves:
            raise ValueError(
                "no possible moves for player {!r}".format(self.player_2))
        return possible_moves[np.random.randint(len(possible_moves))]

In [60]:
# Smoke check: build the environment with its default arguments and show
# which side ended up as the opponent (the recorded output is "lower").
env = RoPaSci360_game()
print(env.player_2)

lower


In [75]:
# Scratch experiment: a two-entry Dict space with one (9, 3) int8 Box per
# side, sampled once to inspect what comes out.
# NOTE(review): high is np.int8(-1), i.e. -1, which is below low (0); the
# all-zero sample recorded below goes with these bounds -- confirm the
# intended maximum before reusing this space.
obs = Dict({
    side: Box(low=np.int8(0), high=np.int8(-1), shape=(9, 3), dtype=np.int8)
    for side in ("upper", "lower")
})
print(obs.sample())

OrderedDict([('lower', array([[0, 0, 0],
       [0, 0, 0],
       [0, 0, 0],
       [0, 0, 0],
       [0, 0, 0],
       [0, 0, 0],
       [0, 0, 0],
       [0, 0, 0],
       [0, 0, 0]], dtype=int8)), ('upper', array([[0, 0, 0],
       [0, 0, 0],
       [0, 0, 0],
       [0, 0, 0],
       [0, 0, 0],
       [0, 0, 0],
       [0, 0, 0],
       [0, 0, 0],
       [0, 0, 0]], dtype=int8))])


In [78]:
# Show what an all-zero 9x3 int8 array looks like when printed.
print(np.zeros((9, 3), dtype=np.int8))

[[0 0 0]
 [0 0 0]
 [0 0 0]
 [0 0 0]
 [0 0 0]
 [0 0 0]
 [0 0 0]
 [0 0 0]
 [0 0 0]]
