# Environment for the RoPaSci360 board game

In [1]:
from gym import Env
from gym.spaces import Discrete, Box

from state import *

import tensorflow as tf
import numpy as np
import random

In [19]:
class RoPaSci360(Env):
    """Gym environment wrapping a game of RoPaSci360.

    The environment owns a ``GameState`` (``self.game``) plus a few
    per-episode bookkeeping attributes that are (re)created by ``reset``.

    NOTE(review): the environment is work in progress -- applying a legal
    move in ``step``, ``player_move`` and ``next_state`` are not implemented
    yet.
    """

    def __init__(self,
                 player='upper',
                 opponent='random',
                 log='True'):
        """Create the spaces and start a fresh game.

        Args:
            player: side controlled by the agent (default ``'upper'``).
            opponent: opponent identifier (default ``'random'``); stored
                only, not interpreted in this class yet.
            log: logging switch; stored only.
                NOTE(review): the default is the *string* ``'True'``, not
                the boolean -- kept unchanged for backward compatibility.
        """
        # Constants
        self.max_turns = 360
        self.log = log

        #
        # Observation + Action spaces
        # ---------------------------
        #  Observations: RoPaSci board containing 61 hexes, with 9 types of maximum number of tokens for each player.
        #  Actions: (Every board position) * (Every board position)
        #
        # Note: not every action is legal
        #
        self.action_space = Box(0, 60, shape=(2,), dtype=np.uint8)
        self.observation_space = Box(-9, 9, (9, 9))

        self.player = player
        self.opponent = opponent

        # reset and build state
        self.reset()

    def reset(self):
        """Start a new episode and return the fresh game state.

        Returns:
            The newly created ``GameState``.
            NOTE(review): this is the raw game object, not an array drawn
            from ``observation_space`` -- confirm what agents expect.
        """
        self.game = GameState()
        self.game.turn_number = 0
        self.game.game_state = 'running'
        self.game.upper_inv = 0
        self.game.lower_inv = 0

        self.upper = []
        self.lower = []
        self.upper_throws = 9
        # The original assigned ``upper_throws`` twice and never initialised
        # the lower player's throw count.
        self.lower_throws = 9

        self.done = False
        # ``step`` returns ``self.info``; it must exist before the first step.
        self.info = {}

        return self.game

    def step(self, action):
        """Advance the environment by one agent action.

        Args:
            action: a member of ``action_space`` (two board indices).

        Returns:
            ``(state, reward, done, info)``. An action that is not among
            ``self.game._actions(self.player)`` yields
            ``INVALID_ACTION_REWARD`` without changing the game; stepping a
            finished game, or one past ``max_turns``, yields reward ``0.0``
            with ``done=True``.

        Raises:
            AssertionError: if ``action`` is not contained in ``action_space``.
            NotImplementedError: for a legal action on a running game, since
                applying the move is not implemented yet.
        """
        assert self.action_space.contains(action), "ACTION ERROR {}".format(action)

        # NOTE(review): ``action`` is a numpy array here; confirm that the
        # membership test against ``_actions`` compares like with like.
        if action not in self.game._actions(self.player):
            reward = INVALID_ACTION_REWARD
            return self.game, reward, self.done, self.info

        if self.done:
            return (self.game, 0.0, True, self.info)
        # Uses the turn counter initialised in ``reset`` and the instance
        # limit set in ``__init__``; the original read an attribute
        # (``move_count``) that is never assigned.
        if self.game.turn_number > self.max_turns:
            return (self.game, 0.0, True, self.info)

        # Fail loudly instead of silently returning None, which would break
        # the (state, reward, done, info) contract at the caller.
        raise NotImplementedError(
            "RoPaSci360.step: applying a legal action is not implemented yet"
        )

    def render(self, mode='human'):
        """Render the environment (currently a no-op).

        Args:
            mode: render mode, accepted for compatibility with the base
                ``Env.render`` signature; ignored.
        """
        pass

    def player_move(self, action):
        """
        Returns (state, reward, done)

        TODO: not implemented yet -- currently returns None.
        """

    def next_state(self, state, player, move):
        """Compute the successor of ``state`` after ``player`` plays ``move``.

        Args:
            state: state to start from; ``None`` means the current game.
            player: side making the move.
            move: move to apply.

        TODO: not implemented yet -- only the default for ``state`` is
        resolved and None is returned.
        """
        if state is None:
            # The environment keeps its state in ``self.game``; there is no
            # ``self.state`` attribute.
            state = self.game
        

In [20]:
# Build the environment with its default arguments (player='upper',
# opponent='random'); the constructor also resets the game.
env = RoPaSci360()

In [21]:
# Inspect the action space: a Box of two uint8 values, each in [0, 60].
env.action_space

Box(0, 60, (2,), uint8)

In [18]:
# Draw one random sample from the observation space (a 9x9 Box bounded by
# -9 and 9); the values are continuous floats, as the output below shows.
env.observation_space.sample()

array([[ 7.1599236 ,  0.2389597 , -4.5485487 ,  8.585953  ,  7.773817  ,
        -5.979217  ,  8.150582  ,  1.9702885 , -2.907794  ],
       [-8.821841  , -4.0194397 ,  7.2110953 ,  2.0318654 ,  8.205598  ,
         0.27554655, -6.135358  , -5.9919424 , -8.182599  ],
       [-7.392586  , -4.923396  ,  1.7226194 , -4.7498293 ,  7.717319  ,
         2.0731888 , -8.515305  ,  6.2607    , -0.77352065],
       [-1.0993428 ,  7.488901  , -1.7685986 ,  1.8785001 ,  3.1430855 ,
         2.3066995 , -0.461651  ,  2.0162373 , -0.8056803 ],
       [-4.9400606 ,  7.9105973 ,  4.744974  , -3.232233  , -8.940723  ,
         2.9440832 ,  2.1417437 ,  2.6274514 ,  7.3903027 ],
       [-6.821703  ,  6.0633893 ,  6.844443  , -4.721934  , -4.7047343 ,
        -2.8085318 ,  3.5446517 ,  3.560094  , -2.2214005 ],
       [ 5.776627  , -2.1017292 ,  3.8238335 ,  1.5583065 ,  1.921359  ,
        -1.9930351 ,  7.953047  ,  8.380244  ,  2.4116373 ],
       [ 2.657094  ,  8.39559   ,  7.0630627 ,  1.729713  ,  5