# Environment

In [14]:
import numpy as np

In [94]:
class Santorini:
    def __init__(self, board_dim = (5,5), parts = [0,22,18,14,18]):
        #action_space: 2 workers * 8 moves * 8 builds = 128 options
        #moves/builds: q,w,e,a,d,z,x,c
        self.workers = [-1,-2]
        self.moves = self.builds = ['q','w','e','a','d','z','x','c']
        #board[buildings/workers, vertical, horizontal]
        self.ktoc = {'q':(-1,-1),
                     'w':(-1,0),
                     'e':(-1,1),
                     'a':(0,-1),
                     'd':(0,1),
                     'z':(1,-1),
                     'x':(1,0),
                     'c':(1,1)}
        self.itoa = [(w,m,b) for w in self.workers for m in self.moves for b in self.builds]
        self.n_action = len(self.itoa)
        
        self.reset(board_dim, parts)

    def reset(self, board_dim, parts):
        #building pieces
        #floor, base, mid, top, dome
        self.parts = parts
        
        #keep track of players
        #-1, 0 , 1 for player 1, blank, player 2
        self.current_player = -1
        
        #two layers: building pieces, workers
        self.board_dim = board_dim
        self.board = np.zeros((2, board_dim[0], board_dim[1]), dtype=np.int64)
        
        self.board[1,0,2], self.board[1,4,2] = -1, -2 #negative workers for player 1
        self.board[1,2,0], self.board[1,2,4] =  1, 2 #positive workers for player 2
        
        return(self.get_state())
        
    def print_board(self):
        print(f'Buildings:\n {self.board[0,:,:]}')
        print(f'Workers:\n {self.board[1,:,:]}')
    
    def get_state(self):
        #current player has negative workers; opposing player has positive workers
        sgn = -np.sign(self.current_player)
        state = self.board.copy()
        state[1,:,:]*=sgn
        return(state)
    
    def score(self):
        #get position of current player's workers
        worker_idx = np.sign(self.get_state()[1,:,:])==-1
        #check if workers at those positions are on top
        if (self.board[0,:,:][worker_idx] == 3).any():
            reward = 1
        else:
            reward = 0
        return(reward)
        
    def move(self,worker,key):
        #worker is either -1, -2; pov of current player
        if worker not in [-1,-2]: raise ValueError('Wrong Worker')
        
        #get source and destinations
        state = self.get_state()
        worker_idx = np.where(state[1,:,:]==worker)
        src = (worker_idx[0][0],worker_idx[1][0])
        worker_num = self.board[1,src[0], src[1]]
        delta = self.ktoc[key]
        dest = (src[0]+delta[0],src[1]+delta[1])
        
        #check if correct turn
        if np.sign(self.board[1,src[0],src[1]])!=self.current_player:
            raise ValueError('Wrong Player')
        
        #check legality of the move; within the board, one level, no one standing
        inbound =  (-1 < dest[0] < self.board_dim[0]) & (-1 < dest[1] < self.board_dim[1])
        blank_tile = self.board[1,dest[0],dest[1]]==0
        one_level = ((self.board[0,dest[0],dest[1]] - self.board[0,src[0],src[1]]) <=1)

        if inbound & one_level & blank_tile:
            self.board[1,src[0],src[1]] = 0
            self.board[1,dest[0],dest[1]] = worker_num
        else:
            print(f'Illegal Build\n Inbound: {inbound}\n One Level: {one_level}\n Blank Tile: {blank_tile}')
            print(inbound,one_level,blank_tile)
            raise ValueError(f'Illegal Move')
    
    def build(self,worker,key):
        #worker is either -1, -2; pov of current player
        if worker not in [-1,-2]: raise ValueError('Wrong Worker')
        
        #get source and destinations
        state = self.get_state()
        worker_idx = np.where(state[1,:,:]==worker)
        src = (worker_idx[0][0],worker_idx[1][0])
        worker_num = self.board[1,src[0], src[1]]
        delta = self.ktoc[key]
        dest = (src[0]+delta[0],src[1]+delta[1])
        
        #check tower size legality
        to_build = self.board[0,dest[0],dest[1]] + 1
        if to_build <=4:
            parts_left = self.parts[to_build]
        else:
            raise ValueError('Building too tall')
            
        #check if correct turn
        if np.sign(self.board[1,src[0],src[1]])!=self.current_player:
            raise ValueError('Wrong Player')
            
        #check legality of the build; within the board, enough parts, no one standing
        inbound =  (-1 < dest[0] < self.board_dim[0]) & (-1 < dest[1] < self.board_dim[1])
        enough_parts = parts_left > 0
        blank_tile = self.board[1,dest[0],dest[1]]==0
        if inbound & enough_parts & blank_tile:
            self.board[0,dest[0],dest[1]] = to_build
            self.parts[to_build] -= 1
        else:
            print(f'Illegal Build\n Inbound: {inbound}\n Enough Parts: {enough_parts}\n Blank Tile: {blank_tile}')
            raise ValueError('Illegal Build')
                  
    def step(self,action_idx, switch_player=True):
        worker,move_key,build_key = self.itoa[action_idx]
        #try to move
        try:
            self.move(worker,move_key)
        except:
            next_state = self.get_state()
            reward = -10
            done = True
            if switch_player: self.current_player *= -1
            return(next_state,reward,done,self.current_player)
            
        #try to build
        try:
            self.build(worker,build_key)
        except:
            next_state = self.get_state()
            reward = -10
            done = True
            if switch_player: self.current_player *= -1
            return(next_state,reward,done,self.current_player)
        
        next_state = self.get_state()
        reward = self.score()
        done = True if reward==1 else False
        if switch_player: self.current_player *= -1
        return(next_state,reward,done,self.current_player)

In [95]:
s = Santorini()
s.current_player = 1
s.get_state()

array([[[ 0,  0,  0,  0,  0],
        [ 0,  0,  0,  0,  0],
        [ 0,  0,  0,  0,  0],
        [ 0,  0,  0,  0,  0],
        [ 0,  0,  0,  0,  0]],

       [[ 0,  0,  1,  0,  0],
        [ 0,  0,  0,  0,  0],
        [-1,  0,  0,  0, -2],
        [ 0,  0,  0,  0,  0],
        [ 0,  0,  2,  0,  0]]])

In [96]:
s.step(0,False)

Illegal Build
 Inbound: False
 One Level: True
 Blank Tile: True
False True True


(array([[[ 0,  0,  0,  0,  0],
         [ 0,  0,  0,  0,  0],
         [ 0,  0,  0,  0,  0],
         [ 0,  0,  0,  0,  0],
         [ 0,  0,  0,  0,  0]],
 
        [[ 0,  0,  1,  0,  0],
         [ 0,  0,  0,  0,  0],
         [-1,  0,  0,  0, -2],
         [ 0,  0,  0,  0,  0],
         [ 0,  0,  2,  0,  0]]]), -10, True, 1)

In [81]:
s.current_player

-1

In [227]:
idx = np.where(s.board[1,:,:]==-1)

In [230]:
s.board[1,idx[0],idx[1]]

array([-1])

In [64]:
s.itoa[9]

(-1, 'w', 'w')

In [339]:
if 0<x<2:
    print('x')

x


In [253]:
s.board[1,x[0],x[1]]

0

In [308]:
x = (1,1)

In [309]:
x>0

TypeError: '>' not supported between instances of 'tuple' and 'int'

In [1]:
import gym

In [3]:
env = gym.make('Taxi-v2')

In [4]:
env.reset()

247

In [12]:
env.step(1)

(147, -1, False, {'prob': 1.0})