In [209]:
from gym import Env
from gym.spaces import Discrete, Dict, MultiDiscrete
import numpy as np
from random import sample

%run Shop.ipynb
%run Battle.ipynb

In [224]:
class GameEnv(Env):
    '''
    Gym-style environment built around a single ``Shop`` instance.

    Every step applies one discrete action to the shop (buy, sell, combine,
    freeze, roll, ...) or ends the turn, in which case the current team fights
    a team sampled from ``team_database`` and a new turn is started.

    The flat action index is split into consecutive ranges, one per action type,
    in the order of ``n_actions_dict``; ``cum_actions_dict[name]`` is the
    exclusive upper bound of the range belonging to ``name``.
    '''
    def __init__(self, animal_dict = pack1_animal_dict, food_dict = pack1_food_dict):
        '''
        Set up action/observation spaces, the opponent database and the rewards.

        Parameters
        ----------
        animal_dict : dict
            Its values are concatenated into ``species_arr`` (flat array of species).
        food_dict : dict
            Its values are concatenated into ``food_arr`` (flat array of food names).
        '''
        self.animal_dict = animal_dict
        self.food_dict = food_dict
        self.species_arr = np.concatenate(list(self.animal_dict.values()))
        self.food_arr = np.concatenate(list(self.food_dict.values()))

        # one extra one-hot slot each; shop_to_arrays() uses it to mark an empty position
        self.n_species = len(self.species_arr) + 1
        self.n_foods = len(self.food_arr) + 1
        self.n_status = len(status_list)

        # Number of flat actions per action type (insertion order defines the layout).
        # NOTE(review): the literals assume Team.max_size == 5, Shop.max_size == 7
        # and max_n_foods == 3 -- confirm against Shop.ipynb.
        self.n_actions_dict = {'buy_pet_merge': 35,
                               'buy_pet_insert': 35,
                               'sell_pet': 5,
                               'combine_pets': 20,
                               'buy_food': 15,
                               'freeze_pet': 7,
                               'freeze_food': 3,
                               'unfreeze_pet': 7,
                               'unfreeze_food': 3,
                               'roll': 1,
                               'end_turn': 1}
        n_actions_cum = np.cumsum(list(self.n_actions_dict.values()))
        self.cum_actions_dict = {k: n_actions_cum[i] for i,k in enumerate(self.n_actions_dict.keys())}
        self.n_actions = n_actions_cum[-1]
        self.action_space = Discrete(self.n_actions)

        max_stats = 50
        self.max_n_foods = 3
        max_gold = 20
        self.max_turn = 5
        self.n_wins_goal = 2
        spaces = {'team_species': MultiDiscrete(nvec = [self.n_species] * Team.max_size),
                  'team_stats': MultiDiscrete(nvec = np.array([[max_stats, max_stats] for _ in range(Team.max_size)])),
                  'shop_species': MultiDiscrete(nvec = [self.n_species] * Shop.max_size),
                  'shop_stats': MultiDiscrete(nvec = np.array([[max_stats, max_stats] for _ in range(Shop.max_size)])),
                  'shop_foods': MultiDiscrete(nvec = [self.n_foods] * self.max_n_foods),
                  'gold': Discrete(max_gold),
                  'lives': Discrete(10),
                  'wins': Discrete(10),
                  'turn': Discrete(self.max_turn)}
        self.observation_space = Dict(spaces)

        self.shop = None
        # NOTE(review): this counter is only ever *set* inside this class (reset, roll,
        # end turn); presumably the training loop decrements it -- confirm.
        self.n_actions_left = 0
        self.n_data = 6  # length of data_arr returned by shop_to_arrays()

        # per-turn pool of opponent teams; every turn starts with one empty team
        self.team_database = {i: [Team([])] for i in range(1, self.max_turn+1)}
        self.n_stored_teams = 10

        self.rewards_dict = {'buy_pet_merge':       0,
                             'buy_pet_insert':      0,
                             'sell_pet':           -0.1,
                             'combine_pets':        0,
                             'buy_food':            0,
                             'freeze_pet':         -0.1,
                             'freeze_food':        -0.1,
                             'unfreeze_pet':       -0.1,
                             'unfreeze_food':      -0.1,
                             'roll':               -0.1,
                             'end_turn_win':        1,
                             'end_turn_lose':      -1,
                             'end_turn_draw':      -0.5,
                             'end_turn_10wins':    10,
                             'end_turn_nolives':  -10,
                             'end_turn_maxturns': -10}

    def __str__(self):
        '''String form of the wrapped shop.'''
        return str(self.shop)

    def step(self, action):
        '''
        Apply one flat action index to the shop.

        Returns the tuple ``(observation, reward, done, info)`` where the
        observation is the output of ``shop_to_arrays()`` and ``info`` is an
        empty dict. Raises ``AssertionError`` if ``action`` is not below
        ``n_actions``.
        '''
        reward = 0
        done = False
        info = {}

        if action < self.cum_actions_dict['buy_pet_merge']:
            # flat index = team_slot * 7 + shop_slot (same layout as the mask)
            from_ind = action % 7
            to_ind = action // 7
            self.shop.buy_pet(from_ind, to_ind, True)
            reward = self.rewards_dict['buy_pet_merge']
            print('bought pet\n', self.shop)

        elif action < self.cum_actions_dict['buy_pet_insert']:
            action = action - self.cum_actions_dict['buy_pet_merge']
            from_ind = action % 7
            to_ind = action // 7
            self.shop.buy_pet(from_ind, to_ind, False)
            reward = self.rewards_dict['buy_pet_insert']
            print('bought pet\n', self.shop)

        elif action < self.cum_actions_dict['sell_pet']:
            from_ind = action - self.cum_actions_dict['buy_pet_insert']
            self.shop.sell_pet(from_ind)
            reward = self.rewards_dict['sell_pet']
            print('sold pet\n', self.shop)

        elif action < self.cum_actions_dict['combine_pets']:  # combine pets
            action = action - self.cum_actions_dict['sell_pet']
            from_ind = action % 5
            to_ind = action // 5
            # the target index skips the source slot, so only 4 targets are encoded
            if to_ind >= from_ind:
                to_ind += 1
            self.shop.combine_pets(from_ind, to_ind)
            reward = self.rewards_dict['combine_pets']
            print('combined pet\n', self.shop)

        elif action < self.cum_actions_dict['buy_food']:
            # flat index = team_slot * 3 + food_slot
            action = action - self.cum_actions_dict['combine_pets']
            from_ind = action % 3
            to_ind = action // 3
            self.shop.buy_food(from_ind, to_ind)
            reward = self.rewards_dict['buy_food']
            print('bought food\n', self.shop)

        elif action < self.cum_actions_dict['freeze_pet']:
            reward = self.rewards_dict['freeze_pet']
            self.shop.freeze_pet(action - self.cum_actions_dict['buy_food'])

        elif action < self.cum_actions_dict['freeze_food']:
            reward = self.rewards_dict['freeze_food']
            self.shop.freeze_food(action - self.cum_actions_dict['freeze_pet'])

        elif action < self.cum_actions_dict['unfreeze_pet']:
            reward = self.rewards_dict['unfreeze_pet']
            self.shop.unfreeze_pet(action - self.cum_actions_dict['freeze_food'])

        elif action < self.cum_actions_dict['unfreeze_food']:
            reward = self.rewards_dict['unfreeze_food']
            self.shop.unfreeze_food(action - self.cum_actions_dict['unfreeze_pet'])

        elif action < self.cum_actions_dict['roll']:
            reward = self.rewards_dict['roll']
            self.shop.roll()
            self.n_actions_left = max(4, self.shop.gold)

        elif action < self.cum_actions_dict['end_turn']:
            own_team = self.shop.end_turn()
            possible_opp_teams = self.team_database[self.shop.current_turn]
            opp_team = sample(possible_opp_teams, 1)[0]

            # get result
            b = Battle(own_team, opp_team, self.animal_dict, self.shop.current_turn, self.shop.current_tier)
            print('turn', self.shop.current_turn)
            print(b)
            result = b.battle()

            # upon a won/lost round, give a small positive/negative reward
            if result == 1:
                reward = self.rewards_dict['end_turn_win']
                # if this team won, add it to possible teams
                if len(possible_opp_teams) < self.n_stored_teams:
                    possible_opp_teams.append(own_team.deepcopy())
                elif np.random.sample() > .5:
                    # pool is full: replace a random stored team half of the time
                    possible_opp_teams.pop(sample(range(self.n_stored_teams), 1)[0])
                    possible_opp_teams.append(own_team.deepcopy())
            elif result == 2:
                reward = self.rewards_dict['end_turn_lose']
            elif result == 0:
                reward = self.rewards_dict['end_turn_draw']

            # start new turn
            self.shop.start_new_turn(result)
            self.n_actions_left = max(4, self.shop.gold)

            # upon a won/lost game, give a large positive/negative reward
            # (this overrides the per-round reward assigned above)
            if self.shop.lives <= 0:
                done = True
                reward = self.rewards_dict['end_turn_nolives']
            elif self.shop.n_wins >= self.n_wins_goal:
                done = True
                reward = self.rewards_dict['end_turn_10wins']
            elif self.shop.current_turn > self.max_turn:
                done = True
                reward = self.rewards_dict['end_turn_maxturns']
        else:
            assert False, f'received action exceeded max action ({self.n_actions-1}), got: {action}'

        return self.shop_to_arrays(), reward, done, info

    def get_action_mask(self):
        '''
        Method used to generate action mask.

        Returns a boolean array of length ``n_actions``; entry ``k`` is True when
        flat action ``k`` is currently allowed. The layout mirrors the decoding
        done in ``step()``. All state is read from ``self`` (no notebook globals).
        '''
        gold = self.shop.gold

        if self.n_actions_left <= 0:  # self-imposed rule: no actions left => either roll or end turn
            output = np.zeros(self.n_actions, bool)
            output[-2] = gold >= 1   # roll, same affordability rule as below
            output[-1] = True        # end turn is always possible, so the mask is never empty
            return output

        team = self.shop.team1
        shop_pets = self.shop.team2
        foods = self.shop.foods
        t_max = Team.max_size
        s_max = Shop.max_size

        # rows: team slot to merge into, columns: shop slot to buy from
        buy_pet_merge = np.zeros((5, 7), bool)
        for i, pet in enumerate(team):
            for j, animal in enumerate(shop_pets):
                # affordable means cost <= gold, consistent with insert/food below
                if pet.species == animal.species and pet.lvl < 3 and animal.cost <= gold:
                    buy_pet_merge[i,j] = True
        buy_pet_merge = buy_pet_merge.flatten()

        if len(team) == t_max:
            buy_pet_insert = np.zeros(35, bool)
        else:
            # rows: insert position (0..len(team)), columns: shop slot to buy from
            buy_pet_insert = np.zeros((5,7), bool)
            buy_pet_insert[:len(team)+1, :len(shop_pets)] = True
            cant_afford = np.array([pet.cost > gold for pet in shop_pets], dtype=bool)
            buy_pet_insert[:, np.pad(cant_afford, (0, s_max-len(cant_afford)), constant_values=False)] = False
            buy_pet_insert = buy_pet_insert.flatten()

        sell_pet = np.array([i < len(team) for i in range(t_max)], dtype=bool)

        # rows: target slot with the source slot skipped (see step()), columns: source slot
        combine_pets = np.zeros((4, 5), bool)
        for i in range(len(team)):
            for j in range(i+1, len(team)):
                if team[i].species == team[j].species:
                    combine_pets[i, j] = True      # source j -> target i
                    combine_pets[j-1, i] = True    # source i -> target j
        combine_pets = combine_pets.flatten()

        # rows: team slot receiving the food, columns: food slot
        buy_food = np.zeros((5,3), bool)
        buy_food[:len(team), :len(foods)] = True
        for i, food in enumerate(foods):
            if food.cost > gold:
                buy_food[:, i] = False
        buy_food = buy_food.flatten()

        freeze_pet = np.array([i<len(shop_pets) and not shop_pets[i].is_frozen for i in range(s_max)], dtype=bool)

        freeze_food = np.array([i<len(foods) and not foods[i].is_frozen for i in range(self.max_n_foods)], dtype=bool)

        unfreeze_pet = np.array([i<len(shop_pets) and shop_pets[i].is_frozen for i in range(s_max)], dtype=bool)

        unfreeze_food = np.array([i<len(foods) and foods[i].is_frozen for i in range(self.max_n_foods)], dtype=bool)

        roll = np.array([gold >= 1], dtype=bool)

        end_turn = np.array([gold <= 3], dtype=bool)

        return np.concatenate((buy_pet_merge, buy_pet_insert, sell_pet, combine_pets, buy_food, freeze_pet,
                               freeze_food, unfreeze_pet, unfreeze_food, roll, end_turn))

    def possible_actions(self):
        '''Flat indices of all actions currently allowed by ``get_action_mask()``.'''
        return np.arange(self.n_actions)[self.get_action_mask()]

    def shop_to_arrays(self):
        '''
        Translates present shop state into np.array to be used as SAPnet input

        Returns ``(team_arr, shop_arr, foods_arr, data_arr)``, all float32:
        per-slot numeric features followed by one-hot encodings; the last
        species/food one-hot column marks an empty slot.
        '''
        # team representation: 7 numeric features | species one-hot | status one-hot
        team_arr = np.zeros((Team.max_size, 7+self.n_species+len(status_list)), dtype='float32')
        for i, pet in enumerate(self.shop.team1):
            team_arr[i][0:7] = pet.atk, pet.hp, pet.lvl, pet.exp, pet.pos, pet.temp_atk, pet.temp_hp

            index1 = np.where(self.species_arr == pet.species)[0]
            assert len(index1) > 0, f'species {pet.species} should be in species_arr'
            team_arr[i][7 + index1[0]] = 1

            # np.where returns a tuple of index arrays; take [0] so the assertion
            # really checks that a matching status was found
            index2 = np.where([status == pet.status.name for status in status_list])[0]
            assert len(index2) > 0, f'status {pet.status} should be in status_list'
            team_arr[i][7 + self.n_species + index2[0]] = 1

        # mark unused team slots in the extra "empty" species column
        team_arr[len(self.shop.team1): Team.max_size, 7 + self.n_species - 1] = 1

        # shop representation: 4 numeric features | species one-hot
        shop_arr = np.zeros((Shop.max_size, 4+self.n_species), dtype='float32')
        for i, pet in enumerate(self.shop.team2):
            shop_arr[i][0:4] = pet.atk, pet.hp, pet.cost, int(pet.is_frozen)
            index = np.where(self.species_arr == pet.species)[0]
            assert len(index) > 0, f'species {pet.species} should be in env.species_arr'
            shop_arr[i][4 + index[0]] = 1

        shop_arr[len(self.shop.team2): Shop.max_size, -1] = 1

        # foods representation: 2 numeric features | food one-hot
        foods_arr = np.zeros((self.max_n_foods, 2 + self.n_foods), dtype='float32')
        for i, food in enumerate(self.shop.foods):
            foods_arr[i][0:2] = food.cost, int(food.is_frozen)
            index = np.where(self.food_arr == food.name)[0]
            assert len(index) > 0, f'food {food.name} should be in env.food_arr'
            foods_arr[i][2 + index[0]] = 1

        foods_arr[len(self.shop.foods): self.max_n_foods, -1] = 1

        # data representation
        data_arr = np.array([self.shop.gold, self.shop.lives, self.shop.n_wins, self.shop.current_turn,
                             int(self.shop.lost_last_battle), self.n_actions_left], dtype='float32')

        return team_arr, shop_arr, foods_arr, data_arr

    def render(self):
        '''Return (not print) the string form of the wrapped shop.'''
        return str(self.shop)

    def reset(self):
        '''
        Start a new game with a fresh shop and an empty team.

        ``team_database`` is left untouched, so stored opponent teams persist
        across episodes. Returns the initial observation from ``shop_to_arrays()``.
        '''
        self.shop = Shop(Team([]), self.animal_dict, self.food_dict)
        self.n_actions_left = 10
        return self.shop_to_arrays()

In [223]:
# Demo: hand-build a small team and list the action indices the mask allows.
np.random.seed(0)
env = GameEnv()
env.reset()

# two Ducks around a Dog, so the team contains one same-species pair
for species_name in ('Duck', 'Dog', 'Duck'):
    env.shop.team1.insert_by_name(species_name)

# override the second shop pet's cost and empty the purse before querying the mask
env.shop.team2[1].cost = 11
env.shop.gold = 0

env.possible_actions()

array([ 70,  71,  72,  77,  80,  95,  98, 101, 110, 111, 112, 117, 131])

Action Dictionary:

* 0-34: buy pet (merge)
* 35-69: buy pet (insert)
* 70-74: sell pet
* 75-94: combine pets
* 95-109: buy food
* 110-116: freeze pet
* 117-119: freeze food
* 120-126: unfreeze pet
* 127-129: unfreeze food
* 130: roll
* 131: end turn