In [1]:
# https://github.com/Kaggle/kaggle-environments/blob/master/kaggle_environments/envs/hungry_geese/hungry_geese.py

In [5]:
import pickle
import keras
import base64
import bz2

In [6]:
# Load the trained Keras model that was saved under the name 'trial-9800'.
model = keras.models.load_model('trial-9800')

In [7]:
# Serialise the model weights as pickle -> bz2 -> base64 so they can be
# embedded in the submission file as a single literal.
weight_base64 = base64.b64encode(bz2.compress(pickle.dumps(model.get_weights())))
# %-formatting a bytes object with %s yields its repr (b'...'), so the
# resulting text is a Python assignment of a bytes literal to `weight`.
w = "weight= %s"%weight_base64
%store w >submission.py

Writing 'w' (str) to file 'submission.py'.


In [8]:
%%writefile -a submission.py
import pickle
import base64
import bz2
from kaggle_environments.envs.hungry_geese.hungry_geese import Observation, Configuration, Action, \
                                                                row_col, adjacent_positions, translate, min_distance

from kaggle_environments import make
from random import choice
import numpy as np

import random
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.optimizers import Adam

from collections import deque

def geese_heads(obs_dict, config_dict):
    """
    Return the (row, column) position of every goose's head.

    Args:
        obs_dict: Raw observation mapping; wrapped in ``Observation`` to read
            the ``geese`` list.
        config_dict: Raw configuration mapping; wrapped in ``Configuration``
            to read the number of board columns.

    Returns:
        A list with one ``(row, column)`` tuple per goose, in the same order
        as ``observation.geese``. A goose with no segments left is reported
        as ``(None, None)``.
    """
    configuration = Configuration(config_dict)
    observation = Observation(obs_dict)

    # The player's own head is deliberately not looked up here: indexing an
    # empty (dead) player goose would raise IndexError, and the value is not
    # needed to build the result.
    positions = []
    for goose in observation.geese:
        if len(goose) > 0:
            row, column = row_col(goose[0], configuration.columns)
        else:
            row, column = None, None
        positions.append((row, column))
    return positions

def get_last_actions(previous_geese_heads, heads_positions, rows=7, columns=11):
    """
    Infer each goose's last action from two consecutive sets of head positions.

    Args:
        previous_geese_heads: Sequence of ``(row, column)`` head positions from
            the previous step, one per goose. May be empty when there is no
            previous step.
        heads_positions: Sequence of ``(row, column)`` head positions for the
            current step; a goose without a head is ``(None, None)``.
        rows: Number of board rows, used to detect vertical wrap-around.
        columns: Number of board columns, used to detect horizontal
            wrap-around.

    Returns:
        A list with one entry per goose: the action name (``'NORTH'``,
        ``'SOUTH'``, ``'EAST'`` or ``'WEST'``), or ``None`` when the goose has
        no current head or the move matches none of the four directions.
        When ``previous_geese_heads`` is empty, a random action name is drawn
        for every goose instead.
    """
    last_row = rows - 1
    last_col = columns - 1

    def get_last_action(prev, cur):
        prev_row, prev_col = prev[0], prev[1]
        cur_row, cur_col = cur[0], cur[1]

        # No current head: there is nothing to infer.
        if cur_row is None:
            return None

        # Each test covers the plain one-cell step and the step that wraps
        # around the board edge.
        if cur_row - prev_row == 1 or (cur_row == 0 and prev_row == last_row):
            return Action.SOUTH.name
        if cur_row - prev_row == -1 or (cur_row == last_row and prev_row == 0):
            return Action.NORTH.name
        if cur_col - prev_col == 1 or (cur_col == 0 and prev_col == last_col):
            return Action.EAST.name
        if cur_col - prev_col == -1 or (cur_col == last_col and prev_col == 0):
            return Action.WEST.name
        return None

    if len(previous_geese_heads) == 0:
        # No history available: draw a random action name per goose.
        actions = [Action.SOUTH.name, Action.NORTH.name, Action.EAST.name, Action.WEST.name]
        nb_geeses = len(heads_positions)
        return [actions[np.random.randint(4)] for _ in range(nb_geeses)]

    return [
        get_last_action(prev, cur)
        for prev, cur in zip(previous_geese_heads, heads_positions)
    ]
    
def central_state_space(obs_dict, config_dict, prev_head):
    """
    Build a 7x11 board re-centred on the player's head, plus food features.

    Every position is shifted (with wrap-around) so that the player's head
    lands on ``board[3, 5]``.

    Args:
        obs_dict: Raw observation mapping; wrapped in ``Observation``.
        config_dict: Raw configuration mapping; wrapped in ``Configuration``.
        prev_head: Board index used as the player's head when the player's
            goose has no segments left.

    Returns:
        A ``(board, food_feats)`` tuple.

        ``board`` is a ``7 x 11`` float array holding ``-1`` on goose bodies,
        ``-0.1`` on the tail of any goose longer than one segment, ``-0.33``
        on cells adjacent to an opponent's head and ``0`` elsewhere; the
        centre cell ``board[3, 5]`` is always reset to ``0``. Geese are drawn
        in index order, so a later write overrides an earlier one.

        ``food_feats`` is ``[food1_row, food2_row, food1_col, food2_col]``:
        the centred offsets of the first two food items, divided by 3 for
        rows and by 5 for columns.

    Raises:
        IndexError: if the observation holds fewer than two food items.
    """
    configuration = Configuration(config_dict)
    observation = Observation(obs_dict)

    player_index = observation.index
    player_goose = observation.geese[player_index]
    # A goose with no segments has no head, so use the caller-supplied one.
    player_head = player_goose[0] if len(player_goose) > 0 else prev_head
    player_row, player_column = row_col(player_head, configuration.columns)

    foods = observation['food']

    def centralize(row, col):
        # Python's % is non-negative for a positive modulus, so one expression
        # covers cells on either side of the player's head.
        new_row = (3 + row - player_row) % 7
        new_col = (5 + col - player_column) % 11
        return new_row, new_col

    food1_row, food1_column = centralize(*row_col(foods[0], configuration.columns))
    food2_row, food2_column = centralize(*row_col(foods[1], configuration.columns))

    # Signed offsets from the centre cell, scaled to [-1, 1].
    food1_row_feat = float(food1_row - 3) / 3
    food2_row_feat = float(food2_row - 3) / 3
    food1_col_feat = float(food1_column - 5) / 5
    food2_col_feat = float(food2_column - 5) / 5
    food_feats = np.array([food1_row_feat, food2_row_feat, food1_col_feat, food2_col_feat])

    # Create the grid. Food is not drawn on it; it is only described by
    # food_feats.
    board = np.zeros([7, 11])

    for ind, goose in enumerate(observation.geese):
        if ind != player_index and len(goose) > 0:
            # Mark the cells next to an opponent's head.
            for p in adjacent_positions(goose[0], 11, 7):
                row, col = centralize(*row_col(p, configuration.columns))
                board[row, col] = -.33

        # Set every segment of every goose (the player's included) to -1.
        for pos in goose:
            row, col = centralize(*row_col(pos, configuration.columns))
            board[row, col] = -1

        if len(goose) > 1:
            # Set tails to -0.1 if the goose has a tail.
            row, col = centralize(*row_col(goose[-1], configuration.columns))
            board[row, col] = -0.1

    # The player's own head sits on the centre cell and is left blank.
    board[3, 5] = 0
    return board, food_feats

def min_dir(p1, p2, max_p):
    """
    Shortest distance from p1 to p2 along one axis, and the direction to take.

    Args:
        p1: Starting coordinate.
        p2: Target coordinate.
        max_p: Axis length used for the wrap-around estimate and for scaling.

    Returns:
        A ``(dir_vec, min_dist)`` tuple. ``dir_vec`` is a length-2 one-hot
        array: index 0 means 'left', index 1 means 'right'. 'left' is also
        used when the two coordinates are equal or both routes tie.
        ``min_dist`` is a length-1 array with the shorter of the two
        distances divided by ``max_p``.
    """
    across = abs(p1 - p2)  # distance going across the board
    # Distance going over the edge: each point's distance to its nearest edge.
    around = min(abs(p1), abs(p1 - max_p)) + min(abs(p2), abs(p2 - max_p))

    if p1 > p2:
        # Target has the smaller coordinate: 'right' only pays off by wrapping.
        go_right = across > around
    elif p2 > p1:
        # Target has the larger coordinate: 'right' is the direct route.
        go_right = across < around
    else:
        go_right = False

    dir_vec = np.zeros(2)
    dir_vec[1 if go_right else 0] = 1

    return dir_vec, np.array([min(across, around)]) / max_p
        

class StateTranslator_Central:
    """
    Translate raw observations into the flat state vector and the reward.

    The state is built around a board on which the player's head is always
    at the centre (see ``central_state_space``). The instance also tracks the
    last action, the step count and the player's goose length between calls.
    """

    def __init__(self):
        self.last_action = None
        self.step_count = 0
        self.last_goose_length = 1
        # Defined up front so update_length() is safe before get_state().
        self.current_goose_length = 1
        # Last known board index of the player's head. Both attributes are
        # refreshed together in get_state() and default to 0, so a first
        # observation with an already-dead goose no longer raises.
        self.last_goose_ind = 0
        self.my_goose = 0
        self.observations = []

    def set_last_action(self, last_action):
        """Record the name of the action that was played last."""
        self.last_action = last_action

    def __get_last_action_vec(self):
        """Return the last action as a one-hot vector ordered N, S, E, W."""
        action_vec = np.zeros(4)

        # NOTE: this order differs from translate_action_to_text (W, E, N, S).
        slots = {'NORTH': 0, 'SOUTH': 1, 'EAST': 2, 'WEST': 3}
        slot = slots.get(self.last_action)
        if slot is not None:
            action_vec[slot] = 1

        return action_vec

    def translate_action_to_text(self, action):
        """Map a network output index (0-3) to an action name."""
        h = {0: 'WEST',
             1: 'EAST',
             2: 'NORTH',
             3: 'SOUTH'}

        return h[action]

    def translate_text_to_int(self, action):
        """Map an action name to its network output index (0-3)."""
        h = {'WEST': 0,
             'EAST': 1,
             'NORTH': 2,
             'SOUTH': 3}

        return h[action]

    def update_length(self):
        """Store the current goose length as the previous one."""
        self.last_goose_length = self.current_goose_length

    def get_state(self, observation, config):
        """
        Build the flat state vector for one observation.

        Args:
            observation: Mapping with the keys ``'geese'``, ``'index'``,
                ``'step'`` and ``'food'``.
            config: Configuration passed through to ``central_state_space``.

        Returns:
            A 1-D float array made of, in order: the one-hot last action (4),
            the player's length / 15, the longest goose length / 15, the
            number of living opponents / 4, the step / 200, the food
            direction and distance features (12) and the flattened centred
            board.

        Raises:
            IndexError: if the observation holds fewer than two food items.
        """
        geese = observation['geese']
        player_index = observation['index']
        player_goose = geese[player_index]

        # If our goose died this turn, keep using its last known head index
        # for the state centraliser.
        if len(player_goose) > 0:
            self.last_goose_ind = player_goose[0]
            self.my_goose = player_goose[0]

        board, _ = central_state_space(observation, config, self.last_goose_ind)
        self.step_count = observation['step']
        self.current_goose_length = len(player_goose)

        food = observation['food']
        # NOTE(review): min_dir receives raw board indices here, not separate
        # row/column coordinates. Left as is because the weights loaded for
        # this agent were presumably trained on these exact features; confirm
        # before changing.
        dir_vec1x, min_dist1x = min_dir(self.my_goose, food[0], 11)
        dir_vec1y, min_dist1y = min_dir(self.my_goose, food[0], 7)
        dir_vec2x, min_dist2x = min_dir(self.my_goose, food[1], 11)
        dir_vec2y, min_dist2y = min_dir(self.my_goose, food[1], 7)
        food_vec = np.concatenate((dir_vec1x, dir_vec1y, dir_vec2x, dir_vec2y,
                                   min_dist1x, min_dist1y, min_dist2x, min_dist2y))

        biggest_goose = max((len(goose) for goose in geese), default=0)
        alive_geese = sum(
            1 for ind, goose in enumerate(geese)
            if ind != player_index and len(goose) > 0
        )

        scalars = np.array([
            self.current_goose_length / 15,
            biggest_goose / 15,
            alive_geese / 4,
            self.step_count / 200,
        ])

        return np.concatenate((
            self.__get_last_action_vec(),
            scalars,
            food_vec,
            board.flatten(),
        ))

    def calculate_reward(self, observation):
        """
        Compute the reward for the transition that led to ``observation``.

        Rules, later ones overriding earlier ones:
            * ``100`` if the player's goose grew since ``last_goose_length``,
              otherwise ``-1``;
            * ``-150`` if the player's goose has no segments left;
            * ``500`` if the player is alive and no opponent is;
            * ``500`` if ``step_count`` is 200 and the player is the first
              goose with the greatest length.

        Args:
            observation: Mapping with the keys ``'geese'`` and ``'index'``.

        Returns:
            The integer reward.
        """
        current_geese = observation['geese']
        player_index = observation['index']
        prev = self.last_goose_length
        cur = len(current_geese[player_index])

        # If we grow the reward is 100, otherwise a small step penalty.
        reward = 100 if cur > prev else -1

        # If we die: -150.
        if cur == 0:
            reward = -150

        alive_geese = sum(
            1 for ind, goose in enumerate(current_geese)
            if ind != player_index and len(goose) > 0
        )

        # If we are the last one standing.
        if alive_geese == 0 and cur > 0:
            reward = 500

        # If the game ends and we are the biggest. Ties go to the lowest
        # index because only a strictly longer goose replaces the leader.
        if self.step_count == 200:
            biggest_goose = 0
            biggest_goose_ind = None
            for ind, goose in enumerate(current_geese):
                if len(goose) > biggest_goose:
                    biggest_goose = len(goose)
                    biggest_goose_ind = ind

            if biggest_goose_ind == player_index:
                reward = 500

        return reward

class dqnAgent:
    """
    Given an environment state, choose an action, and learn from the reward
    https://towardsdatascience.com/reinforcement-learning-w-keras-openai-dqns-1eed3a5338c
    https://towardsdatascience.com/deep-q-learning-tutorial-mindqn-2a4c855abffc
    https://www.researchgate.net/post/What-are-possible-reasons-why-Q-loss-is-not-converging-in-Deep-Q-Learning-algorithm

    The agent holds an online model, a separately built target model, a
    bounded replay memory and an epsilon-greedy exploration schedule.
    """

    def __init__(self, model=None, epsilon = 1.0, epsilon_min = 0.15):
        """
        Args:
            model: Optional pre-built network to use as the online model; a
                fresh one is created when omitted.
            epsilon: Initial probability of picking a random action.
            epsilon_min: Lower bound that epsilon decays towards.
        """
        self.StateTrans = StateTranslator_Central()
        self.state_shape  = 97
        print('my state shape is:', self.state_shape)
        self.memory  = deque(maxlen=8000)
        self.gamma = 0.95
        self.epsilon = epsilon
        self.epsilon_min = epsilon_min
        self.epsilon_decay = 0.999
        self.learning_rate = 0.001
        self.tau = .125

        self.model = self.create_model() if model is None else model
        # The target model is always freshly built; target_train() blends the
        # online weights into it.
        self.target_model = self.create_model()

    def create_model(self):
        """Build and compile the dense network that outputs 4 action values."""
        model   = Sequential()
        model.add(Dense(2000, input_dim=self.state_shape, activation="relu"))
        model.add(Dense(1000, activation="relu"))
        model.add(Dense(500, activation="relu"))
        model.add(Dense(1000, activation="relu"))
        model.add(Dense(500, activation="relu"))
        model.add(Dense(100, activation="relu"))
        model.add(Dense(4))
        model.compile(loss="MSE",
            optimizer=Adam(lr=self.learning_rate))
        return model

    def act(self, state):
        """
        Pick an action index (0-3) for ``state`` with an epsilon-greedy policy.

        Epsilon is decayed (down to ``epsilon_min``) on every call, before the
        random draw.
        """
        self.epsilon *= self.epsilon_decay
        self.epsilon = max(self.epsilon_min, self.epsilon)
        if np.random.random() < self.epsilon:
            # Explore: pick uniformly among all four actions.
            return random.choice([0,1,2,3])

        # Exploit: take the action with the highest predicted value.
        action_values = self.model.predict(state.reshape(-1, self.state_shape))[0]
        return np.argmax(action_values)

    def translate_state(self, observation, configuration):
        """Convert a raw observation into the flat state vector."""
        return self.StateTrans.get_state(observation, configuration)

    def __call__(self, observation, configuration):
        """Return the name of the action chosen for this observation."""
        state = self.translate_state(observation, configuration)
        action = self.act(state)
        # The state translator maps the index 0, 1, 2, 3 to the action name
        # 'WEST', 'EAST', 'NORTH' or 'SOUTH'.
        action_text = self.StateTrans.translate_action_to_text(action)

        # Remember the action so the next state can encode it.
        self.StateTrans.set_last_action(action_text)

        return action_text

    def remember(self, state, action, reward, new_state, done):
        """Append one transition to the replay memory."""
        self.memory.append([state, action, reward, new_state, done])

    def replay(self):
        """
        Fit the online model on a random batch of 32 remembered transitions.

        Does nothing until at least 32 transitions are stored.
        """
        batch_size = 32
        if len(self.memory) < batch_size:
            return

        samples = random.sample(self.memory, batch_size)
        ########################
        # This can be sped up significantly by processing all samples in one
        # batch rather than one at a time
        ####################
        for sample in samples:
            state, action, reward, new_state, done = sample
            target = self.target_model.predict(state.reshape(-1, self.state_shape))
            if done:
                # Terminal transition: no future value to bootstrap from.
                target[0][action] = reward
            else:
                Q_future = max(self.target_model.predict(new_state.reshape(-1, self.state_shape))[0])
                target[0][action] = reward + Q_future * self.gamma
            self.model.fit(state.reshape(-1, self.state_shape), target, epochs=1, verbose=0)

    def target_train(self):
        """Move the target weights towards the online weights by ``tau``."""
        weights = self.model.get_weights()
        target_weights = self.target_model.get_weights()
        blended = [
            online * self.tau + target * (1 - self.tau)
            for online, target in zip(weights, target_weights)
        ]
        self.target_model.set_weights(blended)

    def save_model(self, fn):
        """Save the online model to ``fn``."""
        self.model.save(fn)

# Inference-only agent: with epsilon and its floor both at 0 the random
# branch of act() is never taken, so the network's best action is always used.
dqn = dqnAgent(epsilon=0, epsilon_min=0)

# `weight` is the base64 / bz2 / pickle blob assigned at the top of this file.
# NOTE: pickle.loads runs arbitrary code on untrusted data; only use a blob
# you generated yourself.
_weight_blob = bz2.decompress(base64.b64decode(weight))
dqn.model.set_weights(pickle.loads(_weight_blob))


def agent(observation, config):
    """
    Entry point: play the DQN's action unless it is unsafe.

    The network's choice is kept when it leads to a cell that is neither
    occupied by a goose nor adjacent to an opponent's head, and does not
    reverse the previous move. Otherwise a random safe action is played. If
    no action is safe, the network's choice is returned unchanged.

    Args:
        observation: Mapping with the keys ``'geese'``, ``'index'``,
            ``'step'`` and ``'food'``.
        config: Configuration mapping; wrapped in ``Configuration`` to read
            the board size.

    Returns:
        The action name: ``'NORTH'``, ``'SOUTH'``, ``'EAST'`` or ``'WEST'``.
    """
    # Read last turn's action before the policy overwrites it. On the first
    # step there is no previous move to reverse.
    previous_action = None if observation['step'] == 0 else dqn.StateTrans.last_action

    action = dqn(observation, config)

    configuration = Configuration(config)
    columns, rows = configuration.columns, configuration.rows

    geese = observation['geese']
    player_index = observation['index']
    player_goose = geese[player_index]
    if len(player_goose) == 0:
        # No head to move from, so there is nothing to check.
        return action
    position = player_goose[0]

    opponent_heads = [
        goose[0]
        for index, goose in enumerate(geese)
        if index != player_index and len(goose) > 0
    ]
    # Don't move adjacent to any heads
    head_adjacent_positions = {
        opponent_head_adjacent
        for opponent_head in opponent_heads
        for opponent_head_adjacent in adjacent_positions(opponent_head, columns, rows)
    }
    # Don't move into any bodies
    bodies = {segment for goose in geese for segment in goose}

    opposite = {'NORTH': 'SOUTH', 'SOUTH': 'NORTH', 'EAST': 'WEST', 'WEST': 'EAST'}

    # Action names (strings), to match what the DQN returns.
    legal_actions = []
    for a in Action:
        new_position = translate(position, a, columns, rows)
        if new_position in head_adjacent_positions or new_position in bodies:
            continue
        if previous_action is not None and opposite.get(a.name) == previous_action:
            continue
        legal_actions.append(a.name)

    if action in legal_actions or not legal_actions:
        return action

    action = choice(legal_actions)
    # Record the action actually played so the next state encodes it.
    dqn.StateTrans.set_last_action(action)
    return action



Appending to submission.py
