In [1]:
# https://github.com/Kaggle/kaggle-environments/blob/master/kaggle_environments/envs/hungry_geese/hungry_geese.py

In [1]:
from kaggle_environments.envs.hungry_geese.hungry_geese import Observation, Configuration, Action, \
                                                                row_col, adjacent_positions, translate, min_distance

from kaggle_environments import make
from random import choice
import numpy as np
from copy import deepcopy
import pickle

Loading environment football failed: No module named 'gfootball'


In [2]:
import numpy as np
import random
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.optimizers import Adam

from collections import deque

In [3]:
row_col(40, 11)

(3, 7)

In [4]:
class GreedyAgent:
    def __init__(self):
        
        self.last_action = None
        self.observations = []

    def __call__(self, observation: Observation, configuration: Configuration):
        self.configuration = configuration
        
        board = np.zeros(self.configuration.rows*self.configuration.columns)
        board_shape = (self.configuration.rows, self.configuration.columns)
        
        board_heads = deepcopy(board)
        board_bodies = deepcopy(board)
        board_rewards = deepcopy(board)
        
        
        rows, columns = self.configuration.rows, self.configuration.columns

        food = observation.food
        geese = observation.geese
        
        
        opponents = [
            goose
            for index, goose in enumerate(geese)
            if index != observation.index and len(goose) > 0
        ]

        
        opponent_heads = [opponent[0] for opponent in opponents]
        # Don't move adjacent to any heads
        head_adjacent_positions = {
            opponent_head_adjacent
            for opponent_head in opponent_heads
            for opponent_head_adjacent in adjacent_positions(opponent_head, columns, rows)
        }
        
        tail_adjacent_positions ={
            opponent_tail_adjacent
            for opponent in opponents
            for opponent_tail in [opponent[-1]]
            for opponent_tail_adjacent in adjacent_positions(opponent_tail, columns, rows)
        }
        # Don't move into any bodies
        #bodies, heads = [position for goose in geese for position in goose]
        
        heads = [i[0] for i in geese if len(i)>1]
        bodies = [item for sublist in geese for item in sublist]
        
        board_bodies[list(bodies)] = 1
        board_heads[heads] = 1

        # Move to the closest food
        position = geese[observation.index][0]
        actions = {
            action: min_distance(new_position, food, columns)
            for action in Action
            for new_position in [translate(position, action, columns, rows)]
            if (
                new_position not in head_adjacent_positions and
                new_position not in bodies and
                (self.last_action is None or action != self.last_action.opposite())
            )
        }

        action = min(actions, key=actions.get) if any(actions) else choice([action for action in Action])
        
        
        cur_obs = {}
        cur_obs['head_adjacent_positions'] = head_adjacent_positions
        cur_obs['bodies'] = bodies
        cur_obs['board_bodies'] = board_bodies.reshape(board_shape)
        cur_obs['board_heads'] = board_heads.reshape(board_shape)
        cur_obs['tails'] = tail_adjacent_positions
        cur_obs['actions'] = actions
        cur_obs['action'] = action
        cur_obs['last_action'] = self.last_action
#         cur_obs['goose_size'] = player_goose_len
#         cur_obs['board'] = board
        cur_obs['cur_action'] = action
        self.observations.append(cur_obs)
        
        self.last_action = action
        return action.name


cached_greedy_agents = {}


def greedy_agent(obs, config):
    index = obs["index"]
    if index not in cached_greedy_agents:
        cached_greedy_agents[index] = GreedyAgent(Configuration(config))
    return cached_greedy_agents[index](Observation(obs))

In [5]:
def geese_heads(obs_dict, config_dict):
    """
    Return the position of the geese's heads
    """
    configuration = Configuration(config_dict)

    observation = Observation(obs_dict)
    player_index = observation.index
    player_goose = observation.geese[player_index]
    player_head = player_goose[0]
    player_row, player_column = row_col(player_head, configuration.columns)
    positions = []
    for geese in observation.geese:
        if len(geese)>0:
            geese_head = geese[0]
            row, column = row_col(geese_head, configuration.columns)
        else:
            row = None
            column = None
        positions.append((row, column))
    return positions

def get_last_actions(previous_geese_heads, heads_positions):

    def get_last_action(prev, cur):
        last_action = None

        prev_row = prev[0]
        prev_col = prev[1]
        cur_row = cur[0]
        cur_col = cur[1]

        if cur_row is not None:
            if (cur_row-prev_row == 1) | ((cur_row==0) & (prev_row==6)):
                last_action = Action.SOUTH.name
            elif (cur_row-prev_row == -1) | ((cur_row==6) & (prev_row==0)):
                last_action = Action.NORTH.name
            elif (cur_col-prev_col == 1) | ((cur_col==0) & (prev_col==10)):
                last_action = Action.EAST.name
            elif (cur_col-prev_col == -1) | ((cur_col==10) & (prev_col==0)):
                last_action = Action.WEST.name

        return last_action

    if len(previous_geese_heads) == 0:
        actions = [Action.SOUTH.name, Action.NORTH.name, Action.EAST.name, Action.WEST.name]
        nb_geeses = len(heads_positions)
        last_actions = [actions[np.random.randint(4)] for _ in range(nb_geeses)]
    else:   
        last_actions = [get_last_action(*pos) for pos in zip(previous_geese_heads, heads_positions)]

    return last_actions
    
def central_state_space(obs_dict, config_dict, prev_head):
    """
    Recreating a board where my agent's head in the middle of the board 
    (position (4,5)), and creating features accordingly
    """
    
    configuration = Configuration(config_dict)

    observation = Observation(obs_dict)
    player_index = observation.index
    player_goose = observation.geese[player_index]
    if len(player_goose)==0:
        player_head = prev_head
    else:
        player_head = player_goose[0]
    player_row, player_column = row_col(player_head, configuration.columns)
    row_offset = player_row - 3
    column_offset = player_row - 5

    foods = observation['food']

    def centralize(row, col):
        if col > player_column:
            new_col = (5 + col - player_column) % 11
        else:
            new_col = 5 - (player_column - col)
            if new_col < 0:
                new_col += 11

        if row > player_row:
            new_row = (3 + row - player_row) % 7
        else:
            new_row = 3 - (player_row - row)
            if new_row < 0:
                new_row += 7
        return new_row, new_col

    food1_row, food1_column = centralize(*row_col(foods[0], configuration.columns))
    food2_row, food2_column = centralize(*row_col(foods[1], configuration.columns))

#     food1_row_feat = float(food1_row - 3)/3 if food1_row>=3 else float(food1_row - 3)/3
#     food2_row_feat = float(food2_row - 3)/3 if food2_row>=3 else float(food2_row - 3)/3

#     food1_col_feat = float(food1_column - 5)/5 if food1_column>=5 else float(food1_column - 5)/5
#     food2_col_feat = float(food2_column - 5)/5 if food2_column>=5 else float(food2_column - 5)/5

    # Create the grid
    board = np.zeros([7, 11])
    # Add food to board
    board[food1_row, food1_column] = 1
    board[food2_row, food2_column] = 1

    for goose in observation.geese:
        if len(goose)>0:
            for pos in goose:
                # set bodies to 1
                row, col = centralize(*row_col(pos, configuration.columns))
                board[row, col]=-0.5
                # Set head to five
            row, col = centralize(*row_col(goose[0], configuration.columns))
            board[row,col]=-1
            # Just make sure my goose head is 0
            board[3, 5] = 0
                    
            
    return board #,len(player_goose), food1_row_feat, food1_col_feat, food2_row_feat, food2_col_feat

In [6]:
env = make("hungry_geese", debug=True)
config = env.configuration

state_dict = env.reset(num_agents=4)[0]
observation = state_dict['observation']
action = state_dict['action']

prev_head = 0

In [7]:
central_state_space(observation, config, prev_head)

array([[ 0.,  0.,  0.,  0.,  0., -1.,  1.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0., -1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0., -1.,  0.,  0.,  0.,  0.,  0.]])

In [8]:
env.render(mode="ipython")

In [9]:
def min_dir(p1, p2, max_p):
    """
    min distance and direction from p1
    """
    direction = 'left' # left by default
    d1 = abs(p1 - p2) # Distance going across board
    d2 = min(abs(p1-0), abs(p1 - max_p)) + min(abs(p2-0), abs(p2 - max_p)) # Distance wrapping around board
    
    if p1>p2 and d1<d2:
        direction = 'left'
        
    elif p1>p2 and d1>d2:
        direction ='right'
    
    elif p2>p1 and d1<d2:
        direction = 'right'
    
    elif p2>p1 and d1>d2:
        direction = 'left'
    
    dir_vec = np.zeros(2)
        
    if direction=='left':
        dir_vec[0]=1
    
    else:
        dir_vec[1]=1
    
    min_dist = np.array([min(d1, d2)])/max_p
    return dir_vec, min_dist
        

### Test min dir

In [120]:
cols = 11
p1 = 2
p2 = 5

min_dir(p1,p2,cols)

(array([0., 1.]), array([0.27272727]))

In [121]:
cols = 11
p1 = 2
p2 = 9

min_dir(p1,p2,cols)

(array([1., 0.]), array([0.36363636]))

In [122]:
cols = 11
p1 = 9
p2 = 2

min_dir(p1, p2, cols)

(array([0., 1.]), array([0.36363636]))

In [10]:
class StateTranslator_Compressed:
    def __init__(self):
        
        self.last_action = 'NORTH'
        self.step_count = 0
        self.last_goose_length = 1
        self.observations = []
        
    def set_last_action(self, last_action):
        self.last_action = last_action
        
    def update_step(self):
        self.step_count += 1
        
    def __get_last_action_vec(self):
        action_vec = np.zeros(4)
        
        if self.last_action == 'NORTH':
            action_vec[0] = 1
        elif self.last_action == 'SOUTH':
            action_vec[1] = 1
        elif self.last_action == 'EAST':
            action_vec[2] = 1
        elif self.last_action == 'WEST':
            action_vec[3] = 1
        
        return action_vec

    def get_state(self, observation, configuration):
        self.configuration = configuration
        
        board = np.zeros(self.configuration.rows*self.configuration.columns)
        board_shape = (self.configuration.rows, self.configuration.columns)

        rows, columns = self.configuration.rows, self.configuration.columns
        

        food = observation.food
        food1x, food1y = row_col(food[0], columns)
        food2x, food2y = row_col(food[1], columns)
        
        
        geese = observation.geese
        
        # If my goose died and I need the state, I return 0 instead of having this break... i probably should use if else..
        try:
            my_goose = np.array(geese[observation['index']][0])
        except:
            my_goose = 0
            
        goose_x, goose_y = row_col(my_goose, columns)
            
        self.current_goose_length = len(geese[observation['index']])  
        
        
        def where_da_food_at(goose, food):
            
            food1x, food1y = row_col(food[0], 11)
            food2x, food2y = row_col(food[1], 11)
            
            my_goose_x, my_goose_y = row_col(my_goose, 11)
            
            x_dir1, x_dist1 = min_dir(my_goose_x, food1x, 11)
            x_dir2, x_dist2 = min_dir(my_goose_x, food2x, 11)
            y_dir1, y_dist1 = min_dir(my_goose_y, food1y, 7)
            y_dir2, y_dist2 = min_dir(my_goose_y, food2y, 7)
            
            # Find the food that is the closest
            if (x_dist1+y_dist1) > (x_dist2 + y_dist2):
                x_dir_fin, x_dist_fin = x_dir2, x_dist2
                y_dir_fin, y_dist_fin = y_dir2, y_dist2
                
            else:
                x_dir_fin, x_dist_fin = x_dir1, x_dist1
                y_dir_fin, y_dist_fin = y_dir1, y_dist1
            
            dir_vec = np.concatenate((x_dir_fin, x_dist_fin, y_dir_fin, y_dist_fin))
            
            return dir_vec
        
        dir_vec = where_da_food_at(my_goose, food)
        
        opponents = [
            goose
            for index, goose in enumerate(geese)
            if index != observation.index and len(goose) > 0
        ]
        
        opponent_heads = [opponent[0] for opponent in opponents]
        # Don't move adjacent to any heads
        head_adjacent_positions = {
            opponent_head_adjacent
            for opponent_head in opponent_heads
            for opponent_head_adjacent in adjacent_positions(opponent_head, columns, rows)
        }
        
        

        heads = [i[0] for i in geese if len(i)>1]
        bodies = [item for sublist in geese for item in sublist]
        
        
        board[list(bodies)] = 0.5
        board[heads] = 1
        
        state = np.array([])
        state = np.append(state, self.__get_last_action_vec())
        state = np.append(state, self.step_count/200)
        state = np.append(state, my_goose/(7*11)) # Scale this down by board sie
        state = np.append(state, self.current_goose_length)
        state = np.append(state, dir_vec/10)
        state = np.append(state, board)
        state = state.flatten()
        
        test_state = np.array([])
        test_state = np.append(test_state, dir_vec)
        test_state = np.append(test_state, self.__get_last_action_vec())
        
        return state
    
    def translate_action_to_text(self, action):
        
        h = {0 : 'WEST',
             1: 'EAST',
             2: 'NORTH',
             3: 'SOUTH'}
        
        return h[action]

    def translate_text_to_int(self, action):
        h = {'WEST':0,
            'EAST':1,
            'NORTH':2,
            'SOUTH':3}
        
        return h[action]
    
    def calculate_reward(self):
        prev = self.last_goose_length
        cur = self.current_goose_length
        
        reward = 0
#         if prev>cur:
#             reward = -100
        
        if cur>prev:
            reward = 100
        
        else:
            reward = -1
        
        return reward
    
    def update_length(self):
        self.last_goose_length = self.current_goose_length

In [11]:
class StateTranslator_Central:
    """
    Returns a board where we are always at the center
    """
    
    def __init__(self):
        
        self.last_action = None
        self.step_count = 0
        self.last_goose_length = 1
        self.last_goose_ind = 0
        self.observations = []
        
    def set_last_action(self, last_action):
        self.last_action = last_action
        
    def update_step(self):
        self.step_count += 1
        
    def __get_last_action_vec(self):
        action_vec = np.zeros(4)
        
        if self.last_action == 'NORTH':
            action_vec[0] = 1
        elif self.last_action == 'SOUTH':
            action_vec[1] = 1
        elif self.last_action == 'EAST':
            action_vec[2] = 1
        elif self.last_action == 'WEST':
            action_vec[3] = 1
        
        return action_vec
    
    def translate_action_to_text(self, action):
        
        h = {0 : 'WEST',
             1: 'EAST',
             2: 'NORTH',
             3: 'SOUTH'}
        
        return h[action]

    def translate_text_to_int(self, action):
        h = {'WEST':0,
            'EAST':1,
            'NORTH':2,
            'SOUTH':3}
        
        return h[action]
    
    
    def update_length(self):
        self.last_goose_length = self.current_goose_length
        
    
    def get_state(self, observation, config):
        
        board = central_state_space(observation, config, self.last_goose_ind)
        
        geese = observation['geese']
        self.current_goose_length = len(geese[observation['index']])  
        
        #### This is exception handling for if our goose died this turn, to use the last
        ### known index as its postion for the state centralizer
        if len(geese[observation['index']])>1:
            self.last_goose_ind = geese[observation['index']][0]
            
        ####
        biggest_goose = 0
        alive_geese = 0
        for ind, goose in enumerate(geese):
            if len(goose)>biggest_goose:
                biggest_goose = len(goose)
            if ind != observation['index'] and len(goose)>0:
                alive_geese+=1
        
        state = np.array([])
        state = np.append(state, self.__get_last_action_vec())
        state = np.append(state, self.current_goose_length/15)
        state = np.append(state, biggest_goose/15)
        state = np.append(state, alive_geese/8)
        state = np.append(state, self.step_count/200)
        state = np.append(state, board.flatten())
        
        state = state.flatten()
        
        return state
    
    def calculate_reward(self, observation):
        
        current_geese = observation['geese']
        prev = self.last_goose_length
        cur = len(current_geese[observation['index']])  
        
        reward = -1       
    
        ### If we grow, reward is 100
        if cur>prev:
            reward = 100
        
        else:
            reward = -1
        
        ### If we die -150
        if cur == 0:
            reward = -150
            
        ### see if any geese are alive

        alive_geese = 0
        for ind, goose in enumerate(current_geese):
            if ind != observation['index'] and len(goose)>0:
                alive_geese+=1
        
        # If we are the last one standing
        if alive_geese == 0 and cur>0:
            reward = 500
            
        ### if the game ends and we are the biggest
        if self.step_count == 200:
            biggest_goose = 0
            biggest_goose_ind = None
            for ind, goose in enumerate(current_geese):
                if len(goose)>biggest_goose:
                    biggest_goose = len(goose)
                    biggest_goose_ind = ind
            
            if biggest_goose_ind == observation['index']:
                reward = 500
        
        return reward
        

In [12]:
st_test = StateTranslator_Central()

In [13]:
env = make("hungry_geese", debug=True)
config = env.configuration

state_dict = env.reset(num_agents=4)[0]
observation = state_dict['observation']
action = state_dict['action']

In [14]:
state_dict

{'action': 'NORTH',
 'reward': 0,
 'info': {},
 'observation': {'remainingOverageTime': 60,
  'step': 0,
  'geese': [[60], [33], [4], [65]],
  'food': [70, 25],
  'index': 0},
 'status': 'ACTIVE'}

In [15]:
st_test.set_last_action("SOUTH")
st_test.update_step()
state_test = st_test.get_state(observation, config)
state_test

array([ 0.        ,  1.        ,  0.        ,  0.        ,  0.06666667,
        0.06666667,  0.375     ,  0.005     ,  0.        ,  0.        ,
        0.        ,  1.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        , -1.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        , -1.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  1.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        , -1.        ,  0.        ,  0.  

In [16]:
len(state_test)

85

In [17]:
st_test.set_last_action = 'NORTH'
st_test.translate_text_to_int('EAST')

1

In [18]:
st_test.calculate_reward(observation)

-1

In [19]:
class dqnAgent:
    """
    Given an environment state, choose an action, and learn from the reward
    https://towardsdatascience.com/reinforcement-learning-w-keras-openai-dqns-1eed3a5338c
    https://towardsdatascience.com/deep-q-learning-tutorial-mindqn-2a4c855abffc
    https://www.researchgate.net/post/What-are-possible-reasons-why-Q-loss-is-not-converging-in-Deep-Q-Learning-algorithm
    """

    def __init__(self, model=None, epsilon = 1.0, epsilon_min = 0.05):

        self.StateTrans = StateTranslator_Central()
        self.state_shape  = 85
        print('my state shape is:', self.state_shape)
        self.memory  = deque(maxlen=2000)
        self.gamma = 0.95
        self.epsilon = epsilon
        self.epsilon_min = epsilon_min
        self.epsilon_decay = 0.990
        self.learning_rate = 0.001
        self.tau = .125

        if model == None:
            self.model = self.create_model()
        else:
            self.model = model
        self.target_model = self.create_model()

    def create_model(self):
        model   = Sequential()
        model.add(Dense(2000, input_dim=self.state_shape, activation="relu"))
        model.add(Dense(1000, activation="relu"))
        model.add(Dense(500, activation="relu"))
        model.add(Dense(1000, activation="relu"))
        model.add(Dense(500, activation="relu"))
        model.add(Dense(100, activation="relu"))
        model.add(Dense(4))
        model.compile(loss="MSE",
            optimizer=Adam(lr=self.learning_rate))
        return model

    def act(self, state):
        self.epsilon *= self.epsilon_decay
        self.epsilon = max(self.epsilon_min, self.epsilon)
        if np.random.random() < self.epsilon:
            return random.choice([0,1,2,3])

        action_values = self.model.predict(state.reshape(-1, self.state_shape))[0]
        action = np.argmax(action_values)

        return action
    
    def translate_state(self, observation, configuration):
        state = self.StateTrans.get_state(observation, configuration)
        return state

    def __call__(self, observation, configuration):
        
        state = self.translate_state(observation, configuration)
        action = self.act(state)
        # State translator will take in 0, 1, 2, 3 and return straight, left or right, which in turn will 
        # be translated into a kaggle Action
        action_text = self.StateTrans.translate_action_to_text(action)
        
        # Update our step number and actions
        self.StateTrans.update_step()
        self.StateTrans.set_last_action(action_text)
        
        return action_text
    
    def remember(self, state, action, reward, new_state, done):
        self.memory.append([state, action, reward, new_state, done])

    def replay(self):
        batch_size = 32
        if len(self.memory) < batch_size:
            return

        samples = random.sample(self.memory, batch_size)
        ########################
        # This can be sped up significantly, but processing all samples in batch rather than 1 at a time
        ####################
        for sample in samples:
            state, action, reward, new_state, done = sample
            target = self.target_model.predict(state.reshape(-1, self.state_shape))
            if done:
                target[0][action] = reward
            else:
                Q_future = max(self.target_model.predict(new_state.reshape(-1, self.state_shape))[0])
                target[0][action] = reward + Q_future * self.gamma
            self.model.fit(state.reshape(-1, self.state_shape), target, epochs=1, verbose=0)

    def target_train(self):
        weights = self.model.get_weights()
        target_weights = self.target_model.get_weights()
        for i in range(len(target_weights)):
            target_weights[i] = weights[i] * self.tau + target_weights[i] * (1 - self.tau)
        self.target_model.set_weights(target_weights)

    def save_model(self, fn):
        self.model.save(fn)


### Training script

In [None]:
steps_per_ep = 200
num_episodes = 10000

env = make("hungry_geese", debug=True)
config = env.configuration
train_name = 'central_agent'
model = keras.models.load_model('central_agent/trial-453')

dqn = dqnAgent(model = model, epsilon = 0.05)
agent2 = GreedyAgent()
agent3 = GreedyAgent()
agent4 = GreedyAgent()

agents = [dqn, agent2, agent3, agent4]

results_dic = {}
for ep in range(num_episodes):
    
    print('episode number: ', ep+453)
    state_dict = env.reset(num_agents=4)[0]
    observation = state_dict['observation']
    my_goose_ind = observation['index']
    
    action = state_dict['action']
    
    dqn.StateTrans.set_last_action(action)
    dqn.StateTrans.step_count = 0
    dqn.StateTrans.last_goose_length = 1
    cur_state = dqn.StateTrans.get_state(observation, config)
    
    
    done = False
    for step in range(steps_per_ep):
        actions = []
        for agent in agents:
            action = agent(observation, config)
            actions.append(action)
        
        state_dict = env.step(actions)[0]
        observation = state_dict['observation']
        print(observation)
        
        action = state_dict['action']
        status = state_dict['status']
        
        
        action_for_model = dqn.StateTrans.translate_text_to_int(action)
        new_state = dqn.StateTrans.get_state(observation, config)
        
        # Set rewards based on if value was gained or lost
        reward = dqn.StateTrans.calculate_reward(observation)
        # Update our goose length based on prev state
        dqn.StateTrans.update_length()
       

        if status != "ACTIVE":
            done = True
        
        
        #Temp for just training agent to go get food
        
#         if reward<-1:
#             reward = -1
        print('reward: ', reward)
        dqn.remember(cur_state, action_for_model, reward, new_state, done)
        
        cur_state = new_state
            
        # Check if my goose died

            
        if done:
            print('Done, Step: ', step)
            print('status, ', status)
            results_dic[ep] = reward
            
            if ep % 10 == 0:
                directory = train_name
                dqn.save_model(directory + f"/trial-{ep}")
                with open(directory + "/results_dic.pkl", 'wb') as f:
                    pickle.dump(results_dic, f)
            break
        

        if step%5 == 0:
            dqn.replay()        
            dqn.target_train()

#         if step%50 == 0:
#             print(f'We survived {step} steps')
#             directory = train_name
#             dqn.save_model(directory + f"/trial-{ep}")
        


my state shape is: 85
episode number:  453
{'remainingOverageTime': 60, 'step': 1, 'geese': [[54], [74], [73], [23]], 'food': [72, 5], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 2, 'geese': [[44], [8], [7], [34]], 'food': [72, 5], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 3, 'geese': [[55], [7], [6], [33]], 'food': [72, 5], 'index': 0}
reward:  -1
Goose Collision: WEST
{'remainingOverageTime': 60, 'step': 4, 'geese': [[66], [], [5, 6], [43]], 'food': [72, 45], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 5, 'geese': [[67], [], [71, 5], [32]], 'food': [72, 45], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 6, 'geese': [[68], [], [60, 71], [21]], 'food': [72, 45], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 7, 'geese': [[69], [], [49, 60], [10]], 'food': [72, 45], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 8, 'geese': [[70], [], [50, 49], [0]], 'food': [72, 45], 'index': 0}
reward:  -1


{'remainingOverageTime': 60, 'step': 17, 'geese': [[6, 5, 4, 70], [], [47], [45]], 'food': [10, 52], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 18, 'geese': [[7, 6, 5, 4], [], [48], [46]], 'food': [10, 52], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 19, 'geese': [[8, 7, 6, 5], [], [49], [47]], 'food': [10, 52], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 20, 'geese': [[9, 8, 7, 6], [], [50], [48]], 'food': [10, 52], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 21, 'geese': [[10, 9, 8, 7, 6], [], [51], [49]], 'food': [52, 34], 'index': 0}
reward:  100
{'remainingOverageTime': 60, 'step': 22, 'geese': [[21, 10, 9, 8, 7], [], [40], [38]], 'food': [52, 34], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 23, 'geese': [[11, 21, 10, 9, 8], [], [41], [39]], 'food': [52, 34], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 24, 'geese': [[12, 11, 21, 10, 9], [], [42], [40]], 'food': [52, 34], 'index':

{'remainingOverageTime': 60, 'step': 32, 'geese': [[53, 52], [51, 40], [44], [48, 37, 26]], 'food': [61, 71], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 33, 'geese': [[54, 53], [62, 51], [55], [59, 48, 37]], 'food': [61, 71], 'index': 0}
reward:  -1
Opposite action: (3, <Action.NORTH: 1>, <Action.SOUTH: 3>)
{'remainingOverageTime': 60, 'step': 34, 'geese': [[65, 54], [63, 62], [66], []], 'food': [61, 71], 'index': 0}
reward:  -1
Opposite action: (2, <Action.NORTH: 1>, <Action.SOUTH: 3>)
{'remainingOverageTime': 60, 'step': 35, 'geese': [[55, 65], [74, 63], [], []], 'food': [61, 71], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 36, 'geese': [[56, 55], [75, 74], [], []], 'food': [61, 71], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 37, 'geese': [[57, 56], [76, 75], [], []], 'food': [61, 71], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 38, 'geese': [[58, 57], [66, 76], [], []], 'food': [61, 71], 'index': 0}
reward:  -1
{'

{'remainingOverageTime': 60, 'step': 7, 'geese': [[66], [48], [50], [29]], 'food': [0, 34], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 8, 'geese': [[0, 66], [59], [61], [40]], 'food': [34, 7], 'index': 0}
reward:  100
{'remainingOverageTime': 60, 'step': 9, 'geese': [[11, 0], [60], [62], [41]], 'food': [34, 7], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 10, 'geese': [[22, 11], [61], [63], [42]], 'food': [34, 7], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 11, 'geese': [[23, 22], [62], [64], [43]], 'food': [34, 7], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 12, 'geese': [[34, 23, 22], [73], [75], [54]], 'food': [7, 36], 'index': 0}
reward:  100
{'remainingOverageTime': 60, 'step': 13, 'geese': [[35, 34, 23], [74], [76], [44]], 'food': [7, 36], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 14, 'geese': [[36, 35, 34, 23], [75], [66], [45]], 'food': [7, 58], 'index': 0}
reward:  100
{'remainingOverageTime

{'remainingOverageTime': 60, 'step': 7, 'geese': [[29, 28], [37], [41], [7]], 'food': [22, 71], 'index': 0}
reward:  -1
Opposite action: (3, <Action.NORTH: 1>, <Action.SOUTH: 3>)
{'remainingOverageTime': 60, 'step': 8, 'geese': [[40, 29], [48], [52], []], 'food': [22, 71], 'index': 0}
reward:  -1
Goose Collision: SOUTH
Goose Collision: WEST
{'remainingOverageTime': 60, 'step': 9, 'geese': [[], [47], [], []], 'food': [22, 71], 'index': 0}
reward:  -150
Done, Step:  8
status,  DONE
episode number:  463
{'remainingOverageTime': 60, 'step': 1, 'geese': [[9], [27], [51], [71]], 'food': [0, 6], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 2, 'geese': [[10], [26], [50], [70]], 'food': [0, 6], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 3, 'geese': [[0, 10], [25], [49], [69]], 'food': [6, 26], 'index': 0}
reward:  100
{'remainingOverageTime': 60, 'step': 4, 'geese': [[1, 0], [36], [60], [3]], 'food': [6, 26], 'index': 0}
reward:  -1
{'remainingOverageTime': 60,

{'remainingOverageTime': 60, 'step': 2, 'geese': [[64], [56], [27], [69]], 'food': [62, 55], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 3, 'geese': [[65], [55, 56], [26], [68]], 'food': [62, 13], 'index': 0}
reward:  -1
Goose Collision: EAST
{'remainingOverageTime': 60, 'step': 4, 'geese': [[], [65, 55], [25], [67]], 'food': [62, 13], 'index': 0}
reward:  -150
Done, Step:  3
status,  DONE
episode number:  467
{'remainingOverageTime': 60, 'step': 1, 'geese': [[76], [46], [54], [33]], 'food': [2, 15], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 2, 'geese': [[66], [57], [65], [44]], 'food': [2, 15], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 3, 'geese': [[0], [68], [76], [55]], 'food': [2, 15], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 4, 'geese': [[1], [69], [66], [56]], 'food': [2, 15], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 5, 'geese': [[2, 1], [70], [67], [57]], 'food': [15, 31], 'index': 0}


{'remainingOverageTime': 60, 'step': 17, 'geese': [[18, 7, 6], [], [8], []], 'food': [75, 25], 'index': 0}
reward:  -1
Goose Collision: WEST
{'remainingOverageTime': 60, 'step': 18, 'geese': [[29, 18, 7], [], [], []], 'food': [75, 25], 'index': 0}
reward:  500
Done, Step:  17
status,  DONE
episode number:  471
{'remainingOverageTime': 60, 'step': 1, 'geese': [[23], [20], [37], [42]], 'food': [75, 70], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 2, 'geese': [[24], [21], [38], [43]], 'food': [75, 70], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 3, 'geese': [[35], [11], [39], [33]], 'food': [75, 70], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 4, 'geese': [[46], [12], [40], [34]], 'food': [75, 70], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 5, 'geese': [[47], [13], [41], [35]], 'food': [75, 70], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 6, 'geese': [[48], [14], [42], [36]], 'food': [75, 70], 'index': 0

{'remainingOverageTime': 60, 'step': 2, 'geese': [[63], [31, 30], [41], [72, 71]], 'food': [42, 15], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 3, 'geese': [[64], [32, 31], [42, 41], [73, 72]], 'food': [15, 10], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 4, 'geese': [[65], [22, 32], [43, 42], [74, 73]], 'food': [15, 10], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 5, 'geese': [[55], [33, 22], [54, 43], [8, 74]], 'food': [15, 10], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 6, 'geese': [[56], [34, 33], [44, 54], [9, 8]], 'food': [15, 10], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 7, 'geese': [[57], [35, 34], [45, 44], [10, 9, 8]], 'food': [15, 68], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 8, 'geese': [[68, 57], [46, 35], [56, 45], [21, 10, 9]], 'food': [15, 54], 'index': 0}
reward:  100
Goose Collision: WEST
Goose Collision: SOUTH
{'remainingOverageTime': 60, 'step': 9, 'geese': 

{'remainingOverageTime': 60, 'step': 32, 'geese': [[34, 33, 22, 11, 21], [], [9, 8], []], 'food': [45, 75], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 33, 'geese': [[45, 34, 33, 22, 11, 21], [], [20, 9], []], 'food': [75, 7], 'index': 0}
reward:  100
{'remainingOverageTime': 60, 'step': 34, 'geese': [[46, 45, 34, 33, 22, 11], [], [21, 20], []], 'food': [75, 7], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 35, 'geese': [[47, 46, 45, 34, 33, 22], [], [10, 21], []], 'food': [75, 7], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 36, 'geese': [[58, 47, 46, 45, 34, 33], [], [76, 10], []], 'food': [75, 7], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 37, 'geese': [[69, 58, 47, 46, 45, 34], [], [66, 76], []], 'food': [75, 7], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 38, 'geese': [[70, 69, 58, 47, 46, 45], [], [0, 66], []], 'food': [75, 7], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 39, 'geese

{'remainingOverageTime': 60, 'step': 2, 'geese': [[28], [27], [64], [14]], 'food': [7, 2], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 3, 'geese': [[39], [16], [53], [3]], 'food': [7, 2], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 4, 'geese': [[50], [5], [42], [69]], 'food': [7, 2], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 5, 'geese': [[51], [71], [31], [58]], 'food': [7, 2], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 6, 'geese': [[52], [60], [20], [47]], 'food': [7, 2], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 7, 'geese': [[63], [49], [9], [36]], 'food': [7, 2], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 8, 'geese': [[74], [38], [75], [25]], 'food': [7, 2], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 9, 'geese': [[75], [27], [64], [14]], 'food': [7, 2], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 10, 'geese': [[76], [26], [63], [13]], 'food':

{'remainingOverageTime': 60, 'step': 2, 'geese': [[21], [27], [55], [49, 48]], 'food': [76, 22], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 3, 'geese': [[11], [28], [56], [50, 49]], 'food': [76, 22], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 4, 'geese': [[22, 11], [39], [67], [61, 50]], 'food': [76, 56], 'index': 0}
reward:  100
{'remainingOverageTime': 60, 'step': 5, 'geese': [[23, 22], [40], [68], [62, 61]], 'food': [76, 56], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 6, 'geese': [[24, 23], [51], [2], [73, 62]], 'food': [76, 56], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 7, 'geese': [[25, 24], [62], [13], [7, 73]], 'food': [76, 56], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 8, 'geese': [[36, 25], [73], [24], [18, 7]], 'food': [76, 56], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 9, 'geese': [[47, 36], [7], [35], [29, 18]], 'food': [76, 56], 'index': 0}
reward:  -1
{'remaining

{'remainingOverageTime': 60, 'step': 32, 'geese': [[0, 10, 76, 75, 74], [68], [28, 29], [50]], 'food': [19, 65], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 33, 'geese': [[1, 0, 10, 76, 75], [2], [39, 28], [61]], 'food': [19, 65], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 34, 'geese': [[12, 1, 0, 10, 76], [13], [50, 39], [72]], 'food': [19, 65], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 35, 'geese': [[13, 12, 1, 0, 10], [24], [61, 50], [6]], 'food': [19, 65], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 36, 'geese': [[14, 13, 12, 1, 0], [25], [62, 61], [7]], 'food': [19, 65], 'index': 0}
reward:  -1
Opposite action: (0, <Action.WEST: 4>, <Action.EAST: 2>)
{'remainingOverageTime': 60, 'step': 37, 'geese': [[], [26], [63, 62], [8]], 'food': [19, 65], 'index': 0}
reward:  -150
Done, Step:  36
status,  DONE
episode number:  485
{'remainingOverageTime': 60, 'step': 1, 'geese': [[25], [47], [31], [40]], 'food': [56, 1], '

{'remainingOverageTime': 60, 'step': 2, 'geese': [[49], [53], [65], [21]], 'food': [19, 34], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 3, 'geese': [[50], [42], [54], [10]], 'food': [19, 34], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 4, 'geese': [[51], [31], [43], [76]], 'food': [19, 34], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 5, 'geese': [[52], [20], [32], [65]], 'food': [19, 34], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 6, 'geese': [[53], [9], [21], [54]], 'food': [19, 34], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 7, 'geese': [[42], [75], [10], [43]], 'food': [19, 34], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 8, 'geese': [[43], [64], [76], [32]], 'food': [19, 34], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 9, 'geese': [[33], [65], [66], [22]], 'food': [19, 34], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 10, 'geese': [[34, 33], [55]

{'remainingOverageTime': 60, 'step': 22, 'geese': [[35, 34], [41, 40, 51], [57, 56], []], 'food': [37, 8], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 23, 'geese': [[36, 35], [42, 41, 40], [58, 57], []], 'food': [37, 8], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 24, 'geese': [[37, 36, 35], [43, 42, 41], [59, 58], []], 'food': [8, 52], 'index': 0}
reward:  100
{'remainingOverageTime': 60, 'step': 25, 'geese': [[38, 37, 36], [33, 43, 42], [60, 59], []], 'food': [8, 52], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 26, 'geese': [[39, 38, 37], [34, 33, 43], [61, 60], []], 'food': [8, 52], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 27, 'geese': [[40, 39, 38], [35, 34, 33], [62, 61], []], 'food': [8, 52], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 28, 'geese': [[41, 40, 39], [36, 35, 34], [63, 62], []], 'food': [8, 52], 'index': 0}
reward:  -1
Opposite action: (0, <Action.WEST: 4>, <Action.EAST: 2>)
{'rem

{'remainingOverageTime': 60, 'step': 12, 'geese': [[50, 51], [], [3], [45, 44]], 'food': [54, 16], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 13, 'geese': [[61, 50], [], [69], [34, 45]], 'food': [54, 16], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 14, 'geese': [[62, 61], [], [70], [35, 34]], 'food': [54, 16], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 15, 'geese': [[73, 62], [], [59], [24, 35]], 'food': [54, 16], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 16, 'geese': [[74, 73], [], [60], [25, 24]], 'food': [54, 16], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 17, 'geese': [[75, 74], [], [49], [14, 25]], 'food': [54, 16], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 18, 'geese': [[76, 75], [], [38], [3, 14]], 'food': [54, 16], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 19, 'geese': [[10, 76], [], [27], [69, 3]], 'food': [54, 16], 'index': 0}
reward:  -1
{'remaining

{'remainingOverageTime': 60, 'step': 42, 'geese': [[64, 53, 42, 41], [27], [], []], 'food': [72, 75], 'index': 0}
reward:  100
{'remainingOverageTime': 60, 'step': 43, 'geese': [[75, 64, 53, 42, 41], [38], [], []], 'food': [72, 23], 'index': 0}
reward:  100
{'remainingOverageTime': 60, 'step': 44, 'geese': [[76, 75, 64, 53, 42], [37], [], []], 'food': [72, 23], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 45, 'geese': [[66, 76, 75, 64, 53], [26], [], []], 'food': [72, 23], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 46, 'geese': [[67, 66, 76, 75, 64], [15], [], []], 'food': [72, 23], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 47, 'geese': [[68, 67, 66, 76, 75], [4], [], []], 'food': [72, 23], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 48, 'geese': [[69, 68, 67, 66, 76], [5], [], []], 'food': [72, 23], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 49, 'geese': [[70, 69, 68, 67, 66], [6], [], []], 'food':

{'remainingOverageTime': 60, 'step': 22, 'geese': [[15, 14, 13, 2], [], [], [40]], 'food': [75, 22], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 23, 'geese': [[16, 15, 14, 13], [], [], [51]], 'food': [75, 22], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 24, 'geese': [[17, 16, 15, 14], [], [], [62]], 'food': [75, 22], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 25, 'geese': [[18, 17, 16, 15], [], [], [73]], 'food': [75, 22], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 26, 'geese': [[19, 18, 17, 16], [], [], [74]], 'food': [75, 22], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 27, 'geese': [[20, 19, 18, 17], [], [], [75, 74]], 'food': [22, 32], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 28, 'geese': [[21, 20, 19, 18], [], [], [76, 75]], 'food': [22, 32], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 29, 'geese': [[32, 21, 20, 19, 18], [], [], [10, 76]], 'food': [22, 39], '

{'remainingOverageTime': 60, 'step': 17, 'geese': [[26, 25, 24], [75], [73], []], 'food': [50, 17], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 18, 'geese': [[27, 26, 25], [64], [62], []], 'food': [50, 17], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 19, 'geese': [[28, 27, 26], [53], [51], []], 'food': [50, 17], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 20, 'geese': [[39, 28, 27], [42], [40], []], 'food': [50, 17], 'index': 0}
reward:  -1
Goose Collision: WEST
{'remainingOverageTime': 60, 'step': 21, 'geese': [[50, 39, 28, 27], [41], [], []], 'food': [17, 43], 'index': 0}
reward:  100
{'remainingOverageTime': 60, 'step': 22, 'geese': [[51, 50, 39, 28], [52], [], []], 'food': [17, 43], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 23, 'geese': [[52, 51, 50, 39], [63], [], []], 'food': [17, 43], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 24, 'geese': [[53, 52, 51, 50], [64], [], []], 'food': [17, 43], '

{'remainingOverageTime': 60, 'step': 27, 'geese': [[30, 29, 28, 27], [], [57], [59, 58]], 'food': [9, 42], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 28, 'geese': [[31, 30, 29, 28], [], [58], [60, 59]], 'food': [9, 42], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 29, 'geese': [[42, 31, 30, 29, 28], [], [69], [71, 60]], 'food': [9, 39], 'index': 0}
reward:  100
{'remainingOverageTime': 60, 'step': 30, 'geese': [[43, 42, 31, 30, 29], [], [68], [70, 71]], 'food': [9, 39], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 31, 'geese': [[33, 43, 42, 31, 30], [], [57], [59, 70]], 'food': [9, 39], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 32, 'geese': [[34, 33, 43, 42, 31], [], [58], [60, 59]], 'food': [9, 39], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 33, 'geese': [[35, 34, 33, 43, 42], [], [59], [61, 60]], 'food': [9, 39], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 34, 'geese': [[36, 35, 34

{'remainingOverageTime': 60, 'step': 47, 'geese': [[61], [], [46], [65]], 'food': [29, 54], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 48, 'geese': [[72], [], [35], [54, 65]], 'food': [29, 31], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 49, 'geese': [[6], [], [24], [43, 54]], 'food': [29, 31], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 50, 'geese': [[7], [], [25], [33, 43]], 'food': [29, 31], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 51, 'geese': [[18], [], [36], [44, 33]], 'food': [29, 31], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 52, 'geese': [[29, 18], [], [47], [55, 44]], 'food': [31, 45], 'index': 0}
reward:  100
{'remainingOverageTime': 60, 'step': 53, 'geese': [[30, 29], [], [48], [56, 55]], 'food': [31, 45], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 54, 'geese': [[31, 30, 29], [], [49], [57, 56]], 'food': [45, 33], 'index': 0}
reward:  100
{'remainingOverageTime': 60,

{'remainingOverageTime': 60, 'step': 32, 'geese': [[18, 17, 16, 15], [], [55, 44, 33], []], 'food': [50, 13], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 33, 'geese': [[29, 18, 17, 16], [], [66, 55, 44], []], 'food': [50, 13], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 34, 'geese': [[30, 29, 18, 17], [], [0, 66, 55], []], 'food': [50, 13], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 35, 'geese': [[31, 30, 29, 18], [], [11, 0, 66], []], 'food': [50, 13], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 36, 'geese': [[32, 31, 30, 29], [], [22, 11, 0], []], 'food': [50, 13], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 37, 'geese': [[43, 32, 31, 30], [], [33, 22, 11], []], 'food': [50, 13], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 38, 'geese': [[54, 43, 32, 31], [], [44, 33, 22], []], 'food': [50, 13], 'index': 0}
reward:  -1
Goose Collision: WEST
{'remainingOverageTime': 60, 'step': 39, 'g

{'remainingOverageTime': 60, 'step': 42, 'geese': [[29, 28, 27, 26, 37], [], [], [57]], 'food': [66, 0], 'index': 0}
reward:  100
{'remainingOverageTime': 60, 'step': 43, 'geese': [[30, 29, 28, 27, 26], [], [], [46]], 'food': [66, 0], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 44, 'geese': [[41, 30, 29, 28, 27], [], [], [35]], 'food': [66, 0], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 45, 'geese': [[52, 41, 30, 29, 28], [], [], [34]], 'food': [66, 0], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 46, 'geese': [[63, 52, 41, 30, 29], [], [], [45]], 'food': [66, 0], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 47, 'geese': [[64, 63, 52, 41, 30], [], [], [56]], 'food': [66, 0], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 48, 'geese': [[75, 64, 63, 52, 41], [], [], [67]], 'food': [66, 0], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 49, 'geese': [[76, 75, 64, 63, 52], [], [], [66, 67]], 'foo

{'remainingOverageTime': 60, 'step': 12, 'geese': [[16, 15], [], [], [9, 8]], 'food': [49, 18], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 13, 'geese': [[17, 16], [], [], [10, 9]], 'food': [49, 18], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 14, 'geese': [[18, 17, 16], [], [], [0, 10]], 'food': [49, 74], 'index': 0}
reward:  100
{'remainingOverageTime': 60, 'step': 15, 'geese': [[19, 18, 17], [], [], [11, 0]], 'food': [49, 74], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 16, 'geese': [[30, 19, 18], [], [], [22, 11]], 'food': [49, 74], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 17, 'geese': [[31, 30, 19], [], [], [33, 22]], 'food': [49, 74], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 18, 'geese': [[32, 31, 30], [], [], [44, 33]], 'food': [49, 74], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 19, 'geese': [[43, 32, 31], [], [], [55, 44]], 'food': [49, 74], 'index': 0}
reward:  -1
{'re

{'remainingOverageTime': 60, 'step': 2, 'geese': [[2], [36], [26], [27]], 'food': [74, 71], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 3, 'geese': [[3], [25], [15], [16]], 'food': [74, 71], 'index': 0}
reward:  -1
Goose Collision: SOUTH
Goose Collision: NORTH
{'remainingOverageTime': 60, 'step': 4, 'geese': [[], [], [4], [5]], 'food': [74, 71], 'index': 0}
reward:  -150
Done, Step:  3
status,  DONE
episode number:  518
{'remainingOverageTime': 60, 'step': 1, 'geese': [[53], [61], [0], [50]], 'food': [23, 22], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 2, 'geese': [[64], [50], [66], [39]], 'food': [23, 22], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 3, 'geese': [[75], [39], [55], [28]], 'food': [23, 22], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 4, 'geese': [[76], [28], [44], [17]], 'food': [23, 22], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 5, 'geese': [[10], [29], [45], [18]], 'food': [23, 22],

{'remainingOverageTime': 60, 'step': 22, 'geese': [[68, 67], [27, 38], [65], [9]], 'food': [13, 39], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 23, 'geese': [[2, 68], [16, 27], [54], [75]], 'food': [13, 39], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 24, 'geese': [[13, 2, 68], [17, 16], [44], [76]], 'food': [39, 47], 'index': 0}
reward:  100
{'remainingOverageTime': 60, 'step': 25, 'geese': [[24, 13, 2], [18, 17], [45], [66]], 'food': [39, 47], 'index': 0}
reward:  -1
Opposite action: (0, <Action.NORTH: 1>, <Action.SOUTH: 3>)
{'remainingOverageTime': 60, 'step': 26, 'geese': [[], [19, 18], [46], [67]], 'food': [39, 47], 'index': 0}
reward:  -150
Done, Step:  25
status,  DONE
episode number:  520
{'remainingOverageTime': 60, 'step': 1, 'geese': [[71], [68], [46], [15]], 'food': [34, 35], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 2, 'geese': [[5], [57], [35, 46], [4]], 'food': [34, 1], 'index': 0}
reward:  -1
{'remainingOverageTime':

{'remainingOverageTime': 60, 'step': 22, 'geese': [[40, 39, 28], [], [18], []], 'food': [23, 76], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 23, 'geese': [[41, 40, 39], [], [29], []], 'food': [23, 76], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 24, 'geese': [[42, 41, 40], [], [30], []], 'food': [23, 76], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 25, 'geese': [[43, 42, 41], [], [31], []], 'food': [23, 76], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 26, 'geese': [[33, 43, 42], [], [32], []], 'food': [23, 76], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 27, 'geese': [[44, 33, 43], [], [22], []], 'food': [23, 76], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 28, 'geese': [[45, 44, 33], [], [23, 22], []], 'food': [76, 24], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 29, 'geese': [[46, 45, 44], [], [24, 23, 22], []], 'food': [76, 0], 'index': 0}
reward:  -1
{'remainingOv

{'remainingOverageTime': 60, 'step': 2, 'geese': [[24], [22], [45], [32]], 'food': [41, 50], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 3, 'geese': [[25], [23], [46], [22]], 'food': [41, 50], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 4, 'geese': [[26], [24], [47], [23]], 'food': [41, 50], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 5, 'geese': [[27], [25], [48], [24]], 'food': [41, 50], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 6, 'geese': [[28], [26], [49], [25]], 'food': [41, 50], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 7, 'geese': [[39], [37], [60], [36]], 'food': [41, 50], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 8, 'geese': [[50, 39], [48], [71], [47]], 'food': [41, 24], 'index': 0}
reward:  100
{'remainingOverageTime': 60, 'step': 9, 'geese': [[61, 50], [49], [72], [48]], 'food': [41, 24], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 10, 'geese': [[62,

{'remainingOverageTime': 60, 'step': 7, 'geese': [[17, 6], [76, 65], [23], [51]], 'food': [14, 58], 'index': 0}
reward:  100
{'remainingOverageTime': 60, 'step': 8, 'geese': [[18, 17], [75, 76], [22], [50]], 'food': [14, 58], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 9, 'geese': [[19, 18], [64, 75], [11], [39]], 'food': [14, 58], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 10, 'geese': [[20, 19], [53, 64], [0], [28]], 'food': [14, 58], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 11, 'geese': [[21, 20], [42, 53], [66], [17]], 'food': [14, 58], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 12, 'geese': [[11, 21], [43, 42], [67], [18]], 'food': [14, 58], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 13, 'geese': [[12, 11], [33, 43], [68], [19]], 'food': [14, 58], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 14, 'geese': [[13, 12], [34, 33], [69], [20]], 'food': [14, 58], 'index': 0}
reward: 

{'remainingOverageTime': 60, 'step': 22, 'geese': [[44, 54, 43, 42, 41, 30], [14, 3], [39], [35, 24, 13]], 'food': [60, 27], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 23, 'geese': [[45, 44, 54, 43, 42, 41], [15, 14], [40], [36, 35, 24]], 'food': [60, 27], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 24, 'geese': [[46, 45, 44, 54, 43, 42], [16, 15], [41], [37, 36, 35]], 'food': [60, 27], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 25, 'geese': [[47, 46, 45, 44, 54, 43], [17, 16], [42], [38, 37, 36]], 'food': [60, 27], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 26, 'geese': [[48, 47, 46, 45, 44, 54], [18, 17], [43], [39, 38, 37]], 'food': [60, 27], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 27, 'geese': [[49, 48, 47, 46, 45, 44], [19, 18], [33], [40, 39, 38]], 'food': [60, 27], 'index': 0}
reward:  -1
Goose Collision: SOUTH
{'remainingOverageTime': 60, 'step': 28, 'geese': [[60, 49, 48, 47, 46, 45, 44

{'remainingOverageTime': 60, 'step': 7, 'geese': [[34, 33], [22], [64], [0]], 'food': [27, 72], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 8, 'geese': [[35, 34], [23], [65], [1]], 'food': [27, 72], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 9, 'geese': [[36, 35], [24], [55], [2]], 'food': [27, 72], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 10, 'geese': [[37, 36], [25], [56], [3]], 'food': [27, 72], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 11, 'geese': [[38, 37], [26], [57], [4]], 'food': [27, 72], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 12, 'geese': [[39, 38], [27, 26], [58], [5]], 'food': [72, 31], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 13, 'geese': [[50, 39], [38, 27], [69], [16]], 'food': [72, 31], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 14, 'geese': [[61, 50], [49, 38], [3], [27]], 'food': [72, 31], 'index': 0}
reward:  -1
{'remainingOverageTime

{'remainingOverageTime': 60, 'step': 22, 'geese': [[62, 51], [17, 6], [25], [40]], 'food': [43, 73], 'index': 0}
reward:  -1
Goose Collision: SOUTH
{'remainingOverageTime': 60, 'step': 23, 'geese': [[73, 62, 51], [28, 17], [36], []], 'food': [43, 69], 'index': 0}
reward:  100
{'remainingOverageTime': 60, 'step': 24, 'geese': [[74, 73, 62], [27, 28], [35], []], 'food': [43, 69], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 25, 'geese': [[75, 74, 73], [16, 27], [24], []], 'food': [43, 69], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 26, 'geese': [[76, 75, 74], [5, 16], [13], []], 'food': [43, 69], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 27, 'geese': [[66, 76, 75], [71, 5], [2], []], 'food': [43, 69], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 28, 'geese': [[67, 66, 76], [72, 71], [3], []], 'food': [43, 69], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 29, 'geese': [[68, 67, 66], [73, 72], [4], []], 'f

{'remainingOverageTime': 60, 'step': 7, 'geese': [[40], [4], [1], [76]], 'food': [59, 63], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 8, 'geese': [[41], [5], [2], [66]], 'food': [59, 63], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 9, 'geese': [[52], [16], [13], [0]], 'food': [59, 63], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 10, 'geese': [[63, 52], [27], [24], [11]], 'food': [59, 12], 'index': 0}
reward:  100
{'remainingOverageTime': 60, 'step': 11, 'geese': [[64, 63], [26], [23], [21]], 'food': [59, 12], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 12, 'geese': [[65, 64], [15], [12, 23], [10]], 'food': [59, 60], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 13, 'geese': [[55, 65], [16], [13, 12], [0]], 'food': [59, 60], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 14, 'geese': [[56, 55], [17], [14, 13], [1]], 'food': [59, 60], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'ste

{'remainingOverageTime': 60, 'step': 27, 'geese': [[42, 31, 20], [], [8], [33, 22]], 'food': [60, 67], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 28, 'geese': [[53, 42, 31], [], [19], [44, 33]], 'food': [60, 67], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 29, 'geese': [[64, 53, 42], [], [30], [55, 44]], 'food': [60, 67], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 30, 'geese': [[75, 64, 53], [], [29], [65, 55]], 'food': [60, 67], 'index': 0}
reward:  -1
Goose Collision: WEST
{'remainingOverageTime': 60, 'step': 31, 'geese': [[76, 75, 64], [], [28], []], 'food': [60, 67], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 32, 'geese': [[66, 76, 75], [], [17], []], 'food': [60, 67], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 33, 'geese': [[67, 66, 76, 75], [], [18], []], 'food': [60, 68], 'index': 0}
reward:  100
{'remainingOverageTime': 60, 'step': 34, 'geese': [[56, 67, 66, 76], [], [19], []], 'food': [60,

{'remainingOverageTime': 60, 'step': 22, 'geese': [[51, 50], [74, 63], [17], [24]], 'food': [71, 65], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 23, 'geese': [[52, 51], [75, 74], [18], [25]], 'food': [71, 65], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 24, 'geese': [[53, 52], [76, 75], [19], [26]], 'food': [71, 65], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 25, 'geese': [[54, 53], [66, 76], [20], [27]], 'food': [71, 65], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 26, 'geese': [[65, 54, 53], [0, 66], [31], [38]], 'food': [71, 16], 'index': 0}
reward:  100
{'remainingOverageTime': 60, 'step': 27, 'geese': [[76, 65, 54], [11, 0], [42], [49]], 'food': [71, 16], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 28, 'geese': [[10, 76, 65], [21, 11], [41], [48]], 'food': [71, 16], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 29, 'geese': [[0, 10, 76], [20, 21], [40], [47]], 'food': [71, 16], 'i

{'remainingOverageTime': 60, 'step': 22, 'geese': [[2, 1, 0], [60, 59], [58], [61]], 'food': [4, 32], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 23, 'geese': [[3, 2, 1], [61, 60], [59], [62]], 'food': [4, 32], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 24, 'geese': [[4, 3, 2, 1], [62, 61], [60], [63]], 'food': [32, 24], 'index': 0}
reward:  100
{'remainingOverageTime': 60, 'step': 25, 'geese': [[5, 4, 3, 2], [73, 62], [71], [74]], 'food': [32, 24], 'index': 0}
reward:  -1
Goose Collision: SOUTH
{'remainingOverageTime': 60, 'step': 26, 'geese': [[6, 5, 4, 3], [7, 73], [], [8]], 'food': [32, 24], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 27, 'geese': [[17, 6, 5, 4], [18, 7], [], [19]], 'food': [32, 24], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 28, 'geese': [[28, 17, 6, 5], [29, 18], [], [30]], 'food': [32, 24], 'index': 0}
reward:  -1
Goose Collision: WEST
Goose Collision: WEST
{'remainingOverageTime': 60, 'step':

Goose Collision: EAST
{'remainingOverageTime': 60, 'step': 17, 'geese': [[], [43, 32, 21], [10, 76], [20]], 'food': [52, 59], 'index': 0}
reward:  -150
Done, Step:  16
status,  DONE
episode number:  550
{'remainingOverageTime': 60, 'step': 1, 'geese': [[54], [26], [46], [3]], 'food': [23, 53], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 2, 'geese': [[65], [25], [45], [2]], 'food': [23, 53], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 3, 'geese': [[55], [14], [34], [68]], 'food': [23, 53], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 4, 'geese': [[56], [13], [33], [67]], 'food': [23, 53], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 5, 'geese': [[57], [2], [22], [56]], 'food': [23, 53], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 6, 'geese': [[58], [68], [11], [45]], 'food': [23, 53], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 7, 'geese': [[59], [57], [0], [34]], 'food': [23, 53], 'index

{'remainingOverageTime': 60, 'step': 7, 'geese': [[33], [55], [31], [19]], 'food': [10, 12], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 8, 'geese': [[34], [44], [20], [8]], 'food': [10, 12], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 9, 'geese': [[35], [33], [9], [74]], 'food': [10, 12], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 10, 'geese': [[36], [22], [75], [63]], 'food': [10, 12], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 11, 'geese': [[37], [11], [64], [52]], 'food': [10, 12], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 12, 'geese': [[38], [0], [53], [41]], 'food': [10, 12], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 13, 'geese': [[39], [66], [42], [30]], 'food': [10, 12], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 14, 'geese': [[40], [55], [31], [19]], 'food': [10, 12], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 15, 'geese': [[41], [44],

{'remainingOverageTime': 60, 'step': 2, 'geese': [[58], [45], [44], [38]], 'food': [30, 46], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 3, 'geese': [[59], [34], [33], [27]], 'food': [30, 46], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 4, 'geese': [[60], [23], [22], [16]], 'food': [30, 46], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 5, 'geese': [[61], [12], [11], [5]], 'food': [30, 46], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 6, 'geese': [[62], [1], [0], [71]], 'food': [30, 46], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 7, 'geese': [[63], [67], [66], [60]], 'food': [30, 46], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 8, 'geese': [[64], [56], [55], [49]], 'food': [30, 46], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 9, 'geese': [[65], [45], [44], [38]], 'food': [30, 46], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 10, 'geese': [[76], [44], [54]

{'remainingOverageTime': 60, 'step': 32, 'geese': [[69, 68], [21, 20], [25, 24], [3]], 'food': [49, 7], 'index': 0}
reward:  100
Goose Collision: NORTH
{'remainingOverageTime': 60, 'step': 33, 'geese': [[70, 69], [10, 21], [14, 25], []], 'food': [49, 7], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 34, 'geese': [[71, 70], [76, 10], [3, 14], []], 'food': [49, 7], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 35, 'geese': [[72, 71], [65, 76], [69, 3], []], 'food': [49, 7], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 36, 'geese': [[6, 72], [54, 65], [58, 69], []], 'food': [49, 7], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 37, 'geese': [[7, 6, 72], [44, 54], [59, 58], []], 'food': [49, 22], 'index': 0}
reward:  100
{'remainingOverageTime': 60, 'step': 38, 'geese': [[8, 7, 6], [33, 44], [48, 59], []], 'food': [49, 22], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 39, 'geese': [[9, 8, 7], [22, 33, 44], [37, 48

{'remainingOverageTime': 60, 'step': 7, 'geese': [[64, 63], [24], [52], [17]], 'food': [34, 11], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 8, 'geese': [[65, 64], [25], [53], [18]], 'food': [34, 11], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 9, 'geese': [[55, 65], [26], [54], [19]], 'food': [34, 11], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 10, 'geese': [[56, 55], [27], [44], [20]], 'food': [34, 11], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 11, 'geese': [[57, 56], [28], [45], [21]], 'food': [34, 11], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 12, 'geese': [[58, 57], [29], [46], [11, 21]], 'food': [34, 36], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 13, 'geese': [[59, 58], [30], [47], [12, 11]], 'food': [34, 36], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 14, 'geese': [[60, 59], [31], [48], [13, 12]], 'food': [34, 36], 'index': 0}
reward:  -1
{'remainingOver

{'remainingOverageTime': 60, 'step': 47, 'geese': [[43, 42, 41], [40], [], []], 'food': [65, 9], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 48, 'geese': [[54, 43, 42], [29], [], []], 'food': [65, 9], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 49, 'geese': [[65, 54, 43, 42], [28], [], []], 'food': [9, 48], 'index': 0}
reward:  100
{'remainingOverageTime': 60, 'step': 50, 'geese': [[55, 65, 54, 43], [27], [], []], 'food': [9, 48], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 51, 'geese': [[56, 55, 65, 54], [16], [], []], 'food': [9, 48], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 52, 'geese': [[57, 56, 55, 65], [5], [], []], 'food': [9, 48], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 53, 'geese': [[68, 57, 56, 55], [71], [], []], 'food': [9, 48], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 54, 'geese': [[2, 68, 57, 56], [72], [], []], 'food': [9, 48], 'index': 0}
reward:  -1
{'remaini

Goose Collision: EAST
Goose Collision: SOUTH
{'remainingOverageTime': 60, 'step': 7, 'geese': [[], [], [28, 17], [53]], 'food': [11, 59], 'index': 0}
reward:  -150
Done, Step:  6
status,  DONE
INFO:tensorflow:Assets written to: central_agent/trial-110\assets
episode number:  564
{'remainingOverageTime': 60, 'step': 1, 'geese': [[67], [55], [33], [9]], 'food': [24, 14], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 2, 'geese': [[1], [66], [44], [20]], 'food': [24, 14], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 3, 'geese': [[12], [67], [45], [21]], 'food': [24, 14], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 4, 'geese': [[13], [68], [46], [11]], 'food': [24, 14], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 5, 'geese': [[24, 13], [69], [47], [12]], 'food': [14, 25], 'index': 0}
reward:  100
Goose Collision: EAST
{'remainingOverageTime': 60, 'step': 6, 'geese': [[25, 24, 13], [70], [48], []], 'food': [14, 37], 'index': 0}

Goose Collision: SOUTH
{'remainingOverageTime': 60, 'step': 27, 'geese': [[3, 69, 68, 67], [], [23], []], 'food': [50, 28], 'index': 0}
reward:  100
{'remainingOverageTime': 60, 'step': 28, 'geese': [[4, 3, 69, 68], [], [24], []], 'food': [50, 28], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 29, 'geese': [[5, 4, 3, 69], [], [25], []], 'food': [50, 28], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 30, 'geese': [[6, 5, 4, 3], [], [26], []], 'food': [50, 28], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 31, 'geese': [[17, 6, 5, 4], [], [37], []], 'food': [50, 28], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 32, 'geese': [[28, 17, 6, 5, 4], [], [48], []], 'food': [50, 49], 'index': 0}
reward:  100
{'remainingOverageTime': 60, 'step': 33, 'geese': [[39, 28, 17, 6, 5], [], [59], []], 'food': [50, 49], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 34, 'geese': [[50, 39, 28, 17, 6, 5], [], [70], []], 'food': [49, 

{'remainingOverageTime': 60, 'step': 32, 'geese': [[60, 59, 58, 57, 56, 55], [], [9], [15]], 'food': [62, 28], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 33, 'geese': [[61, 60, 59, 58, 57, 56], [], [10], [16]], 'food': [62, 28], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 34, 'geese': [[62, 61, 60, 59, 58, 57, 56], [], [0], [17]], 'food': [28, 30], 'index': 0}
reward:  100
{'remainingOverageTime': 60, 'step': 35, 'geese': [[63, 62, 61, 60, 59, 58, 57], [], [66], [6]], 'food': [28, 30], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 36, 'geese': [[74, 63, 62, 61, 60, 59, 58], [], [55], [72]], 'food': [28, 30], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 37, 'geese': [[75, 74, 63, 62, 61, 60, 59], [], [56], [73]], 'food': [28, 30], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 38, 'geese': [[9, 75, 74, 63, 62, 61, 60], [], [67], [7]], 'food': [28, 30], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'st

{'remainingOverageTime': 60, 'step': 32, 'geese': [[21, 20, 19, 18, 7, 6], [], [73, 62], []], 'food': [75, 60], 'index': 0}
reward:  -1
Goose Collision: SOUTH
{'remainingOverageTime': 60, 'step': 33, 'geese': [[11, 21, 20, 19, 18, 7], [], [], []], 'food': [75, 60], 'index': 0}
reward:  500
Done, Step:  32
status,  DONE
episode number:  571
{'remainingOverageTime': 60, 'step': 1, 'geese': [[27], [50], [63], [68]], 'food': [28, 59], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 2, 'geese': [[28, 27], [51], [64], [69]], 'food': [59, 39], 'index': 0}
reward:  100
{'remainingOverageTime': 60, 'step': 3, 'geese': [[39, 28, 27], [62], [75], [3]], 'food': [59, 12], 'index': 0}
reward:  100
{'remainingOverageTime': 60, 'step': 4, 'geese': [[40, 39, 28], [73], [9], [14]], 'food': [59, 12], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 5, 'geese': [[41, 40, 39], [7], [20], [25]], 'food': [59, 12], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 6, 'geese

{'remainingOverageTime': 60, 'step': 27, 'geese': [[71, 70, 69, 68, 67, 56], [], [], [74]], 'food': [10, 18], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 28, 'geese': [[72, 71, 70, 69, 68, 67], [], [], [8]], 'food': [10, 18], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 29, 'geese': [[73, 72, 71, 70, 69, 68], [], [], [19]], 'food': [10, 18], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 30, 'geese': [[7, 73, 72, 71, 70, 69], [], [], [30]], 'food': [10, 18], 'index': 0}
reward:  -1
Opposite action: (0, <Action.NORTH: 1>, <Action.SOUTH: 3>)
{'remainingOverageTime': 60, 'step': 31, 'geese': [[], [], [], [41]], 'food': [10, 18], 'index': 0}
reward:  -150
Done, Step:  30
status,  DONE
INFO:tensorflow:Assets written to: central_agent/trial-120\assets
episode number:  574
{'remainingOverageTime': 60, 'step': 1, 'geese': [[46], [1], [28], [70]], 'food': [54, 51], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 2, 'geese': [[47], [2],

{'remainingOverageTime': 60, 'step': 47, 'geese': [[45, 44], [18, 29], [], []], 'food': [11, 16], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 48, 'geese': [[46, 45], [7, 18], [], []], 'food': [11, 16], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 49, 'geese': [[47, 46], [73, 7], [], []], 'food': [11, 16], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 50, 'geese': [[48, 47], [62, 73], [], []], 'food': [11, 16], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 51, 'geese': [[59, 48], [51, 62], [], []], 'food': [11, 16], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 52, 'geese': [[70, 59], [52, 51], [], []], 'food': [11, 16], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 53, 'geese': [[4, 70], [63, 52], [], []], 'food': [11, 16], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 54, 'geese': [[5, 4], [64, 63], [], []], 'food': [11, 16], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 

Goose Collision: NORTH
{'remainingOverageTime': 60, 'step': 17, 'geese': [[76, 75], [13], [], [71, 5]], 'food': [38, 46], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 18, 'geese': [[66, 76], [14], [], [72, 71]], 'food': [38, 46], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 19, 'geese': [[67, 66], [3], [], [61, 72]], 'food': [38, 46], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 20, 'geese': [[68, 67], [69], [], [50, 61]], 'food': [38, 46], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 21, 'geese': [[2, 68], [58], [], [39, 50]], 'food': [38, 46], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 22, 'geese': [[3, 2], [59], [], [40, 39]], 'food': [38, 46], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 23, 'geese': [[4, 3], [48], [], [29, 40]], 'food': [38, 46], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 24, 'geese': [[5, 4], [49], [], [30, 29]], 'food': [38, 46], 'index': 0}
reward

Opposite action: (0, <Action.WEST: 4>, <Action.EAST: 2>)
{'remainingOverageTime': 60, 'step': 12, 'geese': [[], [], [17, 16], [27]], 'food': [64, 33], 'index': 0}
reward:  -150
Done, Step:  11
status,  DONE
episode number:  582
{'remainingOverageTime': 60, 'step': 1, 'geese': [[10], [33], [13], [40, 29]], 'food': [19, 12], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 2, 'geese': [[0], [34], [14], [41, 40]], 'food': [19, 12], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 3, 'geese': [[1], [35], [15], [42, 41]], 'food': [19, 12], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 4, 'geese': [[12, 1], [46], [26], [53, 42]], 'food': [19, 71], 'index': 0}
reward:  100
{'remainingOverageTime': 60, 'step': 5, 'geese': [[23, 12], [47], [27], [54, 53]], 'food': [19, 71], 'index': 0}
reward:  -1
{'remainingOverageTime': 60, 'step': 6, 'geese': [[22, 23], [48], [28], [44, 54]], 'food': [19, 71], 'index': 0}
reward:  -1
Goose Collision: SOUTH
Goose Collisi

### Ideas for future improvement:
- Should get bonus reward with the action of actually collecting food
- Should be penalized harder for collision than starvation
- Add hunger feature

In [20]:
model = keras.models.load_model('central_agent/trial-453')


In [21]:
MyAgent = dqnAgent(model = model,
                   epsilon = 0,
                   epsilon_min = 0)
MyAgent.StateTrans.set_last_action('NORTH')

my state shape is: 85


In [22]:
env = make("hungry_geese", debug=True)


In [29]:
env.reset(num_agents=4)
results = env.run([MyAgent, GreedyAgent(), GreedyAgent(), GreedyAgent()])

Opposite action: (2, <Action.EAST: 2>, <Action.WEST: 4>)
Opposite action: (1, <Action.WEST: 4>, <Action.EAST: 2>)
Goose Collision: EAST


In [30]:
env.render(mode="ipython")

In [None]:
results

In [None]:
env.configuration