<a href="https://colab.research.google.com/github/faezesarlakifar/ChillinWars-Agent/blob/main/GeneticMiniMax_Agent.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import random
import numpy as np
import copy

# chillin imports
from chillin_client import RealtimeAI

# project imports
from ks.models import ECell, EDirection, Position
from ks.commands import ChangeDirection, ActivateWallBreaker

# Constants
# Tuning parameters for the genetic search that runs on every decision.
POPULATION_SIZE = 10  # candidate move sequences kept per generation
MAX_GENERATIONS = 10  # evaluate/select/crossover/mutate rounds per decision
MAX_DEPTH = 3  # number of moves (genes) in one candidate sequence
MUTATION_RATE = 0.1  # per-move probability that mutate() re-draws the move

class AI(RealtimeAI):

    def __init__(self, world):
        """Initialise the base client and keep a reference to ``world``."""
        super().__init__(world)
        self.world = world

    def initialize(self):
        """Print a marker line; no other set-up is performed here."""
        print('initialize')

    def decide(self):
        """Print a marker line, then run the move-selection routine."""
        print('decide')
        self.client1()

    def client1(self):
        """Pick the next move with the genetic search and send it.

        ``genetic_minimax`` hands back a whole candidate (a sequence of
        directions), whereas a single direction is sent per decision, so only
        the first move of the plan is turned into a ``ChangeDirection``.
        """
        plan = self.genetic_minimax(self.world, MAX_GENERATIONS, POPULATION_SIZE)

        # Accept either a sequence of moves or one bare direction so this
        # method keeps working whichever shape the search returns.
        if isinstance(plan, (list, tuple)):
            if not plan:
                return  # no move available: send nothing this cycle
            best_direction = plan[0]
        else:
            best_direction = plan

        self.send_command(ChangeDirection(best_direction))

    def genetic_minimax(self, state, max_generations, population_size):
        """Evolve candidate move sequences and return the best one evaluated.

        Args:
            state: world snapshot every candidate is evaluated against.
            max_generations: number of evaluate/select/crossover/mutate rounds.
            population_size: number of candidates per generation.

        Returns:
            The highest-scoring candidate (a list of directions) seen in any
            generation. When no generation runs, the first candidate of the
            initial population is returned.

        Raises:
            IndexError: if no generation runs and the population is empty.
        """
        population = self.initialize_population(population_size)

        best_individual = None
        best_fitness = float('-inf')

        for _ in range(max_generations):
            fitness_scores = [
                self.evaluate_individual(state, individual)
                for individual in population
            ]

            # Remember the best candidate that was actually scored: the
            # population left after the last mutation has never been evaluated,
            # so its first element is not a meaningful answer.
            for individual, fitness in zip(population, fitness_scores):
                if fitness != fitness:  # NaN compares unequal to itself
                    fitness = float('-inf')
                if best_individual is None or fitness > best_fitness:
                    # Copy, because mutate() edits candidates in place.
                    best_individual = list(individual)
                    best_fitness = fitness

            population = self.select_parents(population, fitness_scores, population_size)
            population = self.crossover(population, population_size)
            population = self.mutate(population)

        if best_individual is None:
            best_individual = population[0]
        return best_individual

    def initialize_population(self, population_size):
        """Build ``population_size`` random move sequences.

        Each sequence starts with any direction and is extended up to
        ``MAX_DEPTH`` moves, never choosing the direct opposite of the move
        just before it.
        """
        all_directions = list(EDirection)
        population = []
        for _ in range(population_size):
            moves = [random.choice(all_directions)]
            while len(moves) < MAX_DEPTH:
                last_move = moves[-1]
                allowed = [
                    candidate
                    for candidate in all_directions
                    if not self.is_opposite(last_move, candidate)
                ]
                moves.append(random.choice(allowed))
            population.append(moves)
        return population

    def mutate(self, population):
        """Randomly re-draw moves of each candidate, in place.

        Every move is replaced with probability ``MUTATION_RATE``. The
        replacement is never the direct opposite of the move before it or the
        move after it, so a mutation cannot introduce an immediate reversal.

        Args:
            population: list of candidates (lists of directions); modified
                in place.

        Returns:
            The same ``population`` list.
        """
        all_directions = list(EDirection)
        for individual in population:
            gene_count = len(individual)
            for j in range(gene_count):
                if random.random() >= MUTATION_RATE:
                    continue

                # The first move has no predecessor; index j - 1 would wrap
                # around to the last move, so neighbours are collected
                # explicitly instead.
                adjacent = []
                if j > 0:
                    adjacent.append(individual[j - 1])
                if j + 1 < gene_count:
                    adjacent.append(individual[j + 1])

                valid_directions = [
                    direction
                    for direction in all_directions
                    if not any(self.is_opposite(other, direction) for other in adjacent)
                ]
                individual[j] = random.choice(valid_directions)
        return population

    def is_opposite(self, direction1, direction2):
        """Return True when the two directions point exactly opposite ways."""
        opposite_pairs = (
            (EDirection.Up, EDirection.Down),
            (EDirection.Down, EDirection.Up),
            (EDirection.Left, EDirection.Right),
            (EDirection.Right, EDirection.Left),
        )
        return any(
            direction1 == first and direction2 == second
            for first, second in opposite_pairs
        )

    def evaluate_individual(self, state, individual):
        """Score a candidate by simulating its moves one after another.

        Args:
            state: starting world snapshot; it is not modified.
            individual: sequence of directions to apply in order.

        Returns:
            The sum of the per-step scores from ``evaluate_state``. Summation
            stops at the first infinite step score, which is how
            ``evaluate_state`` reports a finished game.
        """
        total_score = 0
        # No up-front copy is needed: get_next_state() deep-copies the state
        # it is given before changing anything.
        current_state = state
        for direction in individual:
            next_state, current_agent = self.get_next_state(current_state, direction)
            step_score = self.evaluate_state(next_state, current_agent)
            total_score += step_score
            if step_score in (float('inf'), float('-inf')):
                # Later moves are irrelevant once the game is over, and adding
                # an opposite infinity would turn the total into NaN.
                break
            current_state = next_state
        return total_score

    def select_parents(self, population, fitness_scores, population_size):
        """Draw ``population_size`` parents with fitness-proportional odds.

        For finite, non-negative scores with a positive sum the weights are
        the scores divided by their sum. Other inputs are first made usable:

        * ``+inf`` is capped just above the best finite score; ``-inf`` and
          NaN are placed just below the worst finite score.
        * If any score is negative, all scores are shifted so the lowest
          becomes zero.
        * If every weight ends up zero, selection is uniform.

        Args:
            population: list of candidates.
            fitness_scores: one score per candidate, in the same order.
            population_size: number of parents to draw (with replacement).

        Returns:
            A list of ``population_size`` candidates taken from ``population``
            (the same objects, not copies); empty if ``population`` is empty.

        Raises:
            ValueError: if the number of scores differs from the number of
                candidates.
        """
        if not population:
            return []
        if len(fitness_scores) != len(population):
            raise ValueError("fitness_scores must have one entry per individual")

        scores = np.asarray(fitness_scores, dtype=float)

        finite_mask = np.isfinite(scores)
        if finite_mask.any():
            low = float(scores[finite_mask].min())
            high = float(scores[finite_mask].max())
        else:
            low = high = 0.0
        margin = (high - low) or 1.0

        # NaN counts as the worst outcome; infinities are capped to one margin
        # beyond the finite range so the weights stay finite.
        scores = np.where(np.isnan(scores), low - margin, scores)
        scores = np.clip(scores, low - margin, high + margin)

        # random.choices() needs non-negative weights.
        lowest = scores.min()
        if lowest < 0:
            scores = scores - lowest

        total = scores.sum()
        if total > 0:
            weights = (scores / total).tolist()
        else:
            weights = [1.0 / len(population)] * len(population)

        return random.choices(population, weights=weights, k=population_size)

    def crossover(self, population, population_size):
        """Create ``population_size`` children by single-point crossover.

        Two parents are drawn at random (with replacement) for each child.
        The child takes the first parent's moves up to a random cut point and
        the second parent's moves from there on. The cut point is chosen from
        the parents' actual length, so it is valid for any candidate length;
        parents too short to cut (fewer than two moves) yield a copy of the
        first parent.

        Args:
            population: list of candidates (sequences of directions).
            population_size: number of children to produce.

        Returns:
            A new list of new sequences; the parents are not modified. Empty
            if ``population`` is empty.
        """
        if not population:
            return []

        new_population = []
        for _ in range(population_size):
            parent1 = random.choice(population)
            parent2 = random.choice(population)

            shortest = min(len(parent1), len(parent2))
            if shortest < 2:
                # No interior cut point exists; randint(1, 0) would raise.
                new_population.append(parent1[:])
                continue

            crossover_point = random.randint(1, shortest - 1)
            child = parent1[:crossover_point] + parent2[crossover_point:]
            new_population.append(child)
        return new_population

    def _get_our_agent_empty_neighbors(self, state):
        """List the directions from our agent that lead to an empty cell.

        A neighbouring cell counts when it lies on the board, holds
        ``ECell.Empty`` and is not the cell the opposing agent stands on.
        Both coordinates of every neighbour are range-checked, so a position
        on or beyond the board edge neither wraps around through negative
        indexes nor raises ``IndexError``.

        Returns:
            Matching directions in the order Right, Left, Down, Up.
        """
        our_position = self._get_our_agent_position(state)
        their_position = self._get_their_agent_position(state)

        board = state.board
        height = len(board)
        width = len(board[0]) if height else 0

        offsets = (
            (EDirection.Right, 1, 0),
            (EDirection.Left, -1, 0),
            (EDirection.Down, 0, 1),
            (EDirection.Up, 0, -1),
        )

        empty_neighbors = []
        for direction, dx, dy in offsets:
            x = our_position.x + dx
            y = our_position.y + dy
            if not (0 <= x < width and 0 <= y < height):
                continue
            if x == their_position.x and y == their_position.y:
                continue
            if board[y][x] == ECell.Empty:
                empty_neighbors.append(direction)
        return empty_neighbors

    def _get_our_agent_blue_wall_neighbors(self, state):
        """List the directions from our agent that lead to a blue wall.

        A neighbouring cell counts when it lies on the board, holds
        ``ECell.BlueWall`` and is not the cell the opposing agent stands on.
        Both coordinates of every neighbour are range-checked, so a position
        on or beyond the board edge neither wraps around through negative
        indexes nor raises ``IndexError``.

        Returns:
            Matching directions in the order Right, Left, Down, Up.
        """
        our_position = self._get_our_agent_position(state)
        their_position = self._get_their_agent_position(state)

        board = state.board
        height = len(board)
        width = len(board[0]) if height else 0

        offsets = (
            (EDirection.Right, 1, 0),
            (EDirection.Left, -1, 0),
            (EDirection.Down, 0, 1),
            (EDirection.Up, 0, -1),
        )

        blue_walls = []
        for direction, dx, dy in offsets:
            x = our_position.x + dx
            y = our_position.y + dy
            if not (0 <= x < width and 0 <= y < height):
                continue
            if x == their_position.x and y == their_position.y:
                continue
            if board[y][x] == ECell.BlueWall:
                blue_walls.append(direction)
        return blue_walls

    def _get_our_agent_yellow_wall_neighbors(self, state):
        """List the directions from our agent that lead to a yellow wall.

        A neighbouring cell counts when it lies on the board, holds
        ``ECell.YellowWall`` and is not the cell the opposing agent stands
        on. Both coordinates of every neighbour are range-checked, so a
        position on or beyond the board edge neither wraps around through
        negative indexes nor raises ``IndexError``.

        Returns:
            Matching directions in the order Right, Left, Down, Up.
        """
        our_position = self._get_our_agent_position(state)
        their_position = self._get_their_agent_position(state)

        board = state.board
        height = len(board)
        width = len(board[0]) if height else 0

        offsets = (
            (EDirection.Right, 1, 0),
            (EDirection.Left, -1, 0),
            (EDirection.Down, 0, 1),
            (EDirection.Up, 0, -1),
        )

        yellow_walls = []
        for direction, dx, dy in offsets:
            x = our_position.x + dx
            y = our_position.y + dy
            if not (0 <= x < width and 0 <= y < height):
                continue
            if x == their_position.x and y == their_position.y:
                continue
            if board[y][x] == ECell.YellowWall:
                yellow_walls.append(direction)
        return yellow_walls

    def _get_our_agent_Area_wall_neighbors(self, state):
        """List the directions from our agent that lead to an area wall.

        A neighbouring cell counts when it lies on the board, holds
        ``ECell.AreaWall`` and is not the cell the opposing agent stands on.
        Both coordinates of every neighbour are range-checked, so a position
        on or beyond the board edge neither wraps around through negative
        indexes nor raises ``IndexError``.

        Returns:
            Matching directions in the order Right, Left, Down, Up.
        """
        our_position = self._get_our_agent_position(state)
        their_position = self._get_their_agent_position(state)

        board = state.board
        height = len(board)
        width = len(board[0]) if height else 0

        offsets = (
            (EDirection.Right, 1, 0),
            (EDirection.Left, -1, 0),
            (EDirection.Down, 0, 1),
            (EDirection.Up, 0, -1),
        )

        area_walls = []
        for direction, dx, dy in offsets:
            x = our_position.x + dx
            y = our_position.y + dy
            if not (0 <= x < width and 0 <= y < height):
                continue
            if x == their_position.x and y == their_position.y:
                continue
            if board[y][x] == ECell.AreaWall:
                area_walls.append(direction)
        return area_walls

    def _get_our_agent_position(self, state):
        return state.agents[self.my_side].position

    def _get_their_agent_position(self, state):
        return state.agents[self.other_side].position

    def evaluate_state(self, state, current_agent):
        """Score a simulated state from our side's point of view.

        Args:
            state: world snapshot providing ``board``, ``agents``, ``scores``
                and ``cycle``.
            current_agent: our agent after the simulated move; its position
                is the cell being entered. It is not modified.

        Returns:
            ``inf``, ``-inf`` or ``0`` when the game counts as finished and
            our score is higher than, lower than or equal to the opponent's.
            Otherwise: score difference + the crash score or wall reward for
            the entered cell + our remaining health.
        """
        constants = self.world.constants

        my_agent = current_agent
        opponent_agent = state.agents[self.other_side]

        # NOTE(review): scores are looked up by side, the same key used for
        # ``state.agents``. Indexing with the agent objects themselves, as was
        # done before, cannot find an entry. Confirm against the world model.
        my_score = state.scores[self.my_side]
        opponent_score = state.scores[self.other_side]

        my_position = my_agent.position
        same_cell = (
            my_position.x == opponent_agent.position.x
            and my_position.y == opponent_agent.position.y
        )

        # Check if the game is over
        if (state.cycle >= constants.max_cycles
                or my_agent.health <= 0
                or opponent_agent.health <= 0
                or same_cell):
            if my_score > opponent_score:
                return float('inf')
            if my_score < opponent_score:
                return float('-inf')
            return 0

        # Look at the cell actually being entered. Comparing the position
        # with lists of directions can never match, which left every penalty
        # and reward below permanently at zero.
        board = state.board
        x, y = my_position.x, my_position.y
        if 0 <= y < len(board) and 0 <= x < len(board[y]):
            entered_cell = board[y][x]
        else:
            # Off the board is scored like running into the area wall.
            entered_cell = ECell.AreaWall

        if self.my_side == 'Yellow':
            my_wall, enemy_wall = ECell.YellowWall, ECell.BlueWall
        else:
            my_wall, enemy_wall = ECell.BlueWall, ECell.YellowWall

        # Health is tracked locally so the caller's agent object is untouched.
        health = my_agent.health
        cell_score = 0

        if entered_cell == ECell.AreaWall:
            cell_score = constants.area_wall_crash_score
            health = 0
        elif entered_cell == my_wall or entered_cell == enemy_wall:
            if entered_cell == my_wall:
                cell_score = constants.my_wall_crash_score
            else:
                cell_score = constants.enemy_wall_crash_score
            # A wall costs one health unless the wall breaker is still active.
            if my_agent.wall_breaker_rem_time < 1:
                health -= 1
        elif entered_cell == ECell.Empty:
            cell_score = constants.wall_score_coefficient

        score_diff = my_score - opponent_score
        return score_diff + cell_score + health

    def get_next_state(self, state, direction):
        """Return a copy of ``state`` in which our agent has moved one cell.

        Args:
            state: world snapshot to start from; it is not modified.
            direction: the ``EDirection`` to move in. Any other value leaves
                the position unchanged.

        Returns:
            ``(next_state, current_agent)`` where ``next_state`` is a deep
            copy of ``state`` and ``current_agent`` is our agent inside that
            copy, already at its new position.
        """
        next_state = copy.deepcopy(state)

        # Move the agent that lives inside the copied state. Moving a separate
        # copy left next_state unchanged, so a chain of calls always restarted
        # from the original cell.
        current_agent = next_state.agents[self.my_side]

        # x grows to the right and y grows downwards.
        offsets = {
            EDirection.Up: (0, -1),
            EDirection.Down: (0, 1),
            EDirection.Right: (1, 0),
            EDirection.Left: (-1, 0),
        }
        dx, dy = offsets.get(direction, (0, 0))
        current_agent.position.x += dx
        current_agent.position.y += dy

        # NOTE(review): the board and the scores are not updated here; only
        # the agent's position changes.
        return next_state, current_agent
