In [1]:
from dataclasses import dataclass

@dataclass(frozen=True)
class GrassCutterState:
    """Immutable snapshot of the grass-cutting puzzle.

    Attributes:
        size: Side length of the square board (rows and columns are 0..size-1).
        grass_position: (row, col) cells that still have grass.
        current_row: Row of the robot.
        current_col: Column of the robot.
        move_times: Number of moves applied to reach this state.
    """
    size: int
    grass_position: tuple[tuple[int, int], ...]
    current_row: int = 0
    current_col: int = 0
    move_times: int = 0

    def _key(self):
        """Return the tuple of all fields; shared by hashing and equality."""
        return (self.size, self.grass_position, self.current_row, self.current_col, self.move_times)

    def __repr__(self):
        """Render the board as text, one row per line, followed by the move count.

        'G' is grass, 'r' is the robot on a bare cell, 'R' is the robot on a
        grass cell and ' ' is an empty cell; cells are separated by '|'.
        """
        grass_cells = set(self.grass_position)  # O(1) lookup per cell
        robot_cell = (self.current_row, self.current_col)

        def symbol(cell):
            if cell in grass_cells:
                return 'R' if cell == robot_cell else 'G'
            return 'r' if cell == robot_cell else ' '

        rows = ['|'.join(symbol((row, col)) for col in range(self.size)) for row in range(self.size)]
        return '\n'.join(rows) + f'\nMove times: {self.move_times}\n'

    def __hash__(self):
        """Hash over every field, including the move count."""
        return hash(self._key())

    def __eq__(self, other):
        """Field-wise equality.

        Comparing the fields themselves (rather than their hashes) keeps two
        distinct states from being reported equal on a hash collision, and
        returning NotImplemented lets Python handle unrelated types.
        """
        if not isinstance(other, GrassCutterState):
            return NotImplemented
        return self._key() == other._key()

    def __lt__(self, other):
        """Order states by move count only (used to break ties in priority queues)."""
        if not isinstance(other, GrassCutterState):
            return NotImplemented
        return self.move_times < other.move_times

In [2]:
# Create environment
import random

class GrassCutterEnvironment:
    """Rules of the grass-cutting puzzle: initial state, legal moves, transitions."""

    # Row/column offset for each movement command; 'C' (cut) is handled separately.
    move_str_to_delta = {
        'U': (-1, 0),
        'D': (1, 0),
        'L': (0, -1),
        'R': (0, 1)
    }

    @classmethod
    def create_init_state(cls, size: int, random_seed=4242) -> GrassCutterState:
        """Build the starting state for a board of the given size.

        Draws `size` random (row, col) grass cells; draws are independent, so
        the same cell may appear more than once. The robot starts at (0, 0)
        with zero moves.

        A private `random.Random` instance is used instead of reseeding the
        module-level generator, so the call has no effect on other users of
        `random` and is safe to run from several threads at once. The cells
        produced for a given seed are the same as with `random.seed(seed)`.
        """
        rng = random.Random(random_seed)
        grass_position = tuple((rng.randint(0, size - 1), rng.randint(0, size - 1)) for _ in range(size))
        return GrassCutterState(size, grass_position)

    @classmethod
    def get_valid_moves(cls, state: GrassCutterState) -> list[str]:
        """Return the moves allowed in `state`.

        'C' (cut) is always allowed; 'U', 'D', 'L', 'R' are allowed only when
        they keep the robot on the board.
        """
        valid_moves = ['C']
        if state.current_row > 0:
            valid_moves.append('U')
        if state.current_row < state.size - 1:
            valid_moves.append('D')
        if state.current_col > 0:
            valid_moves.append('L')
        if state.current_col < state.size - 1:
            valid_moves.append('R')
        return valid_moves

    @classmethod
    def apply_move(cls, state: GrassCutterState, move: str) -> GrassCutterState:
        """Return the state reached by applying `move`; the move count grows by one.

        Raises:
            ValueError: if `move` is not among `get_valid_moves(state)`.
        """
        if move not in cls.get_valid_moves(state):
            raise ValueError(f'Invalid move {move}')
        if move == 'C':
            # Cutting removes every grass entry at the robot's cell (duplicates included).
            robot_cell = (state.current_row, state.current_col)
            new_grass_position = tuple(pos for pos in state.grass_position if pos != robot_cell)
            return GrassCutterState(state.size, new_grass_position, state.current_row, state.current_col, state.move_times + 1)
        delta_row, delta_col = cls.move_str_to_delta[move]
        new_row = state.current_row + delta_row
        new_col = state.current_col + delta_col
        return GrassCutterState(state.size, state.grass_position, new_row, new_col, state.move_times + 1)

    @classmethod
    def is_terminal(cls, state: GrassCutterState) -> bool:
        """Return True when no grass is left."""
        return len(state.grass_position) == 0
    

In [5]:
import time
from concurrent.futures import ThreadPoolExecutor

def run_simulation(size: int, total_games: int, search_strategy: callable, use_threads: bool = False, verbose: bool = False) -> None:
    """Run `search_strategy` on `total_games` seeded boards and print summary statistics.

    Game `i` starts from `GrassCutterEnvironment.create_init_state(size, random_seed=i)`.
    Only games whose returned state is terminal contribute to the move and
    time statistics.

    Args:
        size: Board size passed to `create_init_state`.
        total_games: Number of games; seeds are 0..total_games-1.
        search_strategy: Callable taking the initial state and returning an end state.
        use_threads: Run the games on a `ThreadPoolExecutor` instead of sequentially.
        verbose: Print every end state.
    """
    def run_simulation_single(i):
        # Play one game and return (end_state, elapsed_seconds).
        state = GrassCutterEnvironment.create_init_state(size, random_seed=i)
        start_time = time.perf_counter()  # monotonic clock, suited to measuring intervals
        end_state = search_strategy(state)
        return end_state, time.perf_counter() - start_time

    if use_threads:
        with ThreadPoolExecutor() as executor:
            results = list(executor.map(run_simulation_single, range(total_games)))
    else:
        # Lazy map: each game runs right before its result is aggregated and printed.
        results = map(run_simulation_single, range(total_games))

    max_move_to_end = float('-inf')
    min_move_to_end = float('inf')
    sum_move_to_end = 0
    sum_time_taken = 0
    end_game_count = 0

    for end_state, time_taken in results:
        if GrassCutterEnvironment.is_terminal(end_state):
            end_game_count += 1
            max_move_to_end = max(max_move_to_end, end_state.move_times)
            min_move_to_end = min(min_move_to_end, end_state.move_times)
            sum_move_to_end += end_state.move_times
            sum_time_taken += time_taken

        if verbose:
            print(end_state)

    # Guard the ratios explicitly so zero games / zero finished games print 0 instead of failing.
    end_rate = end_game_count / total_games if total_games else 0.0
    mean_moves = sum_move_to_end / end_game_count if end_game_count else 0.0
    avg_time = sum_time_taken / end_game_count if end_game_count else 0.0

    # The header text is fixed and printed for whichever strategy was passed in.
    print('Random search result:')
    print(f'\t      Total games: {total_games:,}')
    # The '%' format spec already appends the percent sign.
    print(f'\t Total end states: {end_game_count:,}({end_rate:.2%})')
    print(f'\t Max moves to end: {max_move_to_end:,}')
    print(f'\t Min moves to end: {min_move_to_end:,}')
    print(f'\tMean moves to end: {mean_moves:,.2f}')
    print(f'\t Total time taken: {sum_time_taken:,.4f} seconds')
    print(f'\t   Avg time taken: {avg_time:,.4f} seconds')

Use a quality function based on the amount of remaining grass and the number of moves
- Optimizations
  1. Never simulate the same state twice (the number of moves counts as part of the state)
  2. Penalize the number of moves (to avoid too many moves) by a factor of 2
  3. Penalize the amount of remaining grass (to avoid too many moves) by a factor of 1
  4. We know the optimal solution takes fewer than 20 moves.

In [6]:
# Import priority queue
import heapq
from functools import partial
from queue import PriorityQueue

def optimized_quality_function(state: GrassCutterState) -> int:
    """Score a state; higher means better.

    The score is the negated count of entries left in `state.grass_position`.
    The move-count penalty is currently switched off and contributes 0.
    """
    remaining_grass = len(state.grass_position)
    move_penalty = 0  # disabled; a `state.move_times * 2` penalty was tried previously
    return -remaining_grass - move_penalty
    

def quality_based_search(state: GrassCutterState, quality_function: callable, verbose: bool = False, max_move: float = float('inf')) -> GrassCutterState:
    """Search from `state`, expanding the highest-quality states first.

    The search keeps going after the first terminal state is found and keeps
    the terminal state with the fewest moves; states whose move count is not
    below the best found so far (initially `max_move`) are pruned.

    Args:
        state: Starting state.
        quality_function: Maps a state to a score; higher scores are expanded first.
        verbose: Print each expanded state.
        max_move: Upper bound (exclusive) on the move count of accepted states.

    Returns:
        The best terminal state found, or the starting state if none was
        found within the bound.
    """
    visited_states = set()
    # Min-heap of (negated quality, state). A plain heapq list pops in the same
    # order as queue.PriorityQueue without the locking overhead, which this
    # single-threaded loop does not need.
    frontier = [(-quality_function(state), state)]
    best_state = state
    best_move_times = max_move
    while frontier:
        _, current = heapq.heappop(frontier)
        # Terminal states are never expanded; keep the one with the fewest moves.
        if GrassCutterEnvironment.is_terminal(current):
            if current.move_times < best_move_times:
                best_state = current
                best_move_times = current.move_times
            continue
        # Add new states to the frontier
        for move in GrassCutterEnvironment.get_valid_moves(current):
            child = GrassCutterEnvironment.apply_move(current, move)
            # A child that cannot beat the best result ends expansion of this state.
            if child.move_times >= best_move_times:
                break
            if child not in visited_states:
                visited_states.add(child)
                heapq.heappush(frontier, (-quality_function(child), child))
        if verbose:
            print(current)
            print('----------------')
    return best_state

# Rebind the name to a partial with the quality function and a 200-move bound pre-filled,
# so it can be called with just the state, as run_simulation does.
quality_based_search = partial(quality_based_search, quality_function=optimized_quality_function, max_move=200)
# Run 10 games (seeds 0-9) with size=10 and print the summary statistics.
run_simulation(10, 10, quality_based_search)

Random search result:
	      Total games: 10
	 Total end states: 10(100.00%%)
	 Max moves to end: 47
	 Min moves to end: 33
	Mean moves to end: 41.60
	 Total time taken: 94.9381 seconds
	   Avg time taken: 9.4938 seconds
