In [1]:
from tqdm.auto import tqdm
import numpy as np
from icecream import ic

In [2]:
PUZZLE_DIM = 5
RANDOMIZE_STEPS = 100_000
HEURISTIC_WEIGHT = 1.5
HEURISTIC_WEIGHT_MULT=1.05
HEURISTIC_WEIGHT_MSTEPS = 10**PUZZLE_DIM
MAX_HEURISTIC_WEIGHT = 3

In [3]:
rng = np.random.Generator(np.random.PCG64([PUZZLE_DIM, RANDOMIZE_STEPS]))

As priority value, i will use heuristic expected cost function `h`.

In this case, i used **manhattan distance** as `h`, proven to be the best choice for path searching problems in grid-like environments where movement is restricted to horizontal and vertical directions


*sources:<br/>https://theory.stanford.edu/~amitp/GameProgramming/Heuristics.html, <br/> https://pages.cs.wisc.edu/~dyer/cs540/notes/search2.html*

In [4]:

class State:

    state: np.ndarray
    depth: int
    last_move: str
    parent: "State"

    def __init__(self, state: np.ndarray,depth:int=1,last_move:str=None,parent: "State" = None):
        self.state = state
        self.depth = depth
        self.last_move = last_move
        self.parent = parent

    @staticmethod
    def getSolution():
        return State(np.array([i for i in range(1, PUZZLE_DIM**2)]+[0]).reshape(PUZZLE_DIM, PUZZLE_DIM))

    @staticmethod
    def getRandom():
        return State(rng.permutation(State.getSolution().state))
    
    def randomize(self, steps: int):
        for _ in tqdm(range(steps), desc="Randomizing"):
            action = rng.choice(self.available_actions())
            self.state = self.do_action(action,as_child=False).state

    def zeropos(self):
        for i in range(PUZZLE_DIM):
            for j in range(PUZZLE_DIM):
                if self.state[i,j] == 0:
                    return i,j
        raise ValueError("No zero found")


    def available_actions(self) -> list[str]:
        x, y = self.zeropos()
        actions = list()
        if x > 0:
            actions.append("U") 
        if x < PUZZLE_DIM - 1:
            actions.append("D")
        if y > 0:
            actions.append("L")
        if y < PUZZLE_DIM - 1:
            actions.append("R")
        return actions
    
    def swap(self, pos1, pos2):
        self.state[pos1], self.state[pos2] = self.state[pos2], self.state[pos1]

    def do_action(self, action: str,*,as_child=True) -> np.ndarray:
        if as_child:
            new_state = State(self.state.copy(),self.depth+1,action,self)
        else:
            new_state = self # directly apply the action to the current state
        
        x, y = new_state.zeropos()
        if action == "U":
            new_state.swap((x, y), (x-1, y))
        elif action == "D":
            new_state.swap((x, y), (x+1, y))
        elif action == "L":
            new_state.swap((x, y), (x, y-1))
        elif action == "R":
            new_state.swap((x, y), (x, y+1))
        else:
            raise ValueError("Invalid action")
        return new_state
    
    def cost(self):
        return self.depth-1

    def __str__(self):
        return str(self.state)
    
    def __repr__(self):
        return str(self.state)
    
    def __eq__(self, other):
        return np.array_equal(self.state, other.state)
    
    def __hash__(self):
        return hash(self.state.tobytes())
    
    def manhattan_distance(self):
        distance = 0
        for i in range(PUZZLE_DIM):
            for j in range(PUZZLE_DIM):
                if self.state[i, j] == 0:
                    continue # skip the hole
                d, m = divmod(self.state[i, j] - 1, PUZZLE_DIM)
                distance += abs(d - i) + abs(m - j)
        return distance
    
    def tot_cost(self):
        return self.cost() + HEURISTIC_WEIGHT*self.manhattan_distance()
    
    def __lt__(self, other):
        return self.manhattan_distance() < other.manhattan_distance()
    
    def is_solved(self):
        return np.array_equal(self.state, State.getSolution().state)
    

In [5]:
def reconstruct_path(state: State):
    """ Reconstructs the path from the initial state to the given state from the hole POV """
    path = list()
    if state is not None:
        while state.last_move is not None:
            path.append(state.last_move)
            state = state.parent
    return path[::-1] # reverse the path

def hole_to_tile(haction:str):
    """ Returns the opposite encoding, switching from moving the hole to moving the tile """
    if haction == "U":
        return "D"
    if haction == "D":
        return "U"
    if haction == "L":
        return "R"
    if haction == "R":
        return "L"
    raise ValueError("Invalid action")

def expand_encoding(act:str)->str:
    """ Returns the full action string """
    if act == "U":
        return "Up"
    if act == "D":
        return "Down"
    if act == "L":
        return "Left"
    if act == "R":
        return "Right"
    raise ValueError("Invalid action")

def movingTilePos(parent_state:State,tile_action:str):
    """ Returns the position of the tile before the movement """
    x,y = parent_state.zeropos()
    if tile_action == "U":
        return x+1,y
    if tile_action == "D":
        return x-1,y
    if tile_action == "L":
        return x,y+1
    if tile_action == "R":
        return x,y-1
    raise ValueError("Invalid action")

def explain_path(state: State):
    """ Explains the path printing the actions the user would do (in tile encoding) """
    path = list()
    if state is not None:
        while state.last_move is not None:
            tile_action = hole_to_tile(state.last_move)
            state = state.parent
            tpos = movingTilePos(state,tile_action)
            path.append((tile_action,tpos))
    path = path[::-1] # reverse the path
    for action, pos in path:
        print(f"Move tile \"{state.state[pos]}\" {expand_encoding(action)}")

In [6]:

state = State.getSolution()
state.randomize(RANDOMIZE_STEPS)
ic(state)
ic(state.manhattan_distance())

Randomizing:   0%|          | 0/100000 [00:00<?, ?it/s]

ic| state: [[23 22 21  8  3]
            [19  0 18 13 15]
            [ 6 11 12  9  1]
            [24  4 17 20 14]
            [10  5 16  7  2]]
ic| state.manhattan_distance(): np.int64(82)


np.int64(82)

## A* Algorithm

First let's define a priority queue using `heapq` module:

In [7]:
import heapq

class PriorityQueue:
    def __init__(self):
        self.elements = []
    
    def empty(self):
        return len(self.elements) == 0
    
    def append(self, item, priority):
        heapq.heappush(self.elements, (priority, item))
    
    def popleft(self):
        return heapq.heappop(self.elements)[1]
    
    def __bool__(self):
        """ Override the boolean operator to allow direct check with while and if statements """
        return not self.empty()
    
    def __len__(self):
        return len(self.elements)
    
    def __str__(self):
        return str(self.elements)
    
    def __repr__(self):
        return str(self.elements)
    
class StatePriorityQueue(PriorityQueue):
    def append(self, item: State):
        super().append(item, item.tot_cost())

In [8]:
frontier = StatePriorityQueue()
frontier.append(state)
visited = set()
visited.add(state)
ic(frontier.elements);

ic| frontier.elements: [(np.float64(123.0),
                         [[23 22 21  8  3]
                        [19  0 18 13 15]
                        [ 6 11 12  9  1]
                        [24  4 17 20 14]
                        [10  5 16  7  2]])]


In [9]:
cycle=0
while frontier:
    current_state = frontier.popleft()
    visited.add(current_state) # avoid using np.array as keys in a set
    if current_state.is_solved():
        print("Solved!")
        break
    actions = current_state.available_actions()
    for action in actions:
        new_state = current_state.do_action(action)
        if new_state not in visited:
            frontier.append(new_state)
    if cycle % HEURISTIC_WEIGHT_MSTEPS == 0:
        print(f"current_state (@cycle-{cycle}):\n{current_state}")
        print(f"current statistics:\nevalued states: {len(visited)}\nfrontier states: {len(frontier)}")
        if HEURISTIC_WEIGHT < MAX_HEURISTIC_WEIGHT:
            old_heuristic_weight = HEURISTIC_WEIGHT
            HEURISTIC_WEIGHT *= HEURISTIC_WEIGHT_MULT
            if HEURISTIC_WEIGHT > MAX_HEURISTIC_WEIGHT:
                HEURISTIC_WEIGHT = MAX_HEURISTIC_WEIGHT
            print(f"incrementing heuristic weight from {old_heuristic_weight} to {HEURISTIC_WEIGHT}\n")
        else:
            print(f"heuristic weight is already at max value: {HEURISTIC_WEIGHT}\n")
    cycle += 1

current_state (@cycle-0):
[[23 22 21  8  3]
 [19  0 18 13 15]
 [ 6 11 12  9  1]
 [24  4 17 20 14]
 [10  5 16  7  2]]
current statistics:
evalued states: 1
frontier states: 4
incrementing heuristic weight from 1.5 to 1.5750000000000002

current_state (@cycle-100000):
[[23 21  8  3 15]
 [ 6 22 13  9  1]
 [11 12 19  4 14]
 [16 10 17  5 20]
 [ 0 24  7 18  2]]
current statistics:
evalued states: 99546
frontier states: 110699
incrementing heuristic weight from 1.5750000000000002 to 1.6537500000000003

current_state (@cycle-200000):
[[ 6 23  8  1  3]
 [21  0 22  9  5]
 [11 12 13 14 15]
 [10  7 19  4 20]
 [16 17 18 24  2]]
current statistics:
evalued states: 197337
frontier states: 250375
incrementing heuristic weight from 1.6537500000000003 to 1.7364375000000003

current_state (@cycle-300000):
[[23 22 21  8  3]
 [ 6 12 19  9 15]
 [11 13  1  5 14]
 [16 10 18  2  4]
 [24  7 17  0 20]]
current statistics:
evalued states: 295682
frontier states: 371888
incrementing heuristic weight from 1.7364375

In [10]:
print(cycle)
len(visited)

2305539


2232276

In [11]:
print(current_state)
print(f"in {current_state.cost()} steps:")
path = reconstruct_path(current_state)
print(path)
print(len(path))
explain_path(current_state)

[[ 1  2  3  4  5]
 [ 6  7  8  9 10]
 [11 12 13 14 15]
 [16 17 18 19 20]
 [21 22 23 24  0]]
in 150 steps:
['L', 'D', 'R', 'R', 'U', 'R', 'D', 'R', 'D', 'L', 'L', 'L', 'D', 'R', 'R', 'U', 'L', 'L', 'D', 'L', 'U', 'R', 'D', 'R', 'R', 'U', 'L', 'L', 'U', 'U', 'U', 'R', 'R', 'D', 'D', 'D', 'L', 'U', 'L', 'D', 'R', 'U', 'U', 'R', 'D', 'D', 'D', 'R', 'U', 'U', 'U', 'L', 'D', 'D', 'L', 'L', 'U', 'U', 'U', 'L', 'D', 'R', 'R', 'U', 'L', 'D', 'R', 'U', 'R', 'R', 'D', 'L', 'D', 'L', 'U', 'U', 'R', 'D', 'L', 'D', 'L', 'D', 'D', 'R', 'U', 'U', 'U', 'L', 'D', 'D', 'D', 'L', 'U', 'R', 'R', 'D', 'L', 'U', 'R', 'R', 'D', 'L', 'U', 'U', 'R', 'D', 'R', 'D', 'L', 'L', 'U', 'U', 'R', 'U', 'L', 'D', 'D', 'L', 'U', 'U', 'R', 'R', 'D', 'L', 'L', 'D', 'L', 'U', 'U', 'U', 'R', 'D', 'D', 'L', 'D', 'R', 'U', 'R', 'R', 'D', 'D', 'L', 'L', 'U', 'L', 'D', 'R', 'R', 'R', 'R']
150
Move tile "19" Right
Move tile "6" Up
Move tile "11" Left
Move tile "12" Left
Move tile "18" Down
Move tile "13" Left
Move tile "9" Up
Move 