In [None]:
import bisect
import heapq

import numpy as np

# Read the input file as a list of text rows (line terminators removed).
with open('sample.txt', 'r') as f:
  lines = f.read().splitlines()

# Break each row into its characters and let NumPy parse every character as
# an integer, giving a 2-D grid indexed as plan[row, column].
plan = np.asarray([list(row) for row in lines], dtype=int)

In [None]:
# Search state: a 4-tuple (i, j, d, r)
#   i, j - grid coordinates (row, column)
#   d    - direction index, in [0, 4)
#   r    - runway (straight-ahead steps still allowed), in [0, 3)

# Unit (row, column) step for each direction index: up, right, down, left.
directions = [
  np.asarray(step)
  for step in ((-1, 0), (0, 1), (1, 0), (0, -1))
]

up, right, down, left = range(4)

# A* open list, seeded with the top-left cell facing down and facing right,
# each with a runway of 2.
frontier = [(0, 0, heading, 2) for heading in (down, right)]

# Cheapest "g" cost found so far for every state; infinity means the state
# has not been reached yet.
g_cost = np.full((*plan.shape, 4, 3), np.inf)

# The seed states cost nothing.
for n in frontier:
  g_cost[n] = 0

# The goal is the bottom-right cell of the grid.
goal_coords = (plan.shape[0] - 1, plan.shape[1] - 1)

# True when the node's coordinates are the goal cell; direction and runway
# are ignored.
def is_goal(node):
  row, col, *_ = node
  return goal_coords == (row, col)

# A* heuristic: Manhattan distance from the node's cell to the goal cell.
def h(node):
  row, col, *_ = node
  goal_row, goal_col = goal_coords
  return abs(goal_row - row) + abs(goal_col - col)

# Successor function: returns the list of states reachable from `node` in one
# step, in the order left turn, right turn, straight ahead. Steps that would
# leave the grid are dropped.
def neighbors(node):
  i, j, d, r = node
  n_dirs = len(directions)

  # Each move is (new direction, runway left after the step).
  # A 90-degree turn to either side resets the runway to 2.
  moves = [((d + turn) % n_dirs, 2) for turn in (3, 1)]

  # Going straight is only possible while runway remains; it uses one unit.
  if r > 0:
    moves.append((d, r - 1))

  successors = []
  for heading, runway in moves:
    di, dj = directions[heading]
    ni, nj = i + di, j + dj
    if not (0 <= ni < plan.shape[0] and 0 <= nj < plan.shape[1]):
      continue    # stepped off the grid
    successors.append((ni, nj, heading, runway))

  return successors

# A* main loop.
#
# The open list is a binary heap of (f, node) entries, where f = g + h is
# computed once, at push time. The f value is stored in the entry rather than
# looked up from g_cost on demand: g_cost[node] can drop after a node has been
# queued, and an ordering key that changes under a sorted container breaks
# its ordering. When a cheaper path to a queued node is found, a new entry is
# pushed and the old one is left in place; it is recognised as stale and
# skipped when popped.
open_heap = [(g_cost[n] + h(n), n) for n in frontier]
heapq.heapify(open_heap)

while open_heap:
  # Entry with the lowest f cost
  f_q, q = heapq.heappop(open_heap)

  # Stale entry: a cheaper path to q was recorded after this entry was pushed
  if f_q > g_cost[q] + h(q):
    continue

  if is_goal(q):
    print(f'  GOAL REACHED, g_cost: {g_cost[q]}')
    break

  for n in neighbors(q):
    # g for that neighbor: g(q) plus the cost of entering the new cell
    g_n = g_cost[q] + plan[n[:2]]

    # Cheaper than the best cost recorded for that state so far? Record it
    # and queue the state for further exploration.
    if g_n < g_cost[n]:
      g_cost[n] = g_n
      heapq.heappush(open_heap, (g_n + h(n), n))