In [None]:
# import gym
# from gym import spaces
import import_ipynb
from search_env_base import SearchEnv

In [None]:
class FarmerWolfGooseGrainEnv(SearchEnv):
    """
    Environment for the Farmer, Wolf, Goose, and Grain river crossing puzzle.

    State is a tuple of two tuples: (left_bank, right_bank), each a tuple of
    the entities currently on that side.
    Entities: 'F' (farmer), 'W' (wolf), 'G' (goose), 'R' (grain)

    The farmer crosses alone or with exactly one item/animal. A bank without
    the farmer may not hold wolf + goose or goose + grain together.

    Note on state representation: the start and goal states list entities in
    ENTITIES order, while every successor produced by get_reachable_states()
    stores each bank sorted alphabetically. Two states describing the same
    situation can therefore differ as tuples; use is_goal() rather than ``==``
    to test for the goal.
    """
    ENTITIES = ('F', 'W', 'G', 'R')

    # Single source of truth for the initial/goal configuration, shared by
    # __init__ and reset so the two cannot drift apart.
    _START_STATE = (('F', 'W', 'G', 'R'), ())
    _GOAL_STATE = ((), ('F', 'W', 'G', 'R'))

    def __init__(self):
        """Create the environment with every entity on the left bank."""
        super().__init__(self._START_STATE, self._GOAL_STATE)
        self.state = self._START_STATE
        # self.action_space = None  # Not used
        # self.observation_space = None  # Not used

    def is_valid(self, state):
        """
        Check if the state is valid (no forbidden pairings).

        Args:
            state: (left_bank, right_bank) pair of entity sequences.

        Returns:
            False if any bank without the farmer holds wolf + goose or
            goose + grain; True otherwise.
        """
        for bank in state:
            present = set(bank)
            if 'F' in present:
                # The farmer supervises this bank; nothing gets eaten.
                continue
            if 'W' in present and 'G' in present:
                return False  # Wolf eats goose
            if 'G' in present and 'R' in present:
                return False  # Goose eats grain
        return True

    @staticmethod
    def _cross(from_bank, to_bank, farmer_on_left, passenger=None):
        """
        Build the state obtained when the farmer crosses the river.

        Args:
            from_bank: entities on the farmer's current bank.
            to_bank: entities on the opposite bank.
            farmer_on_left: True if from_bank is the left bank.
            passenger: entity taken along, or None to cross alone.

        Returns:
            (left_bank, right_bank) with each bank as a sorted tuple.
        """
        departing = list(from_bank)
        arriving = list(to_bank)
        departing.remove('F')
        arriving.append('F')
        if passenger is not None:
            departing.remove(passenger)
            arriving.append(passenger)
        departing = tuple(sorted(departing))
        arriving = tuple(sorted(arriving))
        if farmer_on_left:
            return (departing, arriving)
        return (arriving, departing)

    def get_reachable_states(self, state=None):
        """
        Return a list of valid next states reachable from the given state.

        Args:
            state: (left_bank, right_bank) pair; defaults to self.state.

        Returns:
            List of valid successor states. The farmer-crosses-alone move
            (if valid) comes first, followed by one move per companion in
            the order the companions appear on the farmer's bank. Each bank
            in a successor is a sorted tuple.
        """
        if state is None:
            state = self.state
        left, right = state
        # Determine which side the farmer is on
        farmer_on_left = 'F' in left
        if farmer_on_left:
            from_bank, to_bank = left, right
        else:
            from_bank, to_bank = right, left

        # None stands for "farmer crosses alone"; it is tried first.
        passengers = [None]
        passengers.extend(entity for entity in from_bank if entity != 'F')

        moves = []
        for passenger in passengers:
            candidate = self._cross(from_bank, to_bank, farmer_on_left, passenger)
            if self.is_valid(candidate):
                moves.append(candidate)
        return moves

    def step(self, action):
        """
        Move to the given next state.

        Args:
            action: the next state to move to (must be in
                get_reachable_states for the current state).

        Returns:
            (next_state, reward, done, info). An action that is not
            reachable leaves the state unchanged and yields reward -1.0,
            done False and info {'invalid': True}. A valid action yields
            reward 1.0 and done True when the goal is reached (0.0 / False
            otherwise), with info {'reachable': <successors of new state>}.
        """
        if action not in self.get_reachable_states(self.state):
            # Penalty for invalid move
            return self.state, -1.0, False, {'invalid': True}
        self.state = action
        # Successor banks are stored sorted, so the solved state is
        # ((), ('F', 'G', 'R', 'W')), which never equals the goal tuple
        # ((), ('F', 'W', 'G', 'R')) under plain ``==``. is_goal() compares
        # bank contents irrespective of order.
        done = self.is_goal(self.state)
        reward = 1.0 if done else 0.0
        info = {'reachable': self.get_reachable_states(self.state)}
        return self.state, reward, done, info

    def reset(self):
        """Put every entity back on the left bank and return that state."""
        self.state = self._START_STATE
        return self.state

    def render(self, mode='human'):
        """Print both banks and which bank the farmer is on (mode is unused)."""
        left, right = self.state
        print(f"Left bank: {left}")
        print(f"Right bank: {right}")
        print(f"Farmer is on the {'left' if 'F' in left else 'right'} bank.")

    def cost(self, from_state, to_state):
        """Return the cost of moving from from_state to to_state (always 1.0; the arguments are not inspected)."""
        return 1.0

    def is_goal(self, state):
        """
        Check if the state is a goal state using set equality for both banks.

        Args:
            state: (left_bank, right_bank) pair.

        Returns:
            True if each bank holds the same entities as the corresponding
            bank of self.goal_state, regardless of their order.
        """
        left, right = state
        goal_left, goal_right = self.goal_state
        return set(left) == set(goal_left) and set(right) == set(goal_right)

In [None]:
#TEST
# fnv=FarmerWolfGooseGrainEnv()

In [None]:
# fnv.reset()

In [None]:
# actions=fnv.get_reachable_states()

In [None]:
# actions

In [None]:
# fnv.step(action=actions[0])