In [None]:
%pip install gym rich gymnasium

Collecting gymnasium
  Downloading gymnasium-1.0.0-py3-none-any.whl.metadata (9.5 kB)
Collecting farama-notifications>=0.0.1 (from gymnasium)
  Downloading Farama_Notifications-0.0.4-py3-none-any.whl.metadata (558 bytes)
Downloading gymnasium-1.0.0-py3-none-any.whl (958 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m958.1/958.1 kB[0m [31m10.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading Farama_Notifications-0.0.4-py3-none-any.whl (2.5 kB)
Installing collected packages: farama-notifications, gymnasium
Successfully installed farama-notifications-0.0.4 gymnasium-1.0.0


In [None]:
class Card:
    """A single playing card as tracked by the Solitaire game.

    Attributes:
    - value (int): Rank of the card, 0 to 12 (0 = Ace, 12 = King).
    - suit (int): Suit of the card, 0 to 3:
        0 - ♥ hearts (red), 1 - ♦ diamonds (red),
        2 - ♣ clubs (black), 3 - ♠ spades (black).
    - visible (bool): True when the card is face-up.
    - bonus (bool): True once the foundation bonus has been paid for this card,
      so the same card is never rewarded twice.
    """

    def __init__(self, value, suit, visible=False, bonus=False):
        """
        Store the rank, suit, face-up state and bonus flag of the card.

        Parameters:
        - value (int): The rank of the card (0 to 12, where 0 = Ace and 12 = King).
        - suit (int): The suit of the card (0 to 3; 0 and 1 are red, 2 and 3 are black).
        - visible (bool): Whether the card is face-up.
        - bonus (bool): Whether the foundation bonus was already granted for this card.
        """
        self.value, self.suit = value, suit
        self.visible, self.bonus = visible, bonus

    def __repr__(self):
        """Return a debug string showing rank, suit, face-up state and bonus flag."""
        if self.visible:
            face = "Visible"
        else:
            face = "Hidden"
        return f"Card(value={self.value}, suit={self.suit}, {face}, bonus={self.bonus})"


In [None]:
import random

import gymnasium as gym
import numpy as np
from gymnasium import spaces
from rich.console import Console
from rich.text import Text

# Module-level rich Console used by SolitaireEnv.render() for all board output;
# the suit strings passed to it carry "[bold red]...[/bold red]" style tags.
console = Console()

class SolitaireEnv(gym.Env):
    """Klondike Solitaire exposed through the Gymnasium ``Env`` interface.

    Board locations are addressed by "column" numbers:

    - 0-6:   tableau columns
    - 7-10:  foundation piles (pile index = column - 7, one pile per suit)
    - 11:    the draw pile (its most recently revealed card)

    Actions are 3-item sequences ``[action_type, [source_column, card_index], destination]``:

    - action_type 0: move the cards from ``card_index`` upward in ``source_column``
      (or the single top card of a foundation / the draw pile) onto tableau
      column ``destination``.
    - action_type 1: reveal the next draw-pile card (source/destination ignored).
    - action_type 2: move the top card of ``source_column`` (0-6 or 11) to the
      foundation pile of its suit (card_index/destination ignored).

    Observations are a dict of numpy-compatible values matching ``observation_space``:

    - 'tableau': flat int array of 7 * 19 slots, column by column, bottom card first.
      0 = empty slot, 1-52 = face-up card (suit * 13 + value + 1), 53 = face-down card.
    - 'foundation': int array with the number of cards on each of the 4 piles.
    - 'draw_pile': id (1-52) of the most recently revealed draw-pile card, 0 if none.

    Scoring (accumulated in ``self.reward``): -1 per action, a further -1 for an
    invalid tableau move, +20 per tableau card turned face-up, +130 the first
    time a card reaches the foundation, -30 for each draw-pile recycle beyond
    the three free ones.
    """

    NUM_COLUMNS = 7
    # Tallest possible column: the 6 face-down cards dealt to the last column
    # plus a complete King-to-Ace run (13 cards) built on top of them.
    MAX_COLUMN_HEIGHT = 19
    DRAW_PILE_SOURCE = 11      # "column" number that designates the draw pile
    HIDDEN_CARD_CODE = 53      # observation code for a face-down tableau card
    FREE_RECYCLES = 3          # draw-pile recycles allowed before the -30 penalty
    # rich markup for each suit index, including the leading space used for alignment
    SUIT_MARKUP = ('[bold red] ♥[/bold red]', '[bold red] ♦[/bold red]', ' ♣', ' ♠')

    def __init__(self):
        super().__init__()
        # Mirrors exactly what step() unpacks: (action type, [source column, card index], destination)
        self.action_space = spaces.Tuple((
            spaces.Discrete(3),    # Action type: 0 (Move within tableau), 1 (Draw card), 2 (Move to foundation)
            spaces.MultiDiscrete([self.DRAW_PILE_SOURCE + 1, self.MAX_COLUMN_HEIGHT]),  # [source column 0-11, card index]
            spaces.Discrete(self.NUM_COLUMNS),    # Destination tableau column (0-6)
        ))
        # Mirrors exactly what _get_observation() returns
        self.observation_space = spaces.Dict({
            'tableau': spaces.MultiDiscrete(
                [self.HIDDEN_CARD_CODE + 1] * (self.NUM_COLUMNS * self.MAX_COLUMN_HEIGHT)
            ),
            'foundation': spaces.MultiDiscrete([14, 14, 14, 14]),  # 0-13 cards on each pile
            'draw_pile': spaces.Discrete(53)  # 0 = nothing revealed, 1-52 = revealed card id
        })

        # Cards revealed from the draw pile so far; the last one is playable
        self.revealed_cards = []
        self.draw_index = 0  # Kept for backward compatibility; not used by the game logic

        self.tableau = None
        self.foundation = None
        self.draw_pile = None
        self.draw_pile_cycles = self.FREE_RECYCLES
        self.done = False
        self.reward = 0
        self.colors = {
            0: "red",
            1: "red",
            2: "black",
            3: "black"
        }

        self._reset_game_state()

    def _reset_game_state(self):
        """Shuffle a fresh deck, deal the tableau and clear every per-game field."""
        deck = [Card(value, suit) for suit in range(4) for value in range(13)]
        # Shuffle with the environment's own generator so reset(seed=...) is reproducible
        order = self.np_random.permutation(len(deck))
        deck = [deck[i] for i in order]

        # Column i receives i + 1 cards; only the last (top) one starts face-up
        self.tableau = [[deck.pop() for _ in range(i + 1)] for i in range(self.NUM_COLUMNS)]
        for col in self.tableau:
            for card in col[:-1]:
                card.visible = False
            col[-1].visible = True

        self.foundation = [[] for _ in range(4)]
        self.draw_pile = deck  # The 24 cards that were not dealt
        # These must be cleared too, otherwise cards revealed in the previous
        # game (and its recycle count) would leak into the new one.
        self.revealed_cards = []
        self.draw_index = 0
        self.draw_pile_cycles = self.FREE_RECYCLES
        self.done = False
        self.reward = 0

    def reset(self, seed=None, options=None):
        """
        Start a new game.

        Parameters:
        - seed (int | None): Seed for the environment's random generator; the same
          seed produces the same deal.
        - options (dict | None): Accepted for Gymnasium API compatibility; unused.

        Returns:
        - (observation, info): the initial observation and an empty info dict.
        """
        super().reset(seed=seed)
        self._reset_game_state()
        return self._get_observation(), {}

    def _encode_card(self, card):
        """Return the 1-52 id of a card (suit * 13 + value + 1)."""
        return card.suit * 13 + card.value + 1

    def _get_observation(self):
        """Encode the board as the dict of arrays described by ``observation_space``."""
        tableau = np.zeros((self.NUM_COLUMNS, self.MAX_COLUMN_HEIGHT), dtype=np.int64)
        for col_idx, column in enumerate(self.tableau):
            for row_idx, card in enumerate(column[:self.MAX_COLUMN_HEIGHT]):
                tableau[col_idx, row_idx] = (
                    self._encode_card(card) if card.visible else self.HIDDEN_CARD_CODE
                )

        foundation = np.array([len(pile) for pile in self.foundation], dtype=np.int64)
        # Only the last revealed card is playable, so that is what the agent sees
        top_revealed = self._encode_card(self.revealed_cards[-1]) if self.revealed_cards else 0

        return {
            'tableau': tableau.reshape(-1),
            'foundation': foundation,
            'draw_pile': top_revealed
        }

    def step(self, action: list):
        """
        Apply one action and return the resulting state.

        Parameters:
        - action: ``[action_type, source, destination]`` where ``source`` is
          ``[column, card_index]`` (see the class docstring for column numbers)
          and ``destination`` is a tableau column (0-6).

        Returns:
        - (observation, reward, done, info): ``reward`` is the cumulative score
          ``self.reward``, not the per-step change; ``info`` is an empty dict.

        NOTE(review): Gymnasium's step API is the 5-tuple
        (obs, reward, terminated, truncated, info). The 4-tuple is kept because
        existing callers unpack four values; switch both sides together.
        """
        action_type, source, destination = action
        self.reward -= 1  # Base penalty for each action

        if action_type == 0:  # Move Card within Tableau
            valid_move_made = self._move_within_tableau(source, destination)
            if not valid_move_made:
                print("The move isn't valid")
                self.reward -= 1  # Extra penalty for invalid move

        elif action_type == 1:  # Draw Card from Draw Pile
            self._draw_card()

        elif action_type == 2:  # Move Card to Foundation
            self._move_to_foundation(source[0])

        # Any column whose top card is now face-down gets it turned over
        flipped_count = self._flip_visible_cards()
        self.reward += flipped_count * 20

        # Check if game is won (all foundations complete)
        if all(len(foundation) == 13 for foundation in self.foundation):
            self.done = True

        return self._get_observation(), self.reward, self.done, {}

    def _draw_card(self):
        """Reveal the next draw-pile card, or recycle the revealed cards when it is empty."""
        if self.draw_pile:
            card = self.draw_pile.pop()
            card.visible = True
            self.revealed_cards.append(card)

        else:
            # Reversing keeps the draw order: pop() takes from the end, so the
            # card that was revealed first is revealed first again.
            self.draw_pile = self.revealed_cards[::-1]
            self.revealed_cards = []
            self.draw_pile_cycles -= 1
            if self.draw_pile_cycles < 0:
                self.reward -= 30

    def _is_alternating_color(self, object1, object2):
        """Return True when the two suit indices have different colors."""
        return self.colors[object1] != self.colors[object2]

    def _can_stack(self, card, destination):
        """Return True when ``card`` may be placed on tableau column ``destination``.

        An empty column accepts only a King (value 12); otherwise the card must
        be one rank below the column's top card and of the opposite color.
        """
        column = self.tableau[destination]
        if not column:
            return card.value == 12
        top = column[-1]
        return (card.value == top.value - 1 and
                self._is_alternating_color(card.suit, top.suit))

    def _move_within_tableau(self, source: list[int], destination: int):
        """
        Move cards onto tableau column ``destination``.

        Parameters:
        - source: ``[column, card_index]``. Column 0-6 moves the cards from
          ``card_index`` to the top of that tableau column; 7-10 moves the top
          card of a foundation pile; 11 moves the last revealed draw-pile card.
        - destination: tableau column 0-6.

        Returns:
        - bool: True if the move was legal and has been applied, False otherwise
          (the board is left untouched).
        """
        if destination > 6 or destination < 0:
            print("Wrong destination column")
            return False
        destination = int(destination)
        source_column = int(source[0])

        # Source is the draw pile: only its last revealed card can be played
        if source_column == self.DRAW_PILE_SOURCE:
            if not self.revealed_cards:
                print("Invalid move: No cards revealed in the draw pile")
                return False
            if not self._can_stack(self.revealed_cards[-1], destination):
                if not self.tableau[destination]:
                    print("Invalid move: Only Kings can move to an empty column")
                else:
                    print("Invalid move: Invalid move for draw pile card")
                return False
            self.tableau[destination].append(self.revealed_cards.pop())
            return True

        # Source is a foundation pile: its top card may come back to the tableau
        if 7 <= source_column <= 10:
            pile = self.foundation[source_column - 7]
            # _can_stack also covers an empty destination column, which used to
            # raise IndexError here.
            if not pile or not self._can_stack(pile[-1], destination):
                return False
            self.tableau[destination].append(pile.pop())
            return True

        # Source must now be a tableau column
        if source_column < 0 or source_column > 6:
            print("Invalid move: Wrong column number")
            return False
        card_index = int(source[1])
        # A negative index would silently slice from the end of the column
        if card_index < 0 or card_index >= len(self.tableau[source_column]):
            print("Invalid move: Wrong card index number")
            return False
        if source_column == destination:
            print("Invalid move: Source and destination are the same column")
            return False

        cards_to_move = self.tableau[source_column][card_index:]
        # Face-down cards are not playable; everything above a face-up card is face-up too
        if not cards_to_move[0].visible:
            print("Invalid move: Cannot move a face-down card")
            return False

        if not self._can_stack(cards_to_move[0], destination):
            if not self.tableau[destination]:
                print("Invalid move: Only Kings can be moved to an empty column")
            else:
                print("Invalid move: Invalid move")
            return False

        # Move the whole run in one go
        self.tableau[destination].extend(cards_to_move)
        del self.tableau[source_column][card_index:]
        return True

    def _move_to_foundation(self, source):
        """
        Move the top card of ``source`` onto the foundation pile of its suit.

        Parameters:
        - source (int): tableau column 0-6, or 11 for the last revealed draw-pile card.

        Returns:
        - bool: True if the card was moved. The +130 bonus is paid only the first
          time a given card reaches the foundation.
        """
        if source == self.DRAW_PILE_SOURCE:
            origin = self.revealed_cards
            if not origin:
                print("Invalid move: No revealed cards in draw pile")
                return False
        elif 0 <= source <= 6 and self.tableau[source]:
            origin = self.tableau[source]
        else:
            # Covers empty columns as well as column numbers that are not a
            # tableau column (these used to raise IndexError).
            print("Invalid move: No card to move")
            return False

        card = origin[-1]
        pile = self.foundation[card.suit]  # One pile per suit
        # Piles are built Ace upward, so the pile height must equal the card's rank
        if len(pile) != card.value:
            print("Invalid move: Invalid move")
            return False

        pile.append(origin.pop())
        if not card.bonus:
            self.reward += 130
            card.bonus = True
        return True

    def _flip_visible_cards(self):
        """Turn face-up every tableau top card that is face-down; return how many were flipped."""
        flipped_count = 0
        for column in self.tableau:
            if column and not column[-1].visible:
                column[-1].visible = True
                flipped_count += 1
        return flipped_count

    def _rank_label(self, card):
        """Return the text shown for a card's rank: 'A' for value 0, else the raw value."""
        return "A" if card.value == 0 else str(card.value)

    def _card_face(self, card):
        """Return the text drawn inside a card: rank, padding and suit, or ' ?  ' if face-down."""
        if not card.visible:
            return " ?  "
        rank = self._rank_label(card)
        padding = " " if len(rank) != 2 else ""  # Keep one- and two-digit ranks the same width
        return f"{rank}{padding}{self.SUIT_MARKUP[card.suit]}"

    def render(self, mode='human'):
        """Print the foundations, the tableau (one row of cards per column) and the draw pile."""
        # Foundations: show the top card of each pile
        foundation_str = []
        for pile in self.foundation:
            if pile:
                card = pile[-1]
                foundation_str.append(f"| {self._rank_label(card)} {self.SUIT_MARKUP[card.suit]} |")
            else:
                foundation_str.append("|     |")  # Empty foundation pile

        console.print("Foundations:", "  ".join(foundation_str))

        # Tableau: each column is drawn as four text lines (top edge, faces, filler, bottom edge)
        tableau_lines = []
        for col in self.tableau:
            tableau_lines.append(" ".join("┌─────┐" for _ in col))
            tableau_lines.append(" ".join(f"| {self._card_face(card)}|" for card in col))
            tableau_lines.append(" ".join("|     |" for _ in col))
            tableau_lines.append(" ".join("└─────┘" for _ in col))

        console.print("Tableau:\n" + '\n'.join(tableau_lines))

        # Draw pile: cards still face-down, then up to three most recently revealed cards
        draw_pile_display = f"Draw Pile: {len(self.draw_pile)} cards remaining"
        last_three = [f"|{self._card_face(card)}|" for card in self.revealed_cards[-3:]]
        last_three_display = f"\nLast 3 Drawn: {' '.join(last_three)}"

        console.print(draw_pile_display, last_three_display)


In [None]:
from IPython.display import clear_output
import sys

def main():
    """
    Play SolitaireEnv interactively from the keyboard.

    Each turn prints the current score and board, reads an action from the
    user, and passes it to ``env.step``. The loop ends when the environment
    reports the game as done.

    The previous board is cleared only once an action (or an input error) has
    been decided, so that whatever is printed next - an input error or a
    message from the environment - stays on screen above the refreshed board
    instead of being wiped immediately.
    """

    def ask_int(prompt, allowed):
        """Read an integer and raise ValueError unless it is one of ``allowed``."""
        number = int(input(prompt))
        if number not in allowed:
            raise ValueError(f"{number} is not an accepted value")
        return number

    # Initialize the environment
    env = SolitaireEnv()
    done = False

    # Game loop
    while not done:
        # Render the current state
        print(f"\nReward: {env.reward}")
        env.render()

        # Get user input for action
        try:
            print("\nEnter your action:")
            action_type = int(input("Action Type (0: Move within tableau, 1: Draw card, 2: Move to foundation): \n"))
            if action_type == 0:  # Move within tableau
                source_col = ask_int("Source Column (0-6 for tableau, 7-10 for foundation, 11 for draw pile): \n", range(12))
                if source_col == 11:
                    source_card_idx = 0
                else:
                    # A deck has 52 cards, so no valid index reaches 52; negatives are rejected
                    source_card_idx = ask_int("Source Card Index (starting from 0 for the bottom card): \n", range(52))
                dest_col = ask_int("Destination Column (0-6): \n", range(7))
                action = [action_type, [source_col, source_card_idx], dest_col]
            elif action_type == 1:  # Draw card
                action = [action_type, [], 0]  # No source or destination needed for draw action
            elif action_type == 2:  # Move to foundation
                source_col = ask_int("Source Column (0-6 for tableau, 11 for draw pile): \n", [*range(7), 11])
                action = [action_type, [source_col, 0], 0]  # No destination needed for foundation
            else:
                clear_output(wait=True)
                print("Invalid action type! Try again.")
                continue
        except ValueError:
            # Non-numeric text and out-of-range numbers both end up here
            clear_output(wait=True)
            print("Invalid input! Please enter valid numbers.")
            continue

        # Drop the old board now; anything the step prints stays visible
        clear_output(wait=True)

        # Take the action
        try:
            obs, reward, done, info = env.step(action)
            if done:
                print("Congratulations! You have completed the game!\n Your score: ", env.reward)
        except Exception as e:
            print(f"Error: {e}")
            sys.exit(1)

if __name__ == "__main__":
    main()



Reward: 0



Enter your action:


KeyboardInterrupt: Interrupted by user

In [None]:
!pip install stable_baselines3

Collecting stable_baselines3
  Downloading stable_baselines3-2.4.0-py3-none-any.whl.metadata (4.5 kB)
Downloading stable_baselines3-2.4.0-py3-none-any.whl (183 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m183.9/183.9 kB[0m [31m3.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: stable_baselines3
Successfully installed stable_baselines3-2.4.0


In [None]:
from stable_baselines3.common.env_checker import check_env

In [None]:
# Build a fresh SolitaireEnv and run the Stable-Baselines3 environment checker on it;
# check_env raises AssertionError when the environment does not satisfy the checker.
# NOTE(review): SolitaireEnv.step returns four values - confirm against the
# Gymnasium step API, which the checker is expected to enforce.
env = SolitaireEnv()
check_env(env, warn=True)



AssertionError: Error while checking key=foundation: The observation returned by `reset()` method must be a numpy array