In [1]:
# 必要なモジュールのインポート
from enum import Enum
from random import shuffle
import numpy as np
import random
import gym
from gym import spaces
import gymnasium as gym
from gymnasium import spaces


# Suitクラスの定義
class Suit(Enum):
    SPADE = '♠'
    CLUB = '♣'
    HEART = '♡'
    DIAMOND = '♢'
    def __str__(self):
        return self.value
    def __repr__(self):
        return f"Suit.{self.name}"

# Numberクラスの定義
class Number(Enum):
    ACE = (1, 'A')
    TWO = (2, '2')
    THREE = (3, '3')
    FOUR = (4, '4')
    FIVE = (5, '5')
    SIX = (6, '6')
    SEVEN = (7, '7')
    EIGHT = (8, '8')
    NINE = (9, '9')
    TEN = (10, '10')
    JACK = (11, 'J')
    QUEEN = (12, 'Q')
    KING = (13, 'K')

    def __init__(self, val, string):
        self.val = val
        self.string = string

    def __str__(self):
        return self.string

    def __repr__(self):
        return f"Number.{self.name}"

# Cardクラスの定義
class Card:
    def __init__(self, suit, number):
        if not (isinstance(suit, Suit) and isinstance(number, Number)):
            raise ValueError
        self.suit = suit
        self.number = number

    def __str__(self):
        return str(self.suit) + str(self.number)

    def __repr__(self):
        return f"Card({self.__str__()})"

    def __eq__(self, other):
        return (self.suit, self.number) == (other.suit, other.number)

# Handクラスの定義
class Hand(list):
    def __init__(self, card_list):
        super().__init__(i for i in card_list)

    def check_number(self):
        number_list = [i.number.val for i in self]
        return number_list

    def check_suit(self):
        suit_list = [str(i.suit) for i in self]
        return suit_list

    def choice(self, card):
        if card in self:
            self.remove(card)
            return card
        else:
            raise ValueError

    def check(self, card):
        return card in self

# Deckクラスの定義
class Deck(list):
    def __init__(self):
        super().__init__(Card(suit, number) for suit in Suit for number in Number)
        self.shuffle()

    def shuffle(self):
        shuffle(self)

    def draw(self):
        return self.pop()

    def deal(self, players_num):
        cards = [Hand(i) for i in np.array_split(self, players_num)]
        self.clear()
        return cards

# Stateクラスの定義
# ゲームの状態
class State:
    # 初期化
    def __init__(self, players_num=4,field_cards=None, players_cards=None,turn_player=None,pass_count=None,out_player=None):
        if players_cards==None:
            deck = Deck()
            self.players_cards=deck.deal(players_num)
            self.players_num=players_num
            self.field_cards=np.zeros((4,13), dtype='int64')
            self.start_flags=[0]*self.players_num
            self.pass_count=[0]*self.players_num
            self.out_player=[]
            self.all_cards=[[str(Card(suit, number))  for number in Number] for suit in Suit]
            for players_number in range(players_num):
                self.start_flags[players_number]=self.choice_seven(hand=self.players_cards[players_number])
            self.turn_player=self.start_flags.index(1)
        else:
            self.players_cards=players_cards
            self.field_cards=field_cards
            self.players_num=players_num
            self.turn_player=turn_player
            self.pass_count=pass_count
            self.out_player=out_player
            self.all_cards=[[str(Card(suit, number))  for number in Number] for suit in Suit]


    #7のカードを出す
    def choice_seven(self,hand):
        start_flag=0
        for card in [Card(suit,Number.SEVEN) for suit in Suit]:
            if hand.check(Card(Suit.DIAMOND,Number.SEVEN))==True:
                start_flag=1
            if hand.check(card)==True:
                self.put_card(hand.choice(card))
        return start_flag

    def choice_card(self,hand,card):
        hand.choice(card)

    #場にカードを出す
    def put_card(self,card):
        num=10
        for s,i in zip(Suit,range(4)):
            if card.suit==s:
                num=i
        #state.my_hands().remove(card)
        self.field_cards[num][card.number.val-1]=int(1)


    # 場で出せる手のリスト取得
    # 3パスの人がいた時、未対応
    def legal_actions(self):
        actions = []
        for suit,n in zip(Suit,range(4)):

            if self.field_cards[n][0:6][::-1].tolist().count(1)!=6:
                actions.append(Card(suit,self.num_to_Enum(6-self.field_cards[n][0:6][::-1].tolist().index(0))))

            if self.field_cards[n][7:13].tolist().count(1)!=6:
                actions.append(Card(suit,self.num_to_Enum(8+self.field_cards[n][7:13].tolist().index(0))))
        return actions


    # 自分が出せる手のリスト取得
    def my_actions(self):
        actions = []
        for legal in self.legal_actions():
            if self.players_cards[self.turn_player].check(legal)==True:
                actions.append(legal)
        return actions
    def my_actions_str(self):
        actions = []
        for legal in self.legal_actions():
            if self.players_cards[self.turn_player].check(legal)==True:
                actions.append(legal)
        return [str(i) for i in actions]

    # 自分の手札取得
    def my_hands(self):
        return self.players_cards[self.turn_player]
    def my_hands_str(self):
        return [str(i) for i in self.players_cards[self.turn_player]]


    def num_to_Enum(self,num):
        enum_list=[Number.ACE,Number.TWO,Number.THREE,Number.FOUR,
                   Number.FIVE,Number.SIX,Number.SEVEN,Number.EIGHT,
                   Number.NINE,Number.TEN,Number.JACK,Number.QUEEN,
                   Number.KING]
        return enum_list[num-1]


    # 次の状態の取得
    def next(self, action):
        if self.my_actions()==[]:
            self.pass_count[self.turn_player]+=1
            self.pass_check()
        else:
            self.players_cards[self.turn_player].remove(action)
            self.put_card(action)
        #次のプレイヤーに
        self.next_player()
        return State(players_num=self.players_num,field_cards=self.field_cards, players_cards=self.players_cards,turn_player=self.turn_player,pass_count=self.pass_count,out_player=self.out_player)

    #次のプレイヤーの取得
    def next_player(self):
        flag=0
        while flag==0:
            if self.turn_player+1>=self.players_num:
                self.turn_player=self.turn_player+1-self.players_num
            else:
                self.turn_player+=1

            if self.turn_player not in self.out_player:
                flag=1

    #パスの上限判定
    def pass_check(self):
        out_list=self.out_player
        if self.pass_count[self.turn_player]>3:
            for card in self.my_hands():
                self.put_card(card)
            out_list.append(self.turn_player)

            self.out_player=out_list

    def to_str(self,num):
        return str(num)

    #勝ち負け判定
    def is_done(self):
        return len(self.my_hands())==0


    # 状態表示
    def __str__(self):
        str = ''
        field_cards=self.field_cards.tolist()
        out_list=[list(map(mul,self.all_cards[i],field_cards[i])) for i in range(4)]
        str += "場のカード\n\n"
        for i in range(len(out_list)):
            minilist=out_list[i]
            for j in range(len(minilist)):
                if minilist[j] == "":
                    str += " -- "
                else:
                    str +=" "+minilist[j]+" "
            str += '\n'
        num=self.to_str(self.turn_player)
        pass_cnt=self.to_str(self.pass_count[self.turn_player])
        str+="\nプレイヤー"+num+"番　　パス回数"+pass_cnt+"\n"
        str += "\nあなたの手札\n"

        out_list=self.my_hands_str()
        for i in range(len(out_list)):
            str+=out_list[i]
            str+=" "

        str += "\n\n出せるカード\n"

        out_list=self.my_actions_str()
        for i in range(len(out_list)):
            str+=out_list[i]
            str+=" "

        str += "\n"

        return str

# SevensEnvクラスの定義
class SevensEnv(gym.Env):
    def __init__(self):
        super(SevensEnv, self).__init__()
        self.state = State()
        # Define action and observation space
        # Assuming the action space is discrete for each possible card
        self.action_space = spaces.Discrete(52)  # Total number of cards
        # Observation space can be a representation of the field and hand cards
        self.observation_space = spaces.Dict({
            'field_cards': spaces.Box(low=0, high=1, shape=(4, 13), dtype=np.int8),
            'hand_cards': spaces.MultiBinary(52),
        })
        self.player_id = 0  # Assuming player 0 is the learning agent

    def step(self, action):
        # Apply action to the environment
        if action is not None:
            card = self.action_to_card(action)
            self.state = self.state.next(card)
        else:
            # Handle pass action
            self.state = self.state.next(None)

        # Observe the new state
        observation = self._get_obs()

        # Compute reward
        reward = self._compute_reward()

        # Check if the game is done
        done = self.state.is_done()

        info = {}

        return observation, reward, done, info

    def reset(self):
        self.state = State()
        return self._get_obs()

    def render(self, mode='human'):
        print(self.state)

    def _get_obs(self):
        # Create an observation dictionary
        field_cards = self.state.field_cards.copy()
        hand_cards = self._hand_cards_to_binary(self.state.my_hands())
        return {'field_cards': field_cards, 'hand_cards': hand_cards}

    def _compute_reward(self):
        # Define your reward function here
        # For example, +1 for winning, -1 for losing, 0 otherwise
        if self.state.is_done():
            return 1.0  # You can adjust this as per your reward strategy
        else:
            return 0.0

    def action_to_card(self, action):
        # Convert action (int) to Card object
        suit_index = action // 13
        number_index = action % 13
        suit = list(Suit)[suit_index]
        number = list(Number)[number_index]
        return Card(suit, number)

    def _hand_cards_to_binary(self, hand):
        # Convert hand cards to a binary vector of length 52
        hand_cards = np.zeros(52, dtype=int)
        for card in hand:
            index = self.card_to_action(card)
            hand_cards[index] = 1
        return hand_cards

    def card_to_action(self, card):
        # Convert Card object to action (int)
        suit_index = list(Suit).index(card.suit)
        number_index = list(Number).index(card.number)
        return suit_index * 13 + number_index

# エージェントの訓練コード
from stable_baselines3 import DQN
from stable_baselines3.common.env_checker import check_env

# 環境のチェック
env = SevensEnv()
check_env(env)

# エージェントのインスタンス化
model = DQN('MlpPolicy', env, verbose=1)

# エージェントの訓練
model.learn(total_timesteps=100000)

# エージェントの保存
model.save("sevens_dqn_agent")


TypeError: The reset() method must accept a `seed` parameter

In [None]:
import gym
from gym import spaces
import numpy as np

class SevensEnv(gym.Env):
    def __init__(self):
        super(SevensEnv, self).__init__()
        self.state = State()
        # Define action and observation space
        # Assuming the action space is discrete for each possible card
        self.action_space = spaces.Discrete(52)  # Total number of cards
        # Observation space can be a representation of the field and hand cards
        self.observation_space = spaces.Dict({
            'field_cards': spaces.Box(low=0, high=1, shape=(4, 13), dtype=np.int8),
            'hand_cards': spaces.MultiBinary(52),
        })
        self.player_id = 0  # Assuming player 0 is the learning agent

    def step(self, action):
        # Apply action to the environment
        if action is not None:
            card = self.action_to_card(action)
            self.state = self.state.next(card)
        else:
            # Handle pass action
            self.state = self.state.next(None)

        # Observe the new state
        observation = self._get_obs()

        # Compute reward
        reward = self._compute_reward()

        # Check if the game is done
        done = self.state.is_done()

        info = {}

        return observation, reward, done, info

    def reset(self):
        self.state = State()
        return self._get_obs()

    def render(self, mode='human'):
        print(self.state)

    def _get_obs(self):
        # Create an observation dictionary
        field_cards = self.state.field_cards.copy()
        hand_cards = self._hand_cards_to_binary(self.state.my_hands())
        return {'field_cards': field_cards, 'hand_cards': hand_cards}

    def _compute_reward(self):
        # Define your reward function here
        # For example, +1 for winning, -1 for losing, 0 otherwise
        if self.state.is_done():
            return 1.0  # You can adjust this as per your reward strategy
        else:
            return 0.0

    def action_to_card(self, action):
        # Convert action (int) to Card object
        suit_index = action // 13
        number_index = action % 13
        suit = list(Suit)[suit_index]
        number = list(Number)[number_index]
        return Card(suit, number)

    def _hand_cards_to_binary(self, hand):
        # Convert hand cards to a binary vector of length 52
        hand_cards = np.zeros(52, dtype=int)
        for card in hand:
            index = self.card_to_action(card)
            hand_cards[index] = 1
        return hand_cards

    def card_to_action(self, card):
        # Convert Card object to action (int)
        suit_index = list(Suit).index(card.suit)
        number_index = list(Number).index(card.number)
        return suit_index * 13 + number_index

Collecting stable-baselines3
  Downloading stable_baselines3-2.3.2-py3-none-any.whl.metadata (5.1 kB)
Collecting gymnasium<0.30,>=0.28.1 (from stable-baselines3)
  Downloading gymnasium-0.29.1-py3-none-any.whl.metadata (10 kB)
Collecting farama-notifications>=0.0.1 (from gymnasium<0.30,>=0.28.1->stable-baselines3)
  Downloading Farama_Notifications-0.0.4-py3-none-any.whl.metadata (558 bytes)
Downloading stable_baselines3-2.3.2-py3-none-any.whl (182 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m182.3/182.3 kB[0m [31m1.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading gymnasium-0.29.1-py3-none-any.whl (953 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m953.9/953.9 kB[0m [31m9.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading Farama_Notifications-0.0.4-py3-none-any.whl (2.5 kB)
Installing collected packages: farama-notifications, gymnasium, stable-baselines3
Successfully installed farama-notifications-0.0.4 gymnasium-0.29.1 stable-baselines3-2.3.

In [None]:
from stable_baselines3 import DQN
from stable_baselines3.common.env_checker import check_env

# Check the environment
env = SevensEnv()
check_env(env)

# Instantiate the agent
model = DQN('MlpPolicy', env, verbose=1)

# Train the agent
model.learn(total_timesteps=100000)

# Save the agent
model.save("sevens_dqn_agent")

# Load the trained agent
# model = DQN.load("sevens_dqn_agent")

NameError: name 'State' is not defined