This repository has been archived by the owner on Mar 31, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
test_gym_env.py
87 lines (73 loc) · 2.21 KB
/
test_gym_env.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
from engine.gym_env import PokerEnv
import numpy as np
# 0: Fold, 1: Call, 2: Check, 3: Raise
def call_check_bot(obs):
# Call if possible, otherwise check
if obs["legal_actions"][1] == 1:
return (1,0)
elif obs["legal_actions"][2] == 1:
return (2,0)
else:
return (0,0)
def raise_bot(obs):
# Raise if possible, otherwise call or check
if obs["legal_actions"][3] == 1:
min_raise = int(obs["min_raise"][0])
max_raise = int(obs["max_raise"][0])
return (3,max_raise)
else:
return call_check_bot(obs)
def random_bot(obs):
# Randomly choose a legal action
action_probs = np.random.rand(4)
action_probs = action_probs * obs["legal_actions"]
action = np.argmax(action_probs)
if action == 3:
min_raise = obs["min_raise"][0]
max_raise = obs["max_raise"][0]
raise_amount = np.random.randint(min_raise, max_raise+1)
return (action, raise_amount)
else:
return (action, 0)
def fold_bot(obs):
if obs["legal_actions"][0] == 1:
return (0,0)
else:
return call_check_bot(obs)
num_to_action = {0: "Fold", 1: "Call", 2: "Check", 3: "Raise"}
# Two player mode
env = PokerEnv(10)
(obs1, obs2), info = env.reset()
bot1, bot2 = random_bot, random_bot
print("\n"+"*"*50 + " Two player " + "*"*50)
print(obs1)
print(obs2)
done = False
while not done:
if obs1["is_my_turn"]:
action = bot1(obs1)
print(f"Bot1: {num_to_action[action[0]]} {action[1]}")
else:
action = bot2(obs2)
print(f"Bot2: {num_to_action[action[0]]} {action[1]}")
print("\n")
(obs1, obs2), (reward1, reward2), done, trunc, info = env.step(action)
if reward1 != 0:
print("New Round")
print(obs1, reward1, done)
print(obs2, reward2, done)
# Single player mode
env = PokerEnv(num_rounds=10, opp_bot=random_bot)
obs, info = env.reset()
bot = random_bot
print("\n"+"*"*50 + " Single Player " + "*"*50)
print(obs)
done = False
while not done:
action = bot(obs)
print(f"Bot1: {num_to_action[action[0]]} {action[1]}")
print("\n")
obs, reward, done, trunc, info = env.step(action)
if reward != 0:
print("New Round")
print(obs, reward, done)