/
blackjack.js
126 lines (106 loc) · 2.39 KB
/
blackjack.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
import { RLIntRange, RLEnvironmentBase } from './base.js'
// Suit identifiers stored in each card's `suit` field when Deck builds its cards.
const SPADE = 0
const DIAMOND = 1
const HEART = 2
const CLUB = 3
/**
 * A 52-card deck: one card per (suit, value) pair, with four suits and
 * values 1 through 13. Cards are plain objects of the form `{ suit, value }`.
 */
class Deck {
  /**
   * Creates a full, shuffled deck.
   */
  constructor() {
    this.cards = []
    // Building and shuffling lives in init() so the two code paths cannot drift apart.
    this.init()
  }

  /**
   * Discards whatever cards remain, rebuilds all 52 cards and shuffles them.
   */
  init() {
    this.cards = []
    for (const suit of [SPADE, HEART, DIAMOND, CLUB]) {
      for (let value = 1; value <= 13; value++) {
        this.cards.push({ suit, value })
      }
    }
    this.shuffle()
  }

  /**
   * Shuffles the remaining cards in place with a Fisher-Yates shuffle
   * driven by Math.random().
   */
  shuffle() {
    for (let i = this.cards.length - 1; i > 0; i--) {
      // Pick a partner index in [0, i] and swap it into position i.
      const r = Math.floor(Math.random() * (i + 1))
      ;[this.cards[i], this.cards[r]] = [this.cards[r], this.cards[i]]
    }
  }

  /**
   * Removes and returns the last card of the deck.
   *
   * @returns {{suit: number, value: number} | undefined} The drawn card, or `undefined` when the deck is empty.
   */
  pop() {
    return this.cards.pop()
  }
}
/**
 * Blackjack environment
 *
 * One player plays a single hand against a dealer. Every episode starts from
 * a freshly rebuilt and shuffled deck, with two cards dealt to the dealer and
 * two to the player.
 */
export default class BlackjackRLEnvironment extends RLEnvironmentBase {
  constructor() {
    super()

    this._deck = new Deck()
    this._dealer_hands = []
    this._player_hands = []
    this._done = false
    // Reward values: `bust` when the player exceeds 21, `win` when the dealer
    // exceeds 21 after the player stands, `step` for a hit that does not bust.
    this._reward = {
      bust: -1,
      win: 1,
      step: 0,
    }

    this.reset()
  }

  /**
   * Available actions: a single action component whose value is 0 or 1.
   * `step` treats 1 as "hit" (draw a card) and any other value as "stand".
   *
   * @type {Array<Array<number>>}
   */
  get actions() {
    return [[0, 1]]
  }

  /**
   * State space descriptor, positionally matching the array returned by `state()`:
   * the player's hand total, the dealer's first card (capped at 10), and the usable-ace flag.
   * NOTE(review): the exact semantics of RLIntRange bounds are defined in './base.js' - confirm there.
   */
  get states() {
    return [new RLIntRange(2, 31), new RLIntRange(1, 10), [0, 1]]
  }

  /**
   * Computes the total of a hand.
   *
   * Cards with a value above 10 count as 10. If the hand contains a card of
   * value 1 and the total is at most 11, that card is counted as 11 instead
   * (a "usable ace").
   *
   * @param {Array<{suit: number, value: number}>} hands Cards in the hand
   * @returns {[number, boolean]} The hand total and whether a usable ace was counted
   */
  _sumhands(hands) {
    let sumhands = hands.reduce((s, c) => s + Math.min(10, c.value), 0)
    const usableace = sumhands <= 11 && hands.some(c => c.value === 1)
    if (usableace) {
      sumhands += 10
    }
    return [sumhands, usableace]
  }

  /**
   * Starts a new episode: rebuilds and shuffles the deck, then deals two
   * cards to the dealer followed by two cards to the player.
   *
   * @returns {number[]} The initial state, see `state()`
   */
  reset() {
    super.reset()
    this._deck.init()
    this._dealer_hands = [this._deck.pop(), this._deck.pop()]
    this._player_hands = [this._deck.pop(), this._deck.pop()]
    this._done = false
    return this.state()
  }

  /**
   * Returns the current observation.
   *
   * @returns {number[]} `[player total, dealer's first card capped at 10, 1 if the player has a usable ace else 0]`
   */
  state() {
    const [sumhands, usableace] = this._sumhands(this._player_hands)
    return [sumhands, Math.min(10, this._dealer_hands[0].value), usableace ? 1 : 0]
  }

  /**
   * Applies the player's action.
   *
   * - Hit (`action[0] === 1`): the player draws one card. If the total exceeds 21
   *   the episode ends with the `bust` reward; otherwise it continues with the `step` reward.
   * - Stand (any other value): the dealer draws until its total is at least 17 and
   *   the episode ends. If the dealer exceeds 21 the player receives the `win` reward;
   *   otherwise the reward is the margin `player total - dealer total`.
   *
   * @param {number[]} action Action array; only the first element is read
   * @returns {{state: number[], reward: number, done: boolean}} Resulting state, reward and episode-end flag
   */
  step(action) {
    if (action[0] === 1) {
      this._player_hands.push(this._deck.pop())
      const busted = this._sumhands(this._player_hands)[0] > 21
      if (busted) {
        this._done = true
      }
      return {
        state: this.state(),
        reward: busted ? this._reward.bust : this._reward.step,
        done: busted,
      }
    }

    this._done = true
    while (this._sumhands(this._dealer_hands)[0] < 17) {
      this._dealer_hands.push(this._deck.pop())
    }

    const [playerSum] = this._sumhands(this._player_hands)
    const [dealerSum] = this._sumhands(this._dealer_hands)
    // A dealer bust is a player win. Without this check the margin below would
    // be negative (dealer total > 21 >= player total) and punish a winning hand.
    const reward = dealerSum > 21 ? this._reward.win : playerSum - dealerSum
    return {
      state: this.state(),
      reward,
      done: true,
    }
  }
}