// mountaincar.js
import { RLRealRange, RLEnvironmentBase } from './base.js'
/**
 * MountainCar environment
 *
 * A one-dimensional car described by a position and a velocity. Each step the
 * agent picks one of three actions (0, 1, 2), which are mapped to a push of
 * -1, 0 or +1 times `_force`, combined with a position-dependent term
 * `cos(3 * position) * -_g`. An episode is finished when the car reaches
 * `_goal_position` with at least `_goal_velocity`, or when `this.epoch`
 * reaches `_max_step`.
 */
export default class MountainCarRLEnvironment extends RLEnvironmentBase {
    constructor() {
        super()
        // Same fields, same insertion order as assigning them one by one.
        Object.assign(this, {
            // Current car state.
            _position: 0,
            _velocity: 0,
            // Bounds used to clamp the state in `test`.
            _max_position: 0.6,
            _min_position: -1.2,
            _max_velocity: 0.07,
            // Termination thresholds.
            _goal_position: 0.5,
            _goal_velocity: 0,
            // Coefficients of the velocity update.
            _force: 0.001,
            _g: 0.0025,
            // Episode is flagged as failed once `this.epoch` reaches this value.
            _max_step: 200,
            // Default reward table: every outcome costs -1.
            _reward: {
                step: -1,
                goal: -1,
                fail: -1,
            },
        })
    }

    /**
     * Available actions: a single action dimension with the choices 0, 1 and 2.
     *
     * @type {Array<number[]>}
     */
    get actions() {
        const choices = [0, 1, 2]
        return [choices]
    }

    /**
     * State space description: position range followed by velocity range.
     *
     * @type {RLRealRange[]}
     */
    get states() {
        const positionRange = new RLRealRange(-1.2, 0.6)
        const velocityRange = new RLRealRange(-0.07, 0.07)
        return [positionRange, velocityRange]
    }

    /**
     * Select the reward scheme.
     *
     * `'achieve'` installs a scheme with a zero per-step reward and a computed
     * goal/fail reward; any other value restores the default table in which
     * every outcome is -1.
     *
     * @param {string} value Name of the reward scheme
     */
    set reward(value) {
        if (value !== 'achieve') {
            this._reward = {
                step: -1,
                goal: -1,
                fail: -1,
            }
            return
        }

        // NOTE(review): this is evaluated lazily and reads the environment's
        // stored `_position` / `_velocity` at access time, not the state that
        // `test` has just computed — confirm that is the intended behaviour.
        const terminalReward = () =>
            -Math.abs(this._position - this._goal_position) + Math.abs(this._velocity - this._goal_velocity)

        this._reward = {
            step: 0,
            get goal() {
                return terminalReward()
            },
            get fail() {
                return terminalReward()
            },
        }
    }

    /**
     * Reset the environment: delegates to the base class, then places the car
     * at a random position of `Math.random() * 0.2 - 0.6` with zero velocity.
     *
     * @returns {number[]} Initial state as returned by `state()`
     */
    reset() {
        super.reset()
        const startPosition = Math.random() * 0.2 - 0.6
        this._position = startPosition
        this._velocity = 0
        return this.state()
    }

    /**
     * Current state.
     *
     * @returns {number[]} `[position, velocity]`
     */
    state() {
        const { _position, _velocity } = this
        return [_position, _velocity]
    }

    /**
     * Overwrite the current state.
     *
     * @param {number[]} state `[position, velocity]`
     */
    setState(state) {
        const position = state[0]
        const velocity = state[1]
        this._position = position
        this._velocity = velocity
    }

    /**
     * Compute the outcome of applying `action` to `state`. Only local values
     * are computed here; `_position` and `_velocity` are not written.
     *
     * @param {number[]} state `[position, velocity]` to start from
     * @param {number[]} action Action array; `action[0]` is 0, 1 or 2
     * @returns {{state: number[], reward: number, done: boolean}} Next state, reward and termination flag
     */
    test(state, action) {
        const [position, velocity] = state

        // Actions 0 / 1 / 2 become a push direction of -1 / 0 / +1.
        const direction = action[0] - 1
        // Keep the sum grouped before adding it to the velocity so the
        // floating-point result is the same as a compound `+=`.
        const acceleration = direction * this._force + Math.cos(3 * position) * -this._g

        let v = velocity + acceleration
        if (Math.abs(v) > this._max_velocity) {
            v = Math.sign(v) * this._max_velocity
        }

        let p = position + v
        if (p > this._max_position) {
            p = this._max_position
        } else if (p < this._min_position) {
            p = this._min_position
        }

        // Hitting the left bound while still moving left stops the car.
        if (p === this._min_position && v < 0) {
            v = 0
        }

        // `epoch` is not defined in this class; presumably provided by
        // RLEnvironmentBase — verify against the base class.
        const timedOut = this.epoch >= this._max_step
        const reachedGoal = p >= this._goal_position && v >= this._goal_velocity
        const done = reachedGoal || timedOut

        // A timeout takes precedence over reaching the goal.
        const outcome = timedOut ? 'fail' : done ? 'goal' : 'step'
        const reward = this._reward[outcome]

        return {
            state: [p, v],
            reward,
            done,
        }
    }
}