-
Notifications
You must be signed in to change notification settings - Fork 2
/
Learner.py
93 lines (76 loc) · 1.91 KB
/
Learner.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
__author__ = 'philippe'
import World
import threading
import time
# Discount factor applied to the best next-state value in the Q update.
discount = 0.3
# Action identifiers are taken straight from the World module.
actions = World.actions
# Every grid cell (i, j) with i in range(World.x) and j in range(World.y).
states = [(i, j) for i in range(World.x) for j in range(World.y)]
# Q[state][action] -> current value estimate for that state/action pair.
Q = {}

# Start every state/action pair at 0.1 and mirror the value into the World.
for state in states:
    Q[state] = {}
    for action in actions:
        Q[state][action] = 0.1
        World.set_cell_score(state, action, 0.1)

# Special cells get their fourth tuple field as the value of every action.
# The third field is not used here (presumably a display colour -- confirm
# against World.specials).
for (special_i, special_j, _unused, special_value) in World.specials:
    special_cell = (special_i, special_j)
    for action in actions:
        Q[special_cell][action] = special_value
        World.set_cell_score(special_cell, action, special_value)
def do_action(action):
    """Apply *action* to the World and report the resulting transition.

    The action is compared, in order, against the first four entries of
    ``actions``; the matching index selects the (dx, dy) offset handed to
    ``World.try_move``.

    Returns a tuple ``(s, action, r, s2)`` where ``s`` / ``s2`` are
    ``World.player`` before / after the move and ``r`` is the change in
    ``World.score`` caused by the move.  Returns ``None`` when *action*
    matches none of the four entries.
    """
    # Offsets in the same order as actions[0..3]
    # (presumably up, down, left, right -- confirm against World.actions).
    offsets = ((0, -1), (0, 1), (-1, 0), (1, 0))

    state_before = World.player
    score_before = World.score

    for index, (dx, dy) in enumerate(offsets):
        if action == actions[index]:
            World.try_move(dx, dy)
            break
    else:
        # Unknown action: nothing is moved and nothing is returned.
        return

    state_after = World.player
    reward = World.score - score_before
    return state_before, action, reward, state_after
def max_Q(s):
    """Return ``(action, value)`` for the highest-valued action in state *s*.

    Ties are resolved in favour of the action encountered first while
    iterating ``Q[s]``.  If ``Q[s]`` holds no actions the result is
    ``(None, None)``.
    """
    best_action, best_value = None, None
    for candidate, value in Q[s].items():
        # Skip anything that does not strictly beat the current best.
        if best_value is not None and not (value > best_value):
            continue
        best_action, best_value = candidate, value
    return best_action, best_value
def inc_Q(s, a, alpha, inc):
    """Move ``Q[s][a]`` towards *inc* with learning rate *alpha*.

    The stored value becomes ``Q[s][a] * (1 - alpha) + alpha * inc`` and the
    new value is forwarded to ``World.set_cell_score`` for the same
    state/action pair.
    """
    blended = Q[s][a] * (1 - alpha) + alpha * inc
    Q[s][a] = blended
    World.set_cell_score(s, a, blended)
def run():
    """Learning loop: act greedily, update Q, and handle game restarts.

    After an initial one-second pause the loop runs forever.  Each pass:

    1. picks the highest-valued action for the current ``World.player``
       state and performs it via ``do_action``;
    2. updates Q for the visited state/action with the target
       ``reward + discount * max_a Q[next_state][a]``;
    3. advances the step counter, resetting it to 1 (and restarting the
       World) when ``World.has_restarted()`` is true;
    4. sets the learning rate to ``step ** -0.1`` and sleeps 0.1 s.
    """
    time.sleep(1)
    learning_rate = 1
    step = 1
    while True:
        # Choose and perform the greedy action for the current state.
        current_state = World.player
        greedy_action, _ = max_Q(current_state)
        current_state, taken, reward, next_state = do_action(greedy_action)

        # Q update using the best value reachable from the next state.
        _, best_next_value = max_Q(next_state)
        inc_Q(current_state, taken, learning_rate,
              reward + discount * best_next_value)

        # Count the step; start over if the World reports a restart.
        step += 1.0
        if World.has_restarted():
            World.restart_game()
            time.sleep(0.01)
            step = 1.0

        # Learning rate decays with the step count since the last restart.
        learning_rate = pow(step, -0.1)

        # Adjust this pause if the game runs too fast.
        time.sleep(0.1)
# Run the learning loop on a background thread.  It is marked as a daemon
# thread, so it does not keep the interpreter alive by itself.
learner_thread = threading.Thread(target=run)
learner_thread.daemon = True
learner_thread.start()

# Start the game from the main thread, after the learner thread is running.
World.start_game()