-
Notifications
You must be signed in to change notification settings - Fork 2
/
frozenLake_q_learning_train.py
73 lines (46 loc) · 1.32 KB
/
frozenLake_q_learning_train.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
import gym
import numpy as np
import time, pickle, os
# Create the FrozenLake environment (4x4 grid, slippery by default in v0).
env = gym.make('FrozenLake-v0')
# Exploration rate for the epsilon-greedy policy: probability of a random action.
# NOTE(review): fixed at 0.9 for the whole run — the decay schedule below is
# commented out, so the agent explores 90% of the time even late in training.
epsilon = 0.9
# min_epsilon = 0.1
# max_epsilon = 1.0
# decay_rate = 0.01
total_episodes = 10000  # number of training episodes
max_steps = 100  # step cap per episode
lr_rate = 0.81  # Q-learning step size (alpha)
gamma = 0.96  # discount factor for future rewards
# Q-table: one row per state, one column per action, initialized to zeros.
Q = np.zeros((env.observation_space.n, env.action_space.n))
def choose_action(state):
    """Select an action for ``state`` using an epsilon-greedy policy.

    With probability ``epsilon`` a random action is sampled from the
    environment's action space (exploration); otherwise the action with
    the highest Q-value for ``state`` is chosen (exploitation).

    Relies on module-level globals: ``env``, ``Q``, ``epsilon``.

    Returns:
        int: the chosen action index.
    """
    # The original initialized `action = 0` first, but both branches always
    # reassign it, so the dead store has been removed.
    if np.random.uniform(0, 1) < epsilon:
        # Explore: uniformly random action.
        return env.action_space.sample()
    # Exploit: greedy action from the Q-table.
    return np.argmax(Q[state, :])
def learn(state, state2, reward, action):
    """Apply one tabular Q-learning update for the transition
    (state, action) -> (state2, reward).

    Uses module-level globals ``Q``, ``gamma``, and ``lr_rate``.
    """
    # TD target: immediate reward plus discounted best achievable value
    # from the successor state.
    best_future = np.max(Q[state2, :])
    td_target = reward + gamma * best_future
    # Nudge the current estimate toward the target by the learning rate.
    td_error = td_target - Q[state, action]
    Q[state, action] += lr_rate * td_error
# ---- Training loop ----
# Cumulative reward over all episodes; on FrozenLake the per-step reward is
# 0 except 1.0 on reaching the goal, so this counts successful episodes.
rewards = 0
for episode in range(total_episodes):
    state = env.reset()
    t = 0
    while t < max_steps:
        env.render()
        action = choose_action(state)
        state2, reward, done, info = env.step(action)
        learn(state, state2, reward, action)
        state = state2
        t += 1
        # BUG FIX: accumulate the environment's reward, not a flat +1 per
        # step. The original `rewards += 1` counted steps taken, so the
        # final "Score over time" reported average episode length instead
        # of the success rate it was meant to measure.
        rewards += reward
        if done:
            break
    # epsilon = min_epsilon + (max_epsilon - min_epsilon) * np.exp(-decay_rate * episode)
    # os.system('clear')
    time.sleep(0.1)

# Average reward per episode == fraction of episodes that reached the goal.
print("Score over time: ", rewards / total_episodes)
print(Q)

# Persist the learned Q-table so an evaluation script can reload it.
with open("frozenLake_qTable.pkl", 'wb') as f:
    pickle.dump(Q, f)