-
Notifications
You must be signed in to change notification settings - Fork 15
/
learn.py
112 lines (93 loc) · 3.64 KB
/
learn.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
import argparse
from environment import Env
from keras import Sequential, layers
import matplotlib.pyplot as plt
import numpy as np
from osm_request import OGraph
# argparse
parser = argparse.ArgumentParser()
parser.add_argument('-l', '--location', type=str, default="Upper West Side, Manhattan, New York City, New York, USA")
parser.add_argument('-c', '--cars', type=int, default=50)
parser.add_argument('-dt', type=float, default=1/1000)
parser.add_argument('-x', '--agent', type=int, default=17)
parser.add_argument('-e', '--episodes', type=int, default=40)
parser.add_argument('-a', '--animate', action='store_true')
def main(
location="Upper West Side, Manhattan, New York City, New York, USA",
cars=50,
dt=1/1000,
agent=17,
num_episodes=40,
animate=False
):
"""
:param location: str: a geo-codable place, like "Harlem, NY" or "Kigali, Rwanda"
:param dt: float: time step for each frame
:param cars: int: the total number of cars to simulate
:param agent: int: the number of the car that will be the learning agent (must be less than cars)
:param num_episodes: int: the number of episodes to run
:param animate: bool: whether to animate the simulation
:return:
"""
graph = OGraph(location, save=True)
# initialize the environment for the learning agent
env = Env(n=cars, graph=graph, agent=agent, dt=dt, animate=animate)
# initialize the Keras training model
model = Sequential()
model.add(layers.InputLayer(batch_input_shape=(1, 10)))
model.add(layers.Dense(10, activation='sigmoid'))
model.add(layers.Dense(2, activation='linear'))
model.compile(loss='mse', optimizer='adam', metrics=['mae'])
# now execute Q learning
y = 0.95
eps = 0.5
decay_factor = 0.80
r_avg_list = []
r_sum_list = []
file = open('diag.txt', 'w')
for i in range(num_episodes):
print("Episode {} of {}".format(i + 1, num_episodes))
eps *= decay_factor
r_sum = 0
done = False
diag_action = 0
diag_reward = 0
state = env.reset((i, num_episodes))
while not done:
env.reset((i, num_episodes))
rand = np.random.random()
if rand < eps:
action = np.random.randint(0, 2)
else:
action = np.argmax(model.predict(np.identity(10)[state:state + 1]))
new_s, r, done, _ = env.step(action=action, num=(i, num_episodes))
target = r + y * np.max(model.predict(np.identity(10)[new_s:new_s + 1]))
target_vec = model.predict(np.identity(10)[state:state + 1])[0]
target_vec[action] = target
model.fit(np.identity(10)[state:state + 1], target_vec.reshape(-1, 2), epochs=1, verbose=0)
state = new_s
r_sum += r
print('Action: {}, Reward: {}'.format(action, r))
file.write('Action: {}, Reward: {}'.format(action, round(r, 2)))
diag_action += action
diag_reward += r
r_avg_list.append(r_sum)
r_sum_list.append(sum(r_avg_list) / (i + 1))
file.write('Episode: {}, Total Rewards: {} \n'.format(i, round(r_sum, 2)))
file.close()
plt.plot(np.arange(num_episodes), r_sum_list)
plt.xlabel('Game number')
plt.ylabel('Average reward per game')
plt.suptitle('Average reward per game for car no. {}'.format(agent))
plt.savefig('avg_rewards.png')
return
if __name__ == '__main__':
args = parser.parse_args()
main(
location=args.location,
cars=args.cars,
dt=args.dt,
agent=args.agent,
num_episodes=args.episodes,
animate=args.animate
)