-
Notifications
You must be signed in to change notification settings - Fork 4
/
decisiontree_l0.py
127 lines (110 loc) · 5.03 KB
/
decisiontree_l0.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
import environment
# non robust version of the decision tree
# Decision Tree action search
def decisiontree_l0(X_old, car_id, action_space, params, Level_ratio):
    """Choose the best two-step action sequence for one car by tree search.

    Every first action is simulated with ``environment.environment`` starting
    from a copy of ``X_old``; every second action is then simulated from the
    state returned by the first step.  A sequence ``[id_1, id_2]`` is scored
    as ``R1 + discount * R2``.

    Pruning: at each depth a branch is skipped when its reward is lower than
    the best reward seen so far at that depth plus ``params.dR_drop``.  The
    depth-2 running maximum is shared across all first actions (it is never
    reset), exactly as in the original implementation.

    Args:
        X_old: Current state.  Only ``.copy()`` is called on it here; the copy
            is what gets passed to the environment.
            (presumably a NumPy array -- confirm against callers)
        car_id: Identifier of the acting car, forwarded to the environment.
        action_space: Object whose ``size`` attribute is the number of
            actions; actions are addressed by index ``0 .. size - 1``.
        params: Object providing ``discount``, ``dR_drop`` and ``t_step_DT``;
            also forwarded to the environment unchanged.
        Level_ratio: Forwarded to the environment unchanged.

    Returns:
        A tuple ``(Q_value_opt, Action_id)`` where ``Q_value_opt`` is a list
        with one entry per first action (the best score among its surviving
        sequences, or ``-1e6`` when none survived pruning) and ``Action_id``
        is the ``[id_1, id_2]`` pair with the overall best score.

    Raises:
        ValueError: If ``action_space.size`` is 0.
        IndexError: If the best-scoring first action has no surviving
            sequence (e.g. everything was pruned).
    """
    # Work on a copy so the caller's state object is never handed to the
    # environment directly.
    root_state = X_old.copy()
    discount = params.discount  # discount factor applied to the second reward
    dR_drop = params.dR_drop
    t_step_DT = params.t_step_DT
    n_actions = action_space.size

    if n_actions == 0:
        raise ValueError("action_space is empty; no action can be selected")

    Q_init = -1e6  # score reported for a first action with no surviving sequence
    dist_id = 1  # fixed value forwarded as dist_id (the original calls it a dummy)
    R1_max, R2_max = -1e10, -1e10  # running best reward per depth, used for pruning

    # One independent list per first action.  Scores and sequences are always
    # appended together so their indices stay aligned; emptiness (not a
    # sentinel score) marks "nothing recorded yet".
    scores = [[] for _ in range(n_actions)]
    sequences = [[] for _ in range(n_actions)]

    # NOTE: the original code branched on ``X_old[4, car_id] == 1`` but both
    # branches ran this same depth-2 search, so they are merged here.  An
    # earlier, disabled draft extended the search to a third step scored as
    # ``R1 + R2 * discount + R3 * discount ** 2``.
    for id_1 in range(n_actions):
        state_1, R1 = environment.environment(
            root_state, car_id, id_1, t_step_DT, params, dist_id, Level_ratio
        )
        R1_max = max(R1_max, R1)
        if R1 < R1_max + dR_drop:
            continue  # first step already too far below the best first step

        for id_2 in range(n_actions):
            _, R2 = environment.environment(
                state_1, car_id, id_2, t_step_DT, params, dist_id, Level_ratio
            )
            R2_max = max(R2_max, R2)
            if R2 < R2_max + dR_drop:
                continue  # second step too far below the best second step

            scores[id_1].append(R1 + R2 * discount)
            sequences[id_1].append([id_1, id_2])

    # Reduce each first action to its best score and remember where it was.
    Q_value_opt = []
    best_index = []
    for candidates in scores:
        if candidates:
            best = max(candidates)
            Q_value_opt.append(best)
            best_index.append(candidates.index(best))
        else:
            Q_value_opt.append(Q_init)
            best_index.append(None)

    id_opt = Q_value_opt.index(max(Q_value_opt))
    if not sequences[id_opt]:
        raise IndexError(
            "no surviving action sequence for the best-scoring first action; "
            "check params.dR_drop and the reward scale"
        )
    Action_id = sequences[id_opt][best_index[id_opt]]
    return Q_value_opt, Action_id