-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathhw6_2.py
69 lines (51 loc) · 1.64 KB
/
hw6_2.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
import numpy as np
import utils_io
def exact_stage_cost(A, B, Q, R, W, n):
P_arr = []
K_arr = []
T = 100
dummy_terminal_val = np.zeros([n, n])
P_arr.append(dummy_terminal_val)
K_arr.append(dummy_terminal_val)
for _ in range(T)[::-1]:
P_next = P_arr[-1]
term_a = -np.linalg.inv((R + np.dot(np.dot(B.T, P_next), B)))
term_b = np.dot(np.dot(B.T, P_next), A)
K = np.dot(term_a, term_b)
K_arr.append(K)
k_r_k = np.dot(np.dot(K.T, R), K)
a_bk = A + np.dot(B, K)
P = Q + k_r_k + np.dot(np.dot(a_bk.T, P_next), a_bk)
P_arr.append(P)
P_ss = P_arr[-1]
K_ss = K_arr[-1]
utils_io.label('6.2')
print 'optimal policy:', K_ss
print 'calculated avg stage cost:', 0.5 * np.trace(np.dot(P_ss, W))
return K_ss
def simulated_stage_cost(A, B, K_ss, Q, R, W, n):
T = 10000
costs = []
x = np.zeros(n)
W_mean = np.zeros(n)
for time in range(T):
u = np.dot(K_ss, x)
cost = 0.5 * (np.dot(np.dot(x.T, Q), x) + np.dot(np.dot(u.T, R), u))
costs.append(cost)
w_sample = np.random.multivariate_normal(W_mean, W)
x = np.dot(A, x) + np.dot(B, u) + w_sample
print 'simulated average stage cost: ', np.mean(costs[int(T/10):])
def prob_2():
n = 4
rho = 0.1
sigma = 1
A = np.eye(n)
B = np.array([[1, -1, 0, 0], [0, 1, -1, 0], [0, 0, 1, -1], [0, 0, 0, 1]])
W = np.zeros([n, n])
W[n-1, n-1] = sigma ** 2
Q = np.eye(n)
R = rho * np.eye(n)
K_ss = exact_stage_cost(A, B, Q, R, W, n)
simulated_stage_cost(A, B, K_ss, Q, R, W, n)
if __name__ == '__main__':
prob_2()