In [1]:

import numpy as np
import pandas as pd
import copy
import matplotlib.pyplot as plt

# Display NumPy floats with 4 digits of precision and in fixed-point
# (non-scientific) notation for every array printed below.
np.set_printoptions(precision=4, suppress=True)

In [2]:
# Draw a 100 x 3 array of standard-normal samples scaled by sigma = 0.2 and
# persist it to W.csv.
np.random.seed(10001)
sigma = 0.2
W = sigma * np.random.randn(100, 3)
np.savetxt('W.csv', W, delimiter=',')


# Draw 100 standard-normal samples scaled by sigma = 0.5 and persist them to
# u.csv.
# NOTE(review): the generator is re-seeded with the same value as above, so
# these draws repeat the first 100 values that were used for W (scaled by 0.5
# instead of 0.2); u is therefore not independent of W. Confirm this is
# intended before changing the seed, since both CSV files feed later cells.
np.random.seed(10001)
sigma = 0.5
u = sigma * np.random.randn(100)
np.savetxt('u.csv', u, delimiter=',')

In [3]:
# State matrix of the 3-state linear system (multiplies the state in `transition`).
A = np.array([[0.00, 1.00, 0.00],
              [0.00, 0.00, 1.00],
              [0.06, -0.47, 1.20]])

# Input matrix as a (3, 1) integer column: the control enters the third state only.
B = np.array([[0], [0], [1]])

# Quadratic cost weights: M multiplies the state term, R the squared control.
M = np.identity(3)
R = 0.1

# Number of simulation steps.
T = 100

# Initial state: the origin, stored as a (3, 1) column vector.
x0 = np.zeros((3, 1))
ut = 0  # zero policy

In [4]:
# Read the comma-separated samples back from W.csv and show the array's shape
# as the cell output.
W = np.genfromtxt("W.csv", delimiter=",")
W.shape

(100, 3)

In [5]:
# Spot-check W[0, 0], W[1, 1] and W[2, 2], each rounded to 4 decimals and
# printed on its own line.
print(*(np.round(W[d, d], 4) for d in range(3)), sep="\n")

-0.0411
0.1229
-0.2518


In [6]:
def phi(xt, ut):
    """Build the polynomial feature vector for a state/control pair.

    The features are, in order: a constant 1, then for every state component
    ``x_i`` the pair ``(x_i**2, x_i * ut)``, and finally the state components
    themselves.

    Parameters
    ----------
    xt : array_like
        State. It is flattened before use, so a column of shape (n, 1) and a
        flat vector of length n give the same result.
    ut : scalar or single-element array_like
        Control input.

    Returns
    -------
    numpy.ndarray
        1-D float array of length ``1 + 3 * n``.

    Raises
    ------
    TypeError
        If ``ut`` holds more than one element and so cannot be converted to a
        single float.
    """
    state = np.asarray(xt, dtype=float).ravel()
    # Collapse e.g. a (1, 1) control array to a plain float so that every
    # feature is a scalar.
    control = float(np.squeeze(ut))

    polyvec = [1.0]
    for xti in state:
        polyvec.append(xti**2)
        polyvec.append(xti * control)
    # Add the linear terms one by one (extend, not append) so the result stays
    # a flat vector instead of carrying a nested array as its last element.
    polyvec.extend(state)
    return np.array(polyvec)

def transition(xt, ut, wt):
    """Return the next state ``A @ xt + B * ut + wt``.

    ``A`` and ``B`` are the module-level system matrices; ``xt`` is the
    current state, ``ut`` the control and ``wt`` the additive disturbance.
    """
    drift = A @ xt
    forced = B * ut
    return drift + forced + wt

def cost(xt, ut):
    """Return the quadratic stage cost ``xt.T @ M @ xt + R * ut**2``.

    ``M`` and ``R`` are the module-level state and control weights.
    """
    state_penalty = xt.T @ M @ xt
    control_penalty = R * ut**2
    return state_penalty + control_penalty

def calc_ut(xt, K):
    """Return the linear state-feedback control ``(-K) @ xt``.

    ``xt`` is the current state and ``K`` the gain matrix.
    """
    negated_gain = -K
    return negated_gain @ xt

def run_simulation(x0, u_arr, w_arr, iter_lim=None):
    """Roll the system forward under a fixed control sequence and score it.

    At step ``i`` the state is advanced with
    ``transition(x, u_arr[i], w_arr[i])`` and the stage cost is ``cost``
    evaluated at the *new* state together with the control that produced it.

    Parameters
    ----------
    x0 : numpy.ndarray
        Initial state. It is copied, so the caller's array is not touched.
    u_arr : numpy.ndarray
        Controls, one entry per step along the first axis.
    w_arr : numpy.ndarray
        2-D array of disturbances, one row per step; each row is reshaped to a
        column vector before it is applied.
    iter_lim : int, optional
        Maximum number of steps to simulate. ``None`` (default) runs through
        all of ``u_arr``; ``0`` simulates nothing.

    Returns
    -------
    x_arr : numpy.ndarray
        States reached after each step (the initial state is not included),
        with length-1 axes squeezed out.
    C_arr : numpy.ndarray
        Stage cost of each step, with length-1 axes squeezed out.
    J_arr : numpy.ndarray
        Cumulative sum of ``C_arr``.

    Raises
    ------
    AssertionError
        If ``u_arr`` and ``w_arr`` differ in length along the first axis.

    Notes
    -----
    Because ``squeeze`` removes every length-1 axis, a run of exactly one step
    returns a flat state vector and a 0-d cost rather than arrays with a
    leading step axis.
    """
    assert u_arr.shape[0] == w_arr.shape[0], \
        "u_arr and w_arr must have the same number of steps"
    x_arr = []
    C_arr = []
    x = copy.deepcopy(x0)
    for i, u in enumerate(u_arr):
        # Check the limit *before* stepping so that exactly `iter_lim` steps
        # are simulated (checking afterwards ran one step too many).
        if iter_lim is not None and i >= iter_lim:
            break

        w = w_arr[i, :][:, np.newaxis]  # row i as a column vector
        x = transition(x, u, w)
        C = cost(x, u)

        x_arr.append(x)
        C_arr.append(C)

    x_arr = np.array(x_arr).squeeze()
    C_arr = np.array(C_arr).squeeze()
    J_arr = C_arr.cumsum()

    return x_arr, C_arr, J_arr

In [7]:
from scipy import linalg as la
# P is the solution of the discrete-time algebraic Riccati equation for
# (A, B, M, R).
P = la.solve_discrete_are(A, B, M, R)
# Feedback gain K = (R + B' P B)^{-1} (B' P A), obtained by solving the linear
# system instead of forming the inverse explicitly.
K = la.solve(R + B.T.dot(P).dot(B), B.T.dot(P).dot(A))
# Display the gain as the cell output.
K

array([[ 0.0582, -0.4535,  1.1459]])

In [8]:
# Closed-loop rollout: at every step compute the feedback control from the
# current state, record it, then advance the state with disturbance row W[t].
T = 100
x = x0
x_arr = [x0]  # NOTE(review): never appended to or read within this cell
u_arr = []
for t in range(T):
    u = calc_ut(x, K)
    w = W[t, :][:, np.newaxis]  # row t of W as a column vector
    u_arr.append(u)
    x = transition(x, u, w)
# Collect the recorded controls into a single array (first axis = time step).
u_arr = np.array(u_arr)

In [9]:
# Score three control sequences with run_simulation, all from the same x0 and
# the same disturbance array W. Each entry is the (x_arr, C_arr, J_arr) tuple
# returned by run_simulation.
# NOTE(review): `u_arr` is rebound twice below, so on entry it must still hold
# the controls recorded by the rollout cell above. Re-running only this cell
# would score "LQR" with the controls loaded from u.csv.
performance_dict = {}

# LQR
performance_dict["LQR"] = run_simulation(x0, u_arr, W)

# Zero policy
u_arr = np.zeros(T)
performance_dict["Zero"] = run_simulation(x0, u_arr, W)

# Random policy
u_arr = np.genfromtxt("u.csv", delimiter=",")
performance_dict["Random"] = run_simulation(x0, u_arr, W)