In [1]:
import numpy as np
import gym
import gym_custom_envs
import time
import scipy.linalg as alg
from gym import wrappers
# Instantiate the environment by id. NOTE(review): "CartPoleContinuous-v0" is
# presumably registered as a side effect of importing gym_custom_envs — confirm.
env = gym.make("CartPoleContinuous-v0")

In [2]:
# Pull the physical constants off the environment in one go. The short names
# (mc, mp, l, g) are what the later cells use when building the model matrices.
mc, mp, l, g = (
    env.masscart,   # cart mass
    env.masspole,   # pole mass
    env.length,     # env's `length` attribute
    env.gravity,    # gravitational acceleration
)

In [3]:
# Mass matrix assembled from mc, mp and l, together with its inverse
# (the inverse is what the state-space matrices are built from further down).
M = np.matrix([[mc + mp, mp * l],
               [mp * l, mp * l**2]])
iM = np.linalg.inv(M)

# No C(q, q_dot) term appears here: it vanishes because q_dot is zero.
# Gravity-related matrix: only the (1, 1) entry, mp*g*l, is non-zero.
dtau_g = np.matrix([[0, 0],
                    [0, mp * g * l]])

# Input matrix: only the (0, 0) entry is non-zero.
b = np.matrix([[1., 0],
               [0., 0]])

# Show each matrix under its label, in the order they were defined.
for label, value in (("M = ", M), ("iM = ", iM), ("dtau_g = ", dtau_g), ("b", b)):
    print(label)
    print(value, '\n')

M = 
[[1.1   0.05 ]
 [0.05  0.025]] 

iM = 
[[ 1. -2.]
 [-2. 44.]] 

dtau_g = 
[[0.   0.  ]
 [0.   0.49]] 

b
[[1. 0.]
 [0. 0.]] 



In [4]:
def lqr(A, B, Q, R):
    """Solve the continuous-time, infinite-horizon LQR problem.

    For the system ``x_dot = A x + B u`` with cost weights ``Q`` (state) and
    ``R`` (input), this solves the continuous algebraic Riccati equation for
    ``S`` and forms the gain ``K = R^{-1} B^T S``. The sign convention is left
    to the caller (the control law is typically ``u = -K x``).

    Parameters
    ----------
    A : array_like, shape (n, n)
    B : array_like, shape (n, m)
    Q : array_like, shape (n, n)
    R : array_like, shape (m, m)
        Any mix of ``np.ndarray`` and ``np.matrix`` is accepted.

    Returns
    -------
    K : np.matrix, shape (m, n)
        Feedback gain.
    S : np.matrix, shape (n, n)
        Solution of the Riccati equation.
    """
    # Work on plain ndarrays so the products below are true matrix products
    # regardless of whether the caller passed np.matrix or np.ndarray
    # (with ndarrays, `*` would multiply element-wise).
    A, B, Q, R = (np.asarray(mat) for mat in (A, B, Q, R))

    S = alg.solve_continuous_are(A, B, Q, R)

    # Solve R K = B^T S instead of forming inv(R) explicitly.
    K = alg.solve(R, B.T @ S)

    # Callers receive np.matrix objects, as before.
    return np.matrix(K), np.matrix(S)

In [5]:
# State matrix in block form:
#   [[    0      ,  I ],
#    [ iM*dtau_g ,  0 ]]
A = np.vstack((
    np.hstack((np.zeros((2, 2)), np.eye(2))),
    np.hstack((np.dot(iM, dtau_g), np.zeros((2, 2)))),
))

# Input matrix: a zero block stacked on top of iM*b.
B = np.vstack((np.zeros((2, 2)), np.dot(iM, b)))

# Diagonal state weights: 10 on the first two states, 1 on the last two.
Q = np.matrix(np.diag([10., 10., 1., 1.]))

# Identity input weight (2x2, matching the two columns of B).
R = np.eye(2)

# Show each matrix under its label.
for label, value in (("A = ", A), ("B = ", B), ("Q = ", Q), ("R = ", R)):
    print(label)
    print(value, '\n')



A = 
[[ 0.    0.    1.    0.  ]
 [ 0.    0.    0.    1.  ]
 [ 0.   -0.98  0.    0.  ]
 [ 0.   21.56  0.    0.  ]] 

B = 
[[ 0.  0.]
 [ 0.  0.]
 [ 1.  0.]
 [-2.  0.]] 

Q = 
[[10.  0.  0.  0.]
 [ 0. 10.  0.  0.]
 [ 0.  0.  1.  0.]
 [ 0.  0.  0.  1.]] 

R = 
[[1. 0.]
 [0. 1.]] 



In [6]:
# Gain K and Riccati solution S for the A, B, Q, R defined above.
K, S = lqr(A, B, Q, R)

for label, value in (("K = ", K), ("S = ", S)):
    print(label)
    print(value, '\n')

K = 
[[ -3.16227766 -37.08642453  -4.23995135  -8.18258944]
 [  0.           0.           0.           0.        ]] 

S = 
[[ 13.40790344  25.8756198    8.48859373   5.8254357 ]
 [ 25.8756198  151.6477398   28.86834548  32.977385  ]
 [  8.48859373  28.86834548   8.74105515   6.49050325]
 [  5.8254357   32.977385     6.49050325   7.33654635]] 



In [7]:
# Not used in this cell; retained so the notebook namespace is unchanged.
from IPython.display import clear_output

# Wrap the env so every episode is recorded under "vid"
# (video_callable always returns True; force=True is passed to the Monitor).
env = gym.wrappers.Monitor(env, "vid", video_callable=lambda i: True, force=True)

win = 0
number = 10                      # episodes to run
goal_tolerance = np.deg2rad(1)   # success threshold, computed once

try:
    for i in range(number):
        s = env.reset().reshape(1, 4)
        done = False
        while not done:
            # State feedback u = -K s; only the first input channel is applied.
            a = -np.dot(K, s.T)[0, 0]
            ns, r, done, _ = env.step(a)
            env.render()
            s = ns.reshape(1, 4)
        # NOTE(review): index 1 of the final state is compared against 1 degree —
        # presumably the pole angle, consistent with the block layout of A;
        # confirm against the env's observation ordering.
        if abs(ns[1]) < goal_tolerance:
            win += 1
    print('successfully reached goal ', win, 'times out of ', number)
finally:
    # Always close the monitored env, even if a rollout raises.
    env.close()

successfully reached goal  10 times out of  10
