In [1]:
import numpy as np
import gym
import gym_custom_envs
import time
import random
import scipy.linalg as alg
from gym import wrappers

In [2]:
from IPython.display import clear_output

# Create the "Quadrotor2D-v0" environment (presumably registered by the
# gym_custom_envs import) and wrap it in a Monitor writing to "vid".
env = gym.make("Quadrotor2D-v0")
# force=True clears monitor files left in "vid" by a previous run. Without it
# the Monitor wrapper raises when the directory already contains monitor
# files, so this cell would fail on a second Restart & Run All.
env = gym.wrappers.Monitor(env, "vid", force=True)
# uncomment for testing environment below
# for i in range(5):
#     s = env.reset()
    
#     done = False
#     while not done:
#         a = np.array(np.random.uniform(low=-2.0, high=2.0, size=(2,)))
# #         a = np.zeros(2)
#         print(round(s[0], 3), round(s[1], 3), round(np.rad2deg(s[2]), 3))
#         ns, r, done, _ = env.step(a)
#         s = ns
#         env.render()
#         time.sleep(0.08)
#         clear_output(wait=True)

In [3]:
# Physical parameters read from the environment (presumably mass, moment of
# inertia and gravitational acceleration -- confirm against the env source).
m = env.m
I = env.I
g = env.gravity
# Half of env.l; used below as the lever arm of each input in the third row of b.
r = env.l/2

# Only M(q) and b(q) are needed: C(q, q_dot) and Tau(q) are zero.
M = np.matrix([[m, 0, 0], [0, m, 0], [0, 0, I]])

# Each column is one input; the rows follow the ordering of M's diagonal.
b = np.matrix([[0.0, 0.0], [1, 1], [r, -r]])

# Inverse of M, reused when assembling the state-space matrices.
iM = np.linalg.inv(M)

# Label and value on separate lines, exactly as two consecutive print calls.
print('M(q) = ', M, sep='\n')

print('b(q) = ', b, sep='\n')

print('iM(q) = ', iM, sep='\n')

M(q) = 
[[0.2        0.         0.        ]
 [0.         0.2        0.        ]
 [0.         0.         0.00416667]]
b(q) = 
[[ 0.    0.  ]
 [ 1.    1.  ]
 [ 0.25 -0.25]]
iM(q) = 
[[  5.   0.   0.]
 [  0.   5.   0.]
 [  0.   0. 240.]]


In [4]:
def lqr(A, B, Q, R):
    """Compute the continuous-time LQR state-feedback gain.

    Solves the continuous algebraic Riccati equation
    ``A.T S + S A - S B R^-1 B.T S + Q = 0`` and returns the gain
    ``K = R^-1 B.T S``.

    Parameters
    ----------
    A : (n, n) array_like or np.matrix
        State matrix.
    B : (n, k) array_like or np.matrix
        Input matrix.
    Q : (n, n) array_like
        State weighting matrix.
    R : (k, k) array_like
        Input weighting matrix (must be invertible).

    Returns
    -------
    K : np.matrix, shape (k, n)
        Feedback gain.
    S : np.matrix, shape (n, n)
        Solution of the Riccati equation.
    """
    # Work on plain ndarrays so the algebra below does not depend on whether
    # the caller passed np.matrix or np.ndarray: `*` is a matrix product for
    # the former but elementwise for the latter.
    A = np.asarray(A)
    B = np.asarray(B)
    Q = np.asarray(Q)
    R = np.asarray(R)

    S = alg.solve_continuous_are(A, B, Q, R)

    # Solve R K = B.T S rather than forming inv(R) explicitly.
    K = alg.solve(R, B.T @ S)

    # Keep returning np.matrix, as existing callers rely on matrix semantics.
    return np.matrix(K), np.matrix(S)

In [5]:
# 3x3 matrix that is zero except for -m*g at row 0, column 2. Multiplied by
# iM below, it forms the lower-left block of A.
db = np.matrix(np.zeros((3, 3)))
db[0, 2] = -m*g

# A = [[0, I], [iM*db, 0]], assembled row-block by row-block.
A_top = np.hstack((np.zeros((3, 3)), np.eye(3)))
A_bottom = np.hstack((np.dot(iM, db), np.zeros((3, 3))))
A = np.vstack((A_top, A_bottom))

# B = [[0], [iM*b]]: the inputs enter only through the lower three rows.
B = np.vstack((np.zeros((3, 2)), np.dot(iM, b)))

# Diagonal state weights; the last entry is r/pi.
Q = np.diag([10, 10, 10, 1, 1, (r/ np.pi)])

# Equal weight of 0.02 on both inputs.
R = np.diag([0.02, 0.02])

K, S = lqr(A, B, Q, R)

# Label and value on separate lines, exactly as two consecutive print calls.
print('A = ', A, sep='\n')
print('B = ', B, sep='\n')

print('Q = ', Q, sep='\n')

print('R = ', R, sep='\n')

print('K = ', K, sep='\n')

A = 
[[  0.   0.   0.   1.   0.   0.]
 [  0.   0.   0.   0.   1.   0.]
 [  0.   0.   0.   0.   0.   1.]
 [  0.   0. -10.   0.   0.   0.]
 [  0.   0.   0.   0.   0.   0.]
 [  0.   0.   0.   0.   0.   0.]]
B = 
[[  0.   0.]
 [  0.   0.]
 [  0.   0.]
 [  0.   0.]
 [  5.   5.]
 [ 60. -60.]]
Q = 
[[10.          0.          0.          0.          0.          0.        ]
 [ 0.         10.          0.          0.          0.          0.        ]
 [ 0.          0.         10.          0.          0.          0.        ]
 [ 0.          0.          0.          1.          0.          0.        ]
 [ 0.          0.          0.          0.          1.          0.        ]
 [ 0.          0.          0.          0.          0.          0.07957747]]
R = 
[[0.02 0.  ]
 [0.   0.02]]
K = 
[[-15.8113883   15.8113883   23.56497713  -9.97592105   5.30681427
    1.54343332]
 [ 15.8113883   15.8113883  -23.56497713   9.97592105   5.30681427
   -1.54343332]]


In [6]:
# The gain depends only on the constant matrices (A, B, Q, R), so the Riccati
# equation is solved once here instead of on every simulation step.
K, S = lqr(A, B, Q, R)

# Run five episodes under the state-feedback law u = -K s.
for i in range(5):
    s = env.reset().reshape(1, 6)

    done = False

    while not done:
        # Column vector of the two inputs.
        a = -np.dot(K, s.T)

#         print('Theta = ', round(np.rad2deg(s[0, 2]), 3))
#         print('u = ', a.T)
#         print("Reaction = ", round((a[0] + a[1])[0, 0] - m*g, 3))
#         print("Cost = ", round(np.dot(s, np.dot(S, s.T))[0, 0], 3))

        # Pass the action as a flat (2,) ndarray, the same form the
        # commented-out environment test uses, rather than a list of 1x1
        # matrices.
        u = np.asarray(a).ravel()

        # The reward is bound to `reward`, not `r`: `r` is the arm half-length
        # used to build Q, and overwriting it here would change Q the next
        # time that cell is re-run.
        ns, reward, done, _ = env.step(u)
        s = ns.reshape(1, 6)
        env.render()

#         clear_output(wait=True)
#         time.sleep(0.008)