# System Matrices Evaluation and Control Input Derivation

In [1]:
import gym
import numpy as np
import tensorflow as tf
import pickle
import os
from os import path

## Import matrices for linear control systems

In [2]:
model = tf.keras.models.load_model(
    './cartpole_system_model', custom_objects=None, compile=True, options=None
)

2022-03-15 01:10:50.123978: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [3]:
np_weights = model.get_weights()
print(model.A.get_weights())
print(model.B.get_weights())

[array([[ 1.0000000e+00,  2.8594382e-04, -4.5564782e-08,  8.6408173e-04],
       [ 2.0000003e-02,  9.9994367e-01,  2.9265443e-08, -7.2883966e-04],
       [-3.1767406e-08, -1.3775987e-02,  1.0000000e+00,  3.1655234e-01],
       [-3.4968859e-09,  2.1930250e-04,  2.0000009e-02,  9.9978548e-01]],
      dtype=float32)]
[array([[-1.4053483e-09,  1.9509907e-01, -1.3254321e-08, -2.9150683e-01]],
      dtype=float32)]


In [4]:
A = np_weights[0]
B = np_weights[1].T
print("A Matrix")
print(A)
print("B Matrix")
print(B)

A Matrix
[[ 1.0000000e+00  2.8594382e-04 -4.5564782e-08  8.6408173e-04]
 [ 2.0000003e-02  9.9994367e-01  2.9265443e-08 -7.2883966e-04]
 [-3.1767406e-08 -1.3775987e-02  1.0000000e+00  3.1655234e-01]
 [-3.4968859e-09  2.1930250e-04  2.0000009e-02  9.9978548e-01]]
B Matrix
[[-1.4053483e-09]
 [ 1.9509907e-01]
 [-1.3254321e-08]
 [-2.9150683e-01]]


## Deriving K

Rewrite the control system equation using $u = - Kx$.

$$
\begin{align}
    \dot x &= Ax - Bu \\
    \dot x &= Ax - BKx \\
\end{align}
$$

Define the target control system with the desired eigenvalue and eigenvector placements as $\dot x = Tx$.

Derive the state space to control input transformation $K$.

$$
\begin{align}
    \dot x 
        &= Tx \\
        &= Ax - BKx \\
    Tx &= Ax - BKx \\
    T &= A - BK \\
    K &= - B^{-1}_{left} (T - A) \\
\end{align}
$$

where $B^{-1}_{left}$ is the left inverse of $B$, which can be derived as,

$$
\begin{align}
    (A^T A)^{-1} A^T A &= I \\
    (A^T A)^{-1} A^T &= A^{-1}_{left} \\
\end{align}
$$

In [5]:
L, V = np.linalg.eig(A)
print("Eigenvalues")
print(L)
print("Eigenvectors")
print(V)

Eigenvalues
[0.9203467 0.9974113 1.0025    1.0794711]
Eigenvectors
[[-0.0026594  -0.12393044  0.12821224  0.0026406 ]
 [ 0.00290405  0.9912179   0.990798   -0.00156652]
 [-0.9697325  -0.01600512 -0.0050172   0.9699134 ]
 [ 0.2441379   0.04326752  0.04307875  0.24343067]]


In [19]:
# Define desirable T = A - BK (test)
T = np.diag((-0.05, -0.5, -1, -0.6))

# calculate K
B_left_inv = np.matmul(np.linalg.inv(np.matmul(B.T, B)), B.T)
K = - np.matmul(B_left_inv, (T - A))
print("K Matrix")
print(K)

K Matrix
[[ 0.03171315  2.37787697 -0.04738431 -3.79137701]]


In [20]:
save_controller_dict = {"K": K, "Target": T, "A": A, "B": B}
print(save_controller_dict)
with open('./cartpole_system_model/controller.pkl', 'wb') as filepath:
    pickle.dump(save_controller_dict, filepath, protocol=pickle.HIGHEST_PROTOCOL)

{'K': array([[ 0.03171315,  2.37787697, -0.04738431, -3.79137701]]), 'Target': array([[-0.05,  0.  ,  0.  ,  0.  ],
       [ 0.  , -0.5 ,  0.  ,  0.  ],
       [ 0.  ,  0.  , -1.  ,  0.  ],
       [ 0.  ,  0.  ,  0.  , -0.6 ]]), 'A': array([[ 1.0000000e+00,  2.8594382e-04, -4.5564782e-08,  8.6408173e-04],
       [ 2.0000003e-02,  9.9994367e-01,  2.9265443e-08, -7.2883966e-04],
       [-3.1767406e-08, -1.3775987e-02,  1.0000000e+00,  3.1655234e-01],
       [-3.4968859e-09,  2.1930250e-04,  2.0000009e-02,  9.9978548e-01]],
      dtype=float32), 'B': array([[-1.4053483e-09],
       [ 1.9509907e-01],
       [-1.3254321e-08],
       [-2.9150683e-01]], dtype=float32)}


## Testing

In [21]:
with open('./cartpole_system_model/controller.pkl', 'rb') as filepath:
    controller_dict = pickle.load(filepath)
K = controller_dict['K']

In [22]:
env = gym.make('CartPole-v0')
x = env.reset()
cumul_reward = 0
for _ in range(1000):
    u = np.matmul(K, x)
    print(u)
    if u < 0:
        u = 0
    else:
        u = 1
    x, reward, done, _ = env.step(u)
    cumul_reward += reward
    if done:
        break
env.close()
print(cumul_reward)

[0.04975393]
[1.6820682]
[3.31497472]
[4.95550494]
[6.61045301]
[8.28618529]
[9.98842958]
[11.7220223]
8.0


In [None]:
# optimizing T