# System Matrices Evaluation and Control Input Derivation

In [1]:
import gym
import numpy as np
import tensorflow as tf
import pickle
import os
from os import path

## Import matrices for linear control systems

In [2]:
# Restore the saved Keras model from ./cartpole_system_model; the optional
# loader arguments are kept explicit and passed by keyword.
load_kwargs = dict(custom_objects=None, compile=True, options=None)
model = tf.keras.models.load_model('./cartpole_system_model', **load_kwargs)

2022-03-14 21:23:01.474133: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [3]:
# Flat list of every weight array held by the model.
np_weights = model.get_weights()
# Show the weights stored under the model's `A` and `B` attributes.
for layer in (model.A, model.B):
    print(layer.get_weights())

[array([[ 1.00000298e+00,  1.03474304e-01, -6.81955644e-05,
        -1.53411880e-01],
       [ 2.00001802e-02,  9.95905221e-01, -1.23909122e-05,
         6.26267958e-03],
       [-5.69764438e-07, -5.87693080e-02,  1.00003278e+00,
         3.81267816e-01],
       [ 3.71733137e-07, -1.13821297e-03,  2.00367365e-02,
         1.00111854e+00]], dtype=float32)]
[array([[-2.2487126e-07,  1.9629727e-01,  2.1716911e-05, -2.9252878e-01]],
      dtype=float32)]


In [17]:
# The first weight array is used as A as-is; the second is transposed so that
# B becomes a column (it prints as 4x1 below).
# NOTE(review): only B is transposed here. If both arrays are stored in
# row-vector form (x @ A + u @ B), A would need transposing as well -- confirm
# against the model definition.
A, B = np_weights[0], np_weights[1].T
print("A Matrix", A, sep="\n")
print("B Matrix", B, sep="\n")

A Matrix
[[ 1.00000298e+00  1.03474304e-01 -6.81955644e-05 -1.53411880e-01]
 [ 2.00001802e-02  9.95905221e-01 -1.23909122e-05  6.26267958e-03]
 [-5.69764438e-07 -5.87693080e-02  1.00003278e+00  3.81267816e-01]
 [ 3.71733137e-07 -1.13821297e-03  2.00367365e-02  1.00111854e+00]]
B Matrix
[[-2.2487126e-07]
 [ 1.9629727e-01]
 [ 2.1716911e-05]
 [-2.9252878e-01]]


## Deriving K

Rewrite the control system equation using $u = - Kx$.

$$
\begin{align}
    \dot x &= Ax + Bu \\
    \dot x &= Ax - BKx \\
\end{align}
$$

Define the target control system with the desired eigenvalue and eigenvector placements as $\dot x = Tx$.

Derive the state space to control input transformation $K$.

$$
\begin{align}
    \dot x 
        &= Tx \\
        &= Ax - BKx \\
    Tx &= Ax - BKx \\
    T &= A - BK \\
    K &= - B^{-1}_{left} (T - A) \\
\end{align}
$$

where $B^{-1}_{left}$ is the left inverse of $B$, which can be derived as,

$$
\begin{align}
    (B^T B)^{-1} B^T B &= I \\
    (B^T B)^{-1} B^T &= B^{-1}_{left} \\
\end{align}
$$

In [23]:
# Desired closed-loop matrix T = A - BK.
# T is the *negated* identity, so every eigenvalue of T equals -1.
# NOTE(review): the printed A is close to the identity, which looks like a
# discrete-time step matrix; in that case stability requires |eigenvalue| < 1
# rather than a negative real part -- confirm which convention applies.
n_states = A.shape[0]
T = -np.identity(n_states)

# Left inverse of B: (B^T B)^{-1} B^T
gram = B.T @ B
B_left_inv = np.linalg.inv(gram) @ B.T

# K = -B_left_inv (T - A)
# NOTE(review): B prints as a single column, so A - BK presumably only
# approximates T (least-squares fit) -- confirm this is acceptable.
K = -(B_left_inv @ (T - A))
print("K Matrix", K, sep="\n")

K Matrix
[[ 0.03162967  3.15958397 -0.04689808 -4.70685101]]


In [28]:
# Bundle the gain, the target matrix and the system matrices, echo the bundle,
# then pickle it into the saved-model directory.
save_controller_dict = dict(K=K, Target=T, A=A, B=B)
print(save_controller_dict)
with open('./cartpole_system_model/controller.pkl', 'wb') as pkl_file:
    pickle.dump(save_controller_dict, pkl_file)

{'K': array([[ 0.03162967,  3.15958397, -0.04689808, -4.70685101]]), 'Target': array([[-1., -0., -0., -0.],
       [-0., -1., -0., -0.],
       [-0., -0., -1., -0.],
       [-0., -0., -0., -1.]]), 'A': array([[ 1.00000298e+00,  1.03474304e-01, -6.81955644e-05,
        -1.53411880e-01],
       [ 2.00001802e-02,  9.95905221e-01, -1.23909122e-05,
         6.26267958e-03],
       [-5.69764438e-07, -5.87693080e-02,  1.00003278e+00,
         3.81267816e-01],
       [ 3.71733137e-07, -1.13821297e-03,  2.00367365e-02,
         1.00111854e+00]], dtype=float32), 'B': array([[-2.2487126e-07],
       [ 1.9629727e-01],
       [ 2.1716911e-05],
       [-2.9252878e-01]], dtype=float32)}


## Testing

In [30]:
# Roll the linear feedback controller out in the environment for a fixed
# budget of steps, restarting the episode whenever it ends.
#
# CartPole pays +1 reward on every step, so with automatic restarts the total
# reward over a fixed number of steps is the same for any controller. The
# per-episode lengths recorded here are what actually show how long the
# controller keeps each episode alive.
N_STEPS = 1000

env = gym.make('CartPole-v0')
x = env.reset()
cumul_reward = 0
episode_lengths = []      # steps survived in each finished episode
steps_in_episode = 0
try:
    for _step in range(N_STEPS):
        # Scalar feedback signal from the gain matrix and the current state.
        u = np.matmul(K, x)
        # Threshold the signal into one of the two discrete actions.
        action = 1 if u < 0.5 else 0
        x, reward, done, _info = env.step(action)
        cumul_reward += reward
        steps_in_episode += 1
        if done:
            episode_lengths.append(steps_in_episode)
            steps_in_episode = 0
            x = env.reset()
finally:
    # Release the environment even if a step raises.
    env.close()

print(cumul_reward)
print("finished episodes:", len(episode_lengths))
if episode_lengths:
    print("mean episode length:", np.mean(episode_lengths))
else:
    # No episode ended within the budget: the run survived every step.
    print("no episode terminated within", N_STEPS, "steps")

1000.0
