In [1]:
import gym
import numpy as np
import tensorflow as tf

In [35]:
TOTAL_STEPS = 5000
TRAINING_STEPS = 3000
VALIDATION_STEPS = 1000
BATCH_SIZE = 64

## Cartpole-v0 Info
### Action Space
    The action is a `ndarray` with shape `(1,)` which can take values `{0, 1}` indicating the direction of the fixed force the cart is pushed with.
    | Num | Action                 |
    |-----|------------------------|
    | 0   | Push cart to the left  |
    | 1   | Push cart to the right |
    
### Observation Space
    The observation is a `ndarray` with shape `(4,)` with the values corresponding to the following positions and velocities:
    | Num | Observation           | Min                  | Max                |
    |-----|-----------------------|----------------------|--------------------|
    | 0   | Cart Position         | -4.8                 | 4.8                |
    | 1   | Cart Velocity         | -Inf                 | Inf                |
    | 2   | Pole Angle            | ~ -0.418 rad (-24°)  | ~ 0.418 rad (24°)  |
    | 3   | Pole Angular Velocity | -Inf                 | Inf                |
    
(https://github.com/openai/gym/blob/master/gym/envs/classic_control/cartpole.py)

In [20]:
env = gym.make('CartPole-v0')
X = np.empty((TOTAL_STEPS, 5)) # len(env.observation_space.sample()) == 4
y = np.empty((TOTAL_STEPS, 4))

In [17]:
#### TEST ####
x_present = env.reset()
for timestep in range(1):
    u = env.action_space.sample() # sidenote: should this be one-hot encoded?
    x_prime, _, done, _ = env.step(u)
    
    # sample x[n], u[n], and x[n+1]
    X[timestep, :4] = x_present
    X[timestep, 4] = u
    y[timestep, :4] = x_prime
    x_present = x_prime
print(X[0])
print(y[0])

[ 0.0188304  -0.04221813  0.02094814  0.0217232   1.        ]
[ 0.01798603  0.15259723  0.0213826  -0.26427743]


In [3]:
#### "Let-it-run method" ####
for iteration in range(TOTAL_STEPS // 1000):
    x_present = env.reset()
    for timestep in range(1000):
        u = env.action_space.sample() # sidenote: should this be one-hot encoded?
        x_prime, _, done, _ = env.step(u)

        # sample x[n], u[n], and x[n+1]
        X[1000 * iteration + timestep, :4] = x_present
        X[1000 * iteration + timestep, 4] = u
        y[1000 * iteration + timestep, :4] = x_prime
        x_present = x_prime
env.close()

In [21]:
x_present = env.reset()
for num_timestep in range(TOTAL_STEPS):
    u = env.action_space.sample() # sidenote: should this be one-hot encoded?
    x_prime, _, done, _ = env.step(u)

    # sample x[n], u[n], and x[n+1]
    X[num_timestep, :4] = x_present
    X[num_timestep, 4] = u
    y[num_timestep, :4] = x_prime
    x_present = x_prime
    if done:
        x_present = env.reset()
env.close()

In [69]:
class CartpoleLinearControlSystem(tf.keras.Model):
    def __init__(self):
        super(CartpoleLinearControlSystem, self).__init__()
        self.x_input = tf.keras.layers.Input(shape = (4, ), name = 'x_input')
        self.A = tf.keras.layers.Dense(4, activation = 'linear', name = 'A')
        self.u_input = tf.keras.layers.Input(shape = (1, ), name = 'u_input')
        self.B = tf.keras.layers.Dense(4, activation = 'linear', name = 'B')
        self.add = tf.keras.layers.Add()
        
    def call(self, inputs):
        x = self.x_input(inputs[:, :4])
        u = self.u_input(inputs[:, 4])
        return self.add(self.A(x), self.B(u))

Uses reference code in *Python Machine Learning: 3rd Edition* (Raschka and Mirjalili, 2019) pp. 438, 512-515

In [44]:
# create tensorflow dataset object instance; concat X and y
dataset = tf.data.Dataset.from_tensor_slices((X, y)).shuffle(1000)
# split trajectories into training, validation, and testing
dataset_training = dataset.take(TRAINING_STEPS)
dataset_validation = dataset.skip(TRAINING_STEPS).take(VALIDATION_STEPS)
dataset_testing = dataset.skip(TRAINING_STEPS + VALIDATION_STEPS)
# split training data into batches
dataset_training = dataset_training.repeat(5).batch(BATCH_SIZE)

In [70]:
# compile model
model = CartpoleLinearControlSystem()
model.compile(tf.keras.optimizers.Adam(),
              loss = tf.keras.losses.MeanSquaredError(),
              metrics = [tf.keras.metrics.MeanSquaredError()])
history = model.fit(dataset_training, validation_data = dataset_validation, epochs = 1000)
tf.keras.models.save_model(
    model, './cartpole_system_model', overwrite = True, include_optimizer = True, save_format = None,
    signatures = None, options = None, save_traces = True
)

Epoch 1/1000


TypeError: in user code:

    File "/Users/johnlime/anaconda3/lib/python3.8/site-packages/keras/engine/training.py", line 878, in train_function  *
        return step_function(self, iterator)
    File "/Users/johnlime/anaconda3/lib/python3.8/site-packages/keras/engine/training.py", line 867, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "/Users/johnlime/anaconda3/lib/python3.8/site-packages/keras/engine/training.py", line 860, in run_step  **
        outputs = model.train_step(data)
    File "/Users/johnlime/anaconda3/lib/python3.8/site-packages/keras/engine/training.py", line 808, in train_step
        y_pred = self(x, training=True)
    File "/Users/johnlime/anaconda3/lib/python3.8/site-packages/keras/utils/traceback_utils.py", line 67, in error_handler
        raise e.with_traceback(filtered_tb) from None

    TypeError: Exception encountered when calling layer "cartpole_linear_control_system_22" (type CartpoleLinearControlSystem).
    
    in user code:
    
        File "/var/folders/z9/d3462rf1391bntsj4j_8rn_h0000gn/T/ipykernel_1704/105981308.py", line 11, in call  *
            x = self.x_input(inputs[:, :4])
    
        TypeError: 'KerasTensor' object is not callable
    
    
    Call arguments received:
      • inputs=tf.Tensor(shape=(None, 5), dtype=float32)


In [None]:
print(history)