In [1]:
import numpy as np
from dm_control import suite
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.animation as animation
from IPython.display import HTML
import copy
from dm_control.rl.control import Environment

In [2]:
#@title Loading and simulating a `suite` task{vertical-output: true}

# Dedicated NumPy random source with a fixed seed; it is handed to the task
# through `task_kwargs` below.
random_state = np.random.RandomState(42)

# Load the 'balance' task of the 'cartpole' domain.
env: Environment = suite.load(
    'cartpole',
    'balance',
    task_kwargs=dict(random=random_state),
)

In [3]:
def display_video(frames, framerate=30):
    """Encode a sequence of image frames as an inline HTML5 video.

    Args:
        frames: Non-empty sequence of image arrays. The ``.shape`` of the
            first frame must unpack into ``(height, width, channels)``; it
            determines the size of the figure the video is drawn on.
        framerate: Playback speed in frames per second. Must be positive.

    Returns:
        An ``IPython.display.HTML`` object wrapping the encoded video.

    Raises:
        ValueError: If ``frames`` is empty or ``framerate`` is not positive.
    """
    # Validate up front so failures are explicit instead of surfacing later as
    # an IndexError / ZeroDivisionError after a figure has been created.
    if len(frames) == 0:
        raise ValueError('display_video() needs at least one frame.')
    if framerate <= 0:
        raise ValueError(f'framerate must be positive, got {framerate!r}.')

    height, width, _ = frames[0].shape
    dpi = 70
    orig_backend = matplotlib.get_backend()
    matplotlib.use('Agg')  # Switch to headless 'Agg' to inhibit figure rendering.
    try:
        fig, ax = plt.subplots(1, 1, figsize=(width / dpi, height / dpi), dpi=dpi)
    finally:
        # Always switch back, even if figure creation failed, so the kernel is
        # not left on the headless backend.
        matplotlib.use(orig_backend)

    try:
        ax.set_axis_off()
        ax.set_aspect('equal')
        ax.set_position([0, 0, 1, 1])  # Let the image fill the whole figure.
        im = ax.imshow(frames[0])

        def update(frame):
            """Show `frame` on the existing image artist (required for blitting)."""
            im.set_data(frame)
            return [im]

        interval = 1000 / framerate  # Delay between frames in milliseconds.
        anim = animation.FuncAnimation(fig=fig, func=update, frames=frames,
                                       interval=interval, blit=True, repeat=False)
        video = anim.to_html5_video()
    finally:
        # The video markup is self-contained once encoded; release the figure
        # so repeated calls do not accumulate open pyplot figures.
        plt.close(fig)
    return HTML(video)

In [4]:
# Roll out a few steps with a constant zero action, recording what happens.
ticks = []          # env.physics.data.time read after each step
rewards = []        # reward of each step
observations = []   # deep copy of each step's observation

frames = []         # rendered camera-0 image of each step
state_frames_count: int = 4

spec = env.action_spec()
time_step = env.reset()

# Generate the first `state_frames_count` frames so there is an initial state.
for i in range(state_frames_count):
    action = 0
    print("Action: ", action)
    time_step = env.step(action)

    step_observation = time_step.observation
    print("Position: ", step_observation['position'])
    print("Velocity: ", step_observation['velocity'])
    print("Reward: ", time_step.reward)

    # Render the scene as seen by camera 0 at a 100x100 resolution.
    camera0 = env.physics.render(camera_id=0, height=100, width=100)
    print("camera0.shape")
    print(camera0.shape)

    # Record everything belonging to this step.
    frames.append(camera0)
    rewards.append(time_step.reward)
    observations.append(copy.deepcopy(step_observation))
    ticks.append(env.physics.data.time)

# Encode the video first; the plots are created afterwards.
html_video = display_video(frames, framerate=1. / env.control_timestep())

# One panel for the reward followed by one panel per observation key.
num_sensors = len(time_step.observation)

_, ax = plt.subplots(1 + num_sensors, 1, sharex=True, figsize=(4, 8))
ax[0].plot(ticks, rewards)
ax[0].set_ylabel('reward')
ax[-1].set_xlabel('time')

for i, key in enumerate(time_step.observation):
    panel = ax[i + 1]
    data = np.asarray([recorded[key] for recorded in observations])
    panel.plot(ticks, data, label=key)
    panel.set_ylabel(key)

print(f'cantidad de frames {len(frames)}')

# Last expression of the cell: displays the video.
html_video

Action:  0
Position:  [-0.02502827  0.99952497  0.03081941]
Velocity:  [0.0062639  0.01991774]
Reward:  0.9995640018467845
camera0.shape
(100, 100, 3)
Action:  0
Position:  [-0.0249667   0.99951808  0.03104204]
Velocity:  [0.00604953 0.02463555]
Reward:  0.9995517679394854
camera0.shape
(100, 100, 3)
Action:  0
Position:  [-0.02490729  0.99950966  0.03131201]
Velocity:  [0.00583347 0.02939091]
Reward:  0.9995365851964148
camera0.shape
(100, 100, 3)
Action:  0
Position:  [-0.02485004  0.99949966  0.03162972]
Velocity:  [0.00561537 0.03419107]
Reward:  0.9995183559606704
camera0.shape
(100, 100, 3)
cantidad de frames 4


In [5]:
# Sanity check: show the dimensions of the first recorded frame.
frames[0].shape

(100, 100, 3)

In [6]:
from Agent import Agent

2023-11-05 12:59:32.451525: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-11-05 12:59:32.482975: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-11-05 12:59:32.483607: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [7]:
#@title Build the agent
# The agent receives the environment, the frames recorded so far and the
# number of frames that make up a state.
# NOTE(review): the meaning of the trailing literal 7 is defined by
# Agent.__init__, which is not visible here — confirm and name it.
agent = Agent(env, frames, state_frames_count, 7)

In [8]:
# Run a single agent step; the cell displays the value returned by run_step().
agent.run_step()

Position:  [-0.02528163  0.99946515  0.03270184]
Velocity:  [-0.09193442  0.18037024]
Reward:  0.7984427133362162
fit


0.7984427133362162

In [9]:
# Encode the current contents of `frames` at the environment's control rate.
playback_fps = 1. / env.control_timestep()
html_video = display_video(frames, framerate=playback_fps)

# Lay out one panel for the reward plus one per observation key.
num_sensors = len(time_step.observation)

_, ax = plt.subplots(1 + num_sensors, 1, sharex=True, figsize=(4, 8))
reward_panel, bottom_panel = ax[0], ax[-1]
reward_panel.plot(ticks, rewards)
reward_panel.set_ylabel('reward')
bottom_panel.set_xlabel('time')

# Per-observation plotting is disabled in this cell, so every panel below the
# reward panel stays blank.

# Last expression of the cell: displays the video.
html_video