In [None]:
import numpy as np
from dm_control import suite
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.animation as animation
from IPython.display import HTML
import copy
from dm_control.rl.control import Environment

In [None]:
#@title Loading and simulating a `suite` task{vertical-output: true}

# Fixed-seed RNG that is handed to the task through its `random` keyword.
random_state = np.random.RandomState(42)

# Build the cartpole "balance" task from the control suite.
env: Environment = suite.load(
    domain_name='cartpole',
    task_name='balance',
    task_kwargs=dict(random=random_state),
)

In [None]:
def display_video(frames, framerate=30):
    """Encode a sequence of image frames as an inline HTML5 video.

    Args:
        frames: Non-empty sequence of image arrays. The ``shape`` of the first
            frame must unpack as ``(height, width, channels)``; it determines
            the size of the figure used for rendering.
        framerate: Playback rate in frames per second.

    Returns:
        An ``IPython.display.HTML`` object wrapping the encoded video.
    """
    height, width, _ = frames[0].shape
    dpi = 70
    orig_backend = matplotlib.get_backend()
    matplotlib.use('Agg')  # Switch to headless 'Agg' to inhibit figure rendering.
    try:
        fig, ax = plt.subplots(1, 1, figsize=(width / dpi, height / dpi), dpi=dpi)
    finally:
        # Switch back to the original backend, even if figure creation failed.
        matplotlib.use(orig_backend)

    try:
        # Let the image fill the whole figure: no axes decorations, no margins.
        ax.set_axis_off()
        ax.set_aspect('equal')
        ax.set_position([0, 0, 1, 1])
        im = ax.imshow(frames[0])

        def update(frame):
            # Swap the pixel data in place instead of redrawing the axes.
            im.set_data(frame)
            return [im]

        interval = 1000 / framerate  # Delay between frames in milliseconds.
        anim = animation.FuncAnimation(fig=fig, func=update, frames=frames,
                                       interval=interval, blit=True, repeat=False)
        return HTML(anim.to_html5_video())
    finally:
        # The figure is only needed while encoding; close it so repeated calls
        # do not accumulate open figures in pyplot.
        plt.close(fig)

In [None]:
# Per-step buffers filled by the rollout loop further down: simulation time,
# reward, observation snapshot and rendered camera frame.
ticks, rewards, observations, frames = [], [], [], []

# Fetch the action specification, then reset the environment and keep the
# time step that the reset returns.
spec = env.action_spec()
time_step = env.reset()


In [12]:
# Show the shape reported by the environment's action specification.
spec.shape

(1,)

In [13]:
# Show the observation carried by the most recently stored time step.
time_step.observation

OrderedDict([('position', array([ 0.21311321,  0.19642511, -0.98051883])),
             ('velocity', array([ 0.05017089, -4.78948813]))])

In [None]:
state_frames_count: int = 10000

# Start every run of this cell from empty buffers. The loop below appends one
# entry per step, so re-running the cell on top of an earlier rollout would
# leave `frames` longer than `state_frames_count`, and both are later passed
# to the Agent together.
ticks = []
rewards = []
observations = []
frames = []

# NOTE(review): the loop issues six print calls per step (60,000 lines for the
# default count); consider throttling or removing them once debugging is done.
# NOTE(review): if an episode ends before `state_frames_count` steps, the
# environment presumably restarts it, which would put a `None` reward and a
# rewound time value into the buffers — confirm against dm_control.
for i in range(state_frames_count):
    # The same zero action is applied on every step.
    action = 0
    print("Action: ", action)
    time_step = env.step(action)
    print("Position: ", time_step.observation['position'])
    print("Velocity: ", time_step.observation['velocity'])
    print("Reward: ", time_step.reward)

    # Render the current physics state through camera 0 as a 100x100 image.
    camera0 = env.physics.render(camera_id=0, height=100, width=100)
    print("camera0.shape")
    print(camera0.shape)
    frames.append(camera0)

    rewards.append(time_step.reward)
    # Store an independent copy of the observation for this step.
    observations.append(copy.deepcopy(time_step.observation))
    ticks.append(env.physics.data.time)

# Encode the recorded frames; the frame rate is the reciprocal of the
# environment's control timestep.
html_video = display_video(frames, framerate=1. / env.control_timestep())

# Show video and plot reward and observations
num_sensors = len(time_step.observation)

# One panel for the reward plus one per observation entry, sharing the x axis.
_, ax = plt.subplots(1 + num_sensors, 1, sharex=True, figsize=(4, 8))
ax[0].plot(ticks, rewards)
ax[0].set_ylabel('reward')
ax[-1].set_xlabel('time')

for i, key in enumerate(time_step.observation):
    # Stack this observation entry across all recorded steps.
    data = np.asarray([obs[key] for obs in observations])
    ax[i + 1].plot(ticks, data, label=key)
    ax[i + 1].set_ylabel(key)

print(f'cantidad de frames {len(frames)}')

html_video

In [None]:
# Show the array shape of the first recorded camera frame.
frames[0].shape

In [None]:
from Agent import Agent

In [None]:
#@title Build the agent
# Positional arguments: the environment, the recorded frames,
# `state_frames_count`, and a trailing constant.
# NOTE(review): what the trailing 7 configures is defined in the Agent module,
# which is not shown here — confirm its meaning and give it a name.
agent = Agent(env, frames, state_frames_count, 7)

In [None]:
# Call the agent's run_step(); what it does is defined in the Agent module,
# which is not shown in this notebook.
agent.run_step()

In [None]:
# Encode the current contents of `frames`, using the reciprocal of the
# environment's control timestep as the frame rate.
playback_fps = 1. / env.control_timestep()
html_video = display_video(frames, framerate=playback_fps)

# Lay out a reward panel followed by one panel per observation entry,
# all sharing the x axis.
num_sensors = len(time_step.observation)
_, ax = plt.subplots(nrows=1 + num_sensors, ncols=1, sharex=True, figsize=(4, 8))

# Only the reward curve is drawn in this cell; the per-observation panels
# are created but left empty.
ax[0].plot(ticks, rewards)
ax[0].set_ylabel('reward')
ax[-1].set_xlabel('time')

html_video