# Initialize

In [1]:
import distutils.util
import subprocess
if subprocess.run('nvidia-smi').returncode:
    raise RuntimeError(
      'Cannot communicate with GPU. '
      'Make sure you are using a GPU Colab runtime. '
      'Go to the Runtime menu and select Choose runtime type.')

print('Installing dm_control...')
#!pip install -q dm_control>=1.0.8

# Configure dm_control to use the EGL rendering backend (requires GPU)
%env MUJOCO_GL=egl

print('Checking that the dm_control installation succeeded...')
try:
    from dm_control import suite
    env = suite.load('cartpole', 'swingup')
    pixels = env.physics.render()
except Exception as e:
    raise e from RuntimeError(
      'Something went wrong during installation. Check the shell output above '
      'for more information.\n'
      'If using a hosted Colab runtime, make sure you enable GPU acceleration '
      'by going to the Runtime menu and selecting "Choose runtime type".')
else:
    del pixels, suite

!echo Installed dm_control $(pip show dm_control | grep -Po "(?<=Version: ).+")

# %pip -q install git+https://github.com/deepmind/acme.git#egg=dm-acme[jax,tf,envs]
# %pip -q install imageio-ffmpeg
# %pip -q install gdown

IMG_HEIGHT = 256
IMG_WIDTH = 256
# Removed unnecessary generated file
! rm -r "=1.0.8"

import os
import random
import math
import pickle
import numpy as np
import tree
# plot the activations
import matplotlib.pyplot as plt
%matplotlib inline
%config InlineBackend.figure_format='retina'
import imageio
from PIL import Image
import io

from sklearn.decomposition import PCA


import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.distributions import Normal
from torch.optim import Adam

#Run to install MuJoCo and `dm_control`
# import this first to resolve the issue.
import sys
sys.path.insert(1, '../source/')
from acme import wrappers
from model import *
from utils import *
# Soft-Actor-Critic Model
from sac import *
from replay_memory import *

# try out the wrappers
from acme import wrappers
from dm_control import suite


Sun Feb 12 11:26:49 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 510.39.01    Driver Version: 510.39.01    CUDA Version: 11.6     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla V100-SXM2...  On   | 00000000:18:00.0 Off |                    0 |
| N/A   36C    P0    55W / 300W |   1648MiB / 32768MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

  from .autonotebook import tqdm as notebook_tqdm


## Environment wrappers

In [2]:
#@title Environment wrappers
from dm_env import specs


# environment wrappers
class NormilizeActionSpecWrapper(wrappers.EnvironmentWrapper):
    """Expose a [-1, 1] action interface on top of the wrapped environment.

    The advertised action spec is bounded by -1 and +1 in every dimension.
    Actions given to `step` are mapped affinely onto the wrapped environment's
    own [minimum, maximum] bounds before being forwarded.
    """

    def __init__(self, environment):
        super().__init__(environment)

        native_spec = environment.action_spec()
        # Affine map parameters: native = unit_interval * scale + offset.
        self._offset = native_spec.minimum
        self._scale = native_spec.maximum - native_spec.minimum

        # Build the new bounds by arithmetic on the native minimum.
        zeros = native_spec.minimum * 0
        self._action_spec = specs.BoundedArray(
            native_spec.shape,
            native_spec.dtype,
            zeros - 1.,
            zeros + 1.,
            name=native_spec.name)

    def _from_normal_actions(self, actions):
        """Map actions from [-1, 1] to the wrapped environment's native range."""
        unit_interval = 0.5 * (actions + 1.0)  # [-1, 1] -> [0, 1]
        return unit_interval * self._scale + self._offset  # [0, 1] -> [minimum, maximum]

    def step(self, action):
        """Convert the normalized action and step the wrapped environment."""
        return self._environment.step(self._from_normal_actions(action))

    def action_spec(self):
        """Return the normalized ([-1, 1]) action spec."""
        return self._action_spec


class MujocoActionNormalizer(wrappers.EnvironmentWrapper):
    """Squash incoming actions into [-1, 1] before they reach the environment.

    Intended for control environments whose actions are bounded in [-1, 1]:
    the actor network may emit unbounded values and this wrapper brings them
    into range, either with `tanh` or by hard clipping.
    """

    def __init__(self, environment, rescale='clip'):
        super().__init__(environment)
        # Squashing mode, 'tanh' or 'clip'; it is validated when `step` is called.
        self._rescale = rescale

    def step(self, action):
        """Squash `action` into [-1, 1] and step the wrapped environment.

        Raises:
          ValueError: if the configured rescale option is neither 'tanh' nor 'clip'.
        """
        mode = self._rescale
        if mode not in ('tanh', 'clip'):
            raise ValueError('Unrecognized scaling option: %s' % self._rescale)
        if mode == 'tanh':
            squash = np.tanh
        else:
            squash = lambda a: np.clip(a, -1., 1.)
        # Apply the squashing function to every leaf of the (possibly nested) action.
        return self._environment.step(tree.map_structure(squash, action))



## Helper functions (moved to `utils.py`)

## Environment and agent setup

**NOTE: Make sure you download the pretrained weights or upload your own weights before running this cell!**

In [3]:
# download the pretrained weights
#https://drive.google.com/file/d/13Xug1PoJ3bl3Ya9MTM0p-AR513mtsIVR/view?usp=share_link

In [4]:
# Build the cheetah "run" task and wrap it, innermost wrapper first:
#   1. NormilizeActionSpecWrapper - exposes a [-1, 1] action spec
#   2. MujocoActionNormalizer     - clips incoming actions to [-1, 1]
#   3. SinglePrecisionWrapper     - acme wrapper applied last (outermost)
env = wrappers.SinglePrecisionWrapper(
    MujocoActionNormalizer(
        environment=NormilizeActionSpecWrapper(
            suite.load(domain_name="cheetah", task_name="run")),
        rescale='clip'))


class Args:
    """Plain settings container passed to the `SAC` constructor.

    The attributes are class-level constants. How each one is consumed is
    defined by the SAC implementation imported from `sac.py` (not shown here).
    """
    # run identification / mode
    env_name = 'whatever'
    policy = 'Gaussian'
    eval = True

    # learning hyperparameters
    gamma = 0.99
    tau = 5e-3
    lr = 3e-4
    alpha = 0.2
    automatic_entropy_tuning = True
    seed = 42

    # training schedule and buffer sizes
    batch_size = 256
    num_steps = 1_000_000
    hidden_size = 256
    updates_per_step = 1
    start_steps = 10_000
    target_update_interval = 1
    replay_size = 1_000_000

    # run on the GPU for speed
    cuda = True


args = Args()

# Flatten the (possibly nested) observation spec into a list of array specs.
flat_obs = tree.flatten(env.observation_spec())
# Length of the flattened observation vector: each entry contributes the product
# of its dimensions. Unlike `shape[0]`, this also handles scalar (shape ())
# entries, which count as 1, and multi-dimensional entries.
obs_dim = sum(int(np.prod(item.shape, dtype=int)) for item in flat_obs)

# setup agent, using Soft-Actor-Critic Model
agent = SAC(obs_dim, env.action_spec(), args)
# load checkpoint - UPLOAD YOUR FILE HERE!
model_path = 'sac_checkpoint_cheetah_123456_10000'
agent.load_checkpoint(model_path, evaluate=True)

# pull out the policy network whose activations will be recorded
model = agent.policy
# setup hook dict (`init_hook_dict` / `recordtodict_hook` come from the
# star-imported helper modules; presumably one slot per hooked layer - TODO confirm)
hook_dict = init_hook_dict(model)
# register a recording forward hook on every Linear layer of the policy
for name, module in model.named_modules():
    if isinstance(module, torch.nn.Linear):
        module.register_forward_hook(recordtodict_hook(name=name, hook_dict=hook_dict))

Loading models from sac_checkpoint_cheetah_123456_10000


## Collecting activations and kinematics

In [5]:
# Names of the cheetah model parts, in the order used for the recorded arrays.
CHEETAH_GEOM_NAMES = [
    'ground', 'torso', 'head',
    'bthigh', 'bshin', 'bfoot',
    'fthigh', 'fshin', 'ffoot',
]
CHEETAH_JOINT_NAMES = ['bthigh', 'bshin', 'bfoot', 'fthigh', 'fshin', 'ffoot']
CHEETAH_ACTUATOR_NAMES = ['bthigh', 'bshin', 'bfoot', 'fthigh', 'fshin', 'ffoot']

# name -> position-in-list lookups for each of the three name lists
geom_names_to_idx = dict(zip(CHEETAH_GEOM_NAMES, range(len(CHEETAH_GEOM_NAMES))))
joint_names_to_idx = dict(zip(CHEETAH_JOINT_NAMES, range(len(CHEETAH_JOINT_NAMES))))
actuator_names_to_idx = dict(zip(CHEETAH_ACTUATOR_NAMES, range(len(CHEETAH_ACTUATOR_NAMES))))

In [6]:
# Roll the loaded policy out for a fixed number of episodes. Each step runs the
# policy (whose Linear layers carry recording hooks) and logs kinematics,
# reward, state and action.
num_episodes_to_run = 42

# per-step kinematics, one growing list per recorded quantity
total_kinematic_dict = {
    'geom_positions': [],
    'joint_angles': [],
    'joint_velocities': [],
    'actuator_forces': [],
}

rewards, states, actions = [], [], []
for i in range(num_episodes_to_run):
    time_step = env.reset()
    episode_reward = 0
    while not time_step.last():  # or env.get_termination()
        # observe, act, advance the simulation
        state = get_flat_obs(time_step)
        action = agent.select_action(state)
        time_step = env.step(action)

        # kinematics of the physics state reached after the step
        kinematic_dict = get_kinematics(env.physics, CHEETAH_GEOM_NAMES, CHEETAH_JOINT_NAMES, CHEETAH_ACTUATOR_NAMES)
        for _kin_key in ('geom_positions', 'joint_angles', 'joint_velocities', 'actuator_forces'):
            total_kinematic_dict[_kin_key].append(kinematic_dict[_kin_key])

        # reward returned by this step
        time_step_reward = time_step.reward
        rewards.append(time_step_reward)
        episode_reward += time_step_reward

        # the state the action was chosen from, and the action itself
        states.append(state)
        actions.append(action)
    # report every tenth episode
    if not i % 10:
        print('Episode: {} Reward: {}'.format(i+1, episode_reward))

Episode: 1 Reward: 918.8241502698511
Episode: 11 Reward: 919.4929616115987
Episode: 21 Reward: 915.8008823245764
Episode: 31 Reward: 921.4741419758648
Episode: 41 Reward: 915.8140766378492


In [7]:
#### optional: save + load the hook_dict
# save_path = 'hook_dict.npy'
# save_hook_dict(hook_dict, save_path)
#
# load_path = 'hook_dict.npy'
# loaded_hook_dict = load_hook_dict(load_path)


In [8]:
# otherwise, just compile the hook_dict
# `compile_hook_dict` comes from the star-imported helper modules (not shown here);
# presumably it consolidates the activations recorded by the forward hooks - TODO confirm.
loaded_hook_dict = compile_hook_dict(hook_dict)

In [9]:
# Turn the per-step kinematic lists into numpy arrays (time along the first axis).
# geom positions are stacked along a new leading axis -> (t, n, 3)
total_kinematic_dict['geom_positions'] = np.stack(total_kinematic_dict['geom_positions'], axis=0)
# the remaining quantities are converted directly
for _kin_key in ('joint_angles', 'joint_velocities', 'actuator_forces'):
    total_kinematic_dict[_kin_key] = np.array(total_kinematic_dict[_kin_key])

In [10]:
# Sanity check: number of recorded time steps, measured on the 'head' geom's
# series at last-axis index 2 (used as the z coordinate elsewhere in this notebook).
len(total_kinematic_dict['geom_positions'][:, geom_names_to_idx['head'], 2])

42000

In [11]:
# Split the stacked kinematic arrays into one time series per named part.
num_timesteps = len(rewards)

# z coordinate (last-axis index 2) of every geom over time
z_geom_dict = {
    geom_name: total_kinematic_dict['geom_positions'][:, geom_names_to_idx[geom_name], 2]
    for geom_name in CHEETAH_GEOM_NAMES
}

# joint angle and joint velocity of every joint over time
joint_angle_dict = {
    joint_name: total_kinematic_dict['joint_angles'][:, joint_names_to_idx[joint_name]]
    for joint_name in CHEETAH_JOINT_NAMES
}
joint_vel_dict = {
    joint_name: total_kinematic_dict['joint_velocities'][:, joint_names_to_idx[joint_name]]
    for joint_name in CHEETAH_JOINT_NAMES
}

# Actuator force of every actuator over time. Index through the actuator
# mapping rather than the joint mapping, so this stays correct if the two
# name lists ever diverge.
act_forces_dict = {
    actuator_name: total_kinematic_dict['actuator_forces'][:, actuator_names_to_idx[actuator_name]]
    for actuator_name in CHEETAH_ACTUATOR_NAMES
}

In [12]:
# Group the per-name kinematic series; the list order is
# [geom z-positions, joint angles, joint velocities, actuator forces].
num_states = [z_geom_dict, joint_angle_dict, joint_vel_dict, act_forces_dict]

In [13]:
num_states

[{'ground': array([0., 0., 0., ..., 0., 0., 0.]),
  'torso': array([0.588667  , 0.59154101, 0.59674237, ..., 0.65061111, 0.64950628,
         0.64418033]),
  'head': array([0.66505983, 0.67102495, 0.68141113, ..., 0.82461644, 0.83437133,
         0.84169424]),
  'bthigh': array([0.47342346, 0.47237282, 0.47171742, ..., 0.51086154, 0.49003443,
         0.46129863]),
  'bshin': array([0.29321003, 0.29277844, 0.29248294, ..., 0.27261632, 0.25629149,
         0.24024391]),
  'bfoot': array([0.13752622, 0.13812621, 0.13863339, ..., 0.13214218, 0.08787675,
         0.05472467]),
  'fthigh': array([0.45129762, 0.45766102, 0.46664087, ..., 0.58562802, 0.59263438,
         0.59740106]),
  'fshin': array([0.247386  , 0.25131866, 0.25794958, ..., 0.34740862, 0.356267  ,
         0.36822702]),
  'ffoot': array([0.09825064, 0.09550321, 0.09859578, ..., 0.16030191, 0.16577168,
         0.18677045])},
 {'bthigh': array([ 0.01095101,  0.03084829,  0.05672276, ..., -0.29847306,
         -0.20588621, -0

In [14]:
def _step_deltas(series):
    """Return the step-to-step change of `series`, padded with a leading 0.

    `np.diff` yields one element fewer than its input; inserting 0 at index 0
    keeps the result the same length as `series` and aligned with it.
    """
    return np.insert(np.diff(series), 0, 0)


# One delta dict per kinematic group, following the order of `num_states`:
# [geom z-positions, joint angles, joint velocities, actuator forces].
# Each group is processed exactly once. The earlier `if / if / if / else` chain
# bound the `else` only to the `i == 2` test, so groups 0 and 1 were also
# written into `act_forces_actions` and left geom keys behind in it.
# NOTE(review): the series concatenate all episodes, so the delta at each
# episode's first step spans an environment reset - confirm this is acceptable.
z_geom_actions = {k: _step_deltas(v) for k, v in num_states[0].items()}
joint_angle_actions = {k: _step_deltas(v) for k, v in num_states[1].items()}
joint_vel_actions = {k: _step_deltas(v) for k, v in num_states[2].items()}
act_forces_actions = {k: _step_deltas(v) for k, v in num_states[3].items()}
        

In [15]:
# Group the per-name step-to-step deltas; the list order is
# [geom z-positions, joint angles, joint velocities, actuator forces].
num_actions = [z_geom_actions, joint_angle_actions, joint_vel_actions, act_forces_actions]

In [16]:
num_actions

[{'ground': array([0., 0., 0., ..., 0., 0., 0.]),
  'torso': array([ 0.        ,  0.00287401,  0.00520135, ...,  0.00106139,
         -0.00110482, -0.00532595]),
  'head': array([0.        , 0.00596512, 0.01038618, ..., 0.01134737, 0.00975488,
         0.00732291]),
  'bthigh': array([ 0.        , -0.00105064, -0.00065539, ..., -0.01540529,
         -0.02082711, -0.0287358 ]),
  'bshin': array([ 0.        , -0.00043159, -0.0002955 , ..., -0.01803324,
         -0.01632483, -0.01604758]),
  'bfoot': array([ 0.        ,  0.00059998,  0.00050718, ..., -0.04685094,
         -0.04426543, -0.03315207]),
  'fthigh': array([0.        , 0.0063634 , 0.00897984, ..., 0.00852168, 0.00700637,
         0.00476668]),
  'fshin': array([0.        , 0.00393266, 0.00663091, ..., 0.0066385 , 0.00885838,
         0.01196002]),
  'ffoot': array([ 0.        , -0.00274743,  0.00309257, ..., -0.00348401,
          0.00546977,  0.02099878])},
 {'bthigh': array([0.        , 0.01989729, 0.02587446, ..., 0.07031179

In [17]:
# Bundle what the rollout loop recorded per step: the flat observation the
# action was chosen from, the chosen action, and the reward the step returned.
sarsa = [states, actions, rewards]

In [18]:
# Kinematics-based counterpart of `sarsa`: the grouped kinematic series, their
# step-to-step deltas, and the per-step rewards.
all_sarsa = [num_states, num_actions, rewards]

In [19]:
all_sarsa

[[{'ground': array([0., 0., 0., ..., 0., 0., 0.]),
   'torso': array([0.588667  , 0.59154101, 0.59674237, ..., 0.65061111, 0.64950628,
          0.64418033]),
   'head': array([0.66505983, 0.67102495, 0.68141113, ..., 0.82461644, 0.83437133,
          0.84169424]),
   'bthigh': array([0.47342346, 0.47237282, 0.47171742, ..., 0.51086154, 0.49003443,
          0.46129863]),
   'bshin': array([0.29321003, 0.29277844, 0.29248294, ..., 0.27261632, 0.25629149,
          0.24024391]),
   'bfoot': array([0.13752622, 0.13812621, 0.13863339, ..., 0.13214218, 0.08787675,
          0.05472467]),
   'fthigh': array([0.45129762, 0.45766102, 0.46664087, ..., 0.58562802, 0.59263438,
          0.59740106]),
   'fshin': array([0.247386  , 0.25131866, 0.25794958, ..., 0.34740862, 0.356267  ,
          0.36822702]),
   'ffoot': array([0.09825064, 0.09550321, 0.09859578, ..., 0.16030191, 0.16577168,
          0.18677045])},
  {'bthigh': array([ 0.01095101,  0.03084829,  0.05672276, ..., -0.29847306,
      