In [1]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

In [2]:
import sys
sys.path.insert(0,'../src')

# Configure the rendering backend and verify the MuJoCo / `dm_control` installation
import distutils.util
import subprocess

# Use glfw locally
%env MUJOCO_GL=glfw
# Use osmesa on DSMLP
# %env MUJOCO_GL=osmesa
%env PYOPENGL_PLATFORM=
%env PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python

# Smoke-test the dm_control installation: load a small task and render one
# frame so that import or rendering problems surface here rather than later.
print('Checking that the dm_control installation succeeded...')
try:
    from dm_control import suite
    env = suite.load('cartpole', 'swingup')
    pixels = env.physics.render()
except Exception as e:
    # Raise the actionable hint as the final error and chain the original
    # exception as its cause, so the underlying failure stays in the traceback.
    raise RuntimeError(
      'Something went wrong during installation. Check the shell output above '
      'for more information.\n'
      'If using a hosted Colab runtime, make sure you enable GPU acceleration '
      'by going to the Runtime menu and selecting "Choose runtime type".') from e
else:
    # Only success matters here: drop the rendered frame and the temporary
    # import (`suite` is imported again with the other dm_control imports).
    del pixels, suite


#All `dm_control` imports required for this tutorial

# The basic mujoco wrapper.
from dm_control import mujoco

# Access to enums and MuJoCo library functions.
from dm_control.mujoco.wrapper.mjbindings import enums
from dm_control.mujoco.wrapper.mjbindings import mjlib

# PyMJCF
from dm_control import mjcf

# Composer high level imports
from dm_control import composer
from dm_control.composer.observation import observable
from dm_control.composer import variation

# Imports for Composer tutorial example
from dm_control.composer.variation import distributions
from dm_control.composer.variation import noises
from dm_control.locomotion.arenas import floors

# Control Suite
from dm_control import suite

# Run through corridor example
from dm_control.locomotion.walkers import cmu_humanoid
from dm_control.locomotion.arenas import corridors as corridor_arenas
from dm_control.locomotion.tasks import corridors as corridor_tasks

# # Soccer
# from dm_control.locomotion import soccer

# Manipulation
from dm_control import manipulation

#@title Other imports and helper functions

# General
import copy
import os
import itertools
from IPython.display import clear_output
import numpy as np

# Graphics-related
import matplotlib
import matplotlib.animation as animation
import matplotlib.pyplot as plt
from IPython.display import HTML
import PIL.Image
# Internal loading of video libraries.

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.distributions import Normal
from torch.optim import Adam
# from torch.utils.tensorboard import SummaryWriter

# try out the wrappers
from acme import wrappers
from dm_control import suite
from acme import wrappers
from model import *
from utils import *
from analysis import *
# Soft-Actor-Critic Model
from sac import *
from replay_memory import *
import argparse
import datetime
import itertools
import os
import random
import math
import pickle

# Use svg backend for figure rendering
%config InlineBackend.figure_format = 'svg'

# Font sizes
SMALL_SIZE, MEDIUM_SIZE, BIGGER_SIZE = 8, 10, 12

# One (rc group, parameter, value) entry per matplotlib default that is
# overridden; entries are applied in the order listed.
_FONT_RC = (
    ('font', 'size', SMALL_SIZE),          # controls default text sizes
    ('axes', 'titlesize', SMALL_SIZE),     # fontsize of the axes title
    ('axes', 'labelsize', MEDIUM_SIZE),    # fontsize of the x and y labels
    ('xtick', 'labelsize', SMALL_SIZE),    # fontsize of the tick labels
    ('ytick', 'labelsize', SMALL_SIZE),    # fontsize of the tick labels
    ('legend', 'fontsize', SMALL_SIZE),    # legend fontsize
    ('figure', 'titlesize', BIGGER_SIZE),  # fontsize of the figure title
)
for _group, _param, _value in _FONT_RC:
    plt.rc(_group, **{_param: _value})
del _group, _param, _value  # keep the loop temporaries out of the notebook namespace

# Inline video helper function
if os.environ.get('COLAB_NOTEBOOK_TEST', False):
  # We skip video generation during tests, as it is quite expensive.
  display_video = lambda *args, **kwargs: None
else:
  def display_video(frames, framerate=30):
    """Render a sequence of frames as an inline HTML5 video.

    Args:
      frames: Indexable, non-empty sequence of image arrays. The first frame
        must have a three-element `shape` (height, width, _), which fixes the
        figure size for the whole video.
      framerate: Playback rate in frames per second.

    Returns:
      `HTML` object wrapping the output of `anim.to_html5_video()`.

    Raises:
      IndexError: If `frames` is empty (from `frames[0]`).
    """
    height, width, _ = frames[0].shape
    dpi = 70
    orig_backend = matplotlib.get_backend()
    matplotlib.use('Agg')  # Switch to headless 'Agg' to inhibit figure rendering.
    try:
      fig, ax = plt.subplots(1, 1, figsize=(width / dpi, height / dpi), dpi=dpi)
    finally:
      # Switch back to the original backend, even if figure creation failed.
      matplotlib.use(orig_backend)
    try:
      ax.set_axis_off()
      ax.set_aspect('equal')
      ax.set_position([0, 0, 1, 1])
      im = ax.imshow(frames[0])
      def update(frame):
        im.set_data(frame)
        return [im]
      interval = 1000/framerate
      anim = animation.FuncAnimation(fig=fig, func=update, frames=frames,
                                     interval=interval, blit=True, repeat=False)
      return HTML(anim.to_html5_video())
    finally:
      # The video markup has been built (or building failed) by this point;
      # release the figure so repeated calls do not accumulate open figures.
      plt.close(fig)

# Seed numpy's global RNG so that cell outputs are deterministic. We also try to
# use RandomState instances that are local to a single cell wherever possible.
# NOTE: this call seeds only numpy's global RNG; it does not seed `torch` or
# Python's `random` module, both of which are imported above.
np.random.seed(42)


######  Environment wrappers  ####
from dm_env import specs




from IPython.display import display, HTML

#@title Loading and simulating a `suite` task{vertical-output: true}

# Load the environment
# random_state = np.random.RandomState(42)
# env = suite.load('hopper', 'stand', task_kwargs={'random': random_state})


env: MUJOCO_GL=glfw
env: PYOPENGL_PLATFORM=
env: PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python
Checking that the dm_control installation succeeded...


2023-03-06 20:21:07.719 Python[59467:4329348] ApplePersistenceIgnoreState: Existing state will not be touched. New state will be written to /var/folders/jm/y1xn0yjj22v8l8rwlryfjdj00000gn/T/org.python.python.savedState
  version = LooseVersion(match.group(1))
  for plugin in metadata.entry_points().get(entry_point, []):
  def _figure_format_changed(self, name, old, new):


In [3]:
# Build the walker/walk task and stack the wrappers on top of it in a single
# expression. Wrappers are applied inside-out: the action-spec normalisation
# first, then the clip-based action rescaling, then the single-precision cast.
env = wrappers.SinglePrecisionWrapper(
    MujocoActionNormalizer(
        environment=NormilizeActionSpecWrapper(
            suite.load(domain_name="walker", task_name="walk")
        ),
        rescale='clip',
    )
)



class Args:
    """Plain attribute container handed to the `SAC` constructor as `args`.

    The attribute names look like the usual SAC command-line options; how each
    one is interpreted is decided inside `SAC` - verify there before changing.
    """

    # --- run identification / mode ---
    env_name = 'whatever'
    policy = 'Gaussian'
    eval = True
    seed = 42

    # --- optimisation hyperparameters ---
    gamma = 0.99
    tau = 5e-3
    lr = 3e-4
    alpha = 0.2
    automatic_entropy_tuning = True
    target_update_interval = 1
    updates_per_step = 1

    # --- network / batch sizes ---
    # NOTE(review): 512 and 1024 appear to mirror the "batch512_hidden1024"
    # part of the checkpoint file name loaded below - keep them in sync.
    batch_size = 512
    hidden_size = 1024

    # --- training schedule / replay buffer ---
    num_steps = 1_000_000
    start_steps = 10_000
    replay_size = 1_000_000

    # --- device ---
    # use the cuda to speedup
    # change back to True
    cuda = False


args = Args()

# Flatten the (possibly nested) observation spec into a list of leaf specs.
flat_obs = tree.flatten(env.observation_spec())

# Observation width = sum of each leaf's leading dimension. A leaf with an
# empty shape has no `shape[0]`, so it is counted as a single entry.
obs_dim = sum(spec.shape[0] if spec.shape else 1 for spec in flat_obs)

# Build the Soft-Actor-Critic agent for this observation width / action spec.
agent = SAC(obs_dim, env.action_spec(), args)

# Restore trained weights - UPLOAD YOUR FILE HERE!
model_path = '../data/models/sac_checkpoint_walker_walk_batch512_hidden1024_1123_500'
agent.load_checkpoint(model_path, evaluate=True)

# The policy network is the model whose activations get recorded.
model = agent.policy
hook_dict = init_hook_dict(model)

# Attach one forward hook per Linear layer; every other module is skipped.
for name, module in model.named_modules():
    if not isinstance(module, torch.nn.Linear):
        continue
    print(name, module)
    layer_hook = recordtodict_hook(name=name, hook_dict=hook_dict)
    module.register_forward_hook(layer_hook)




# Roll out a handful of episodes purely to collect activations.
num_episodes_to_run = 10

for i in range(num_episodes_to_run):
    episode_reward = 0
    time_step = env.reset()

    # Step until the environment reports the final time step
    # (alternative check: env.get_termination()).
    while not time_step.last():
        # flatten the current observation and let the agent pick an action
        state = get_flat_obs(time_step)
        action = agent.select_action(state)

        # advance the environment and accumulate the reward it returns
        time_step = env.step(action)
        episode_reward += time_step.reward

    print('Episode: {} Reward: {}'.format(i, episode_reward))


# Turn whatever the hooks recorded into the final per-layer structure.
loaded_hook_dict = compile_hook_dict(hook_dict)

Loading models from ../data/models/sac_checkpoint_walker_walk_batch512_hidden1024_1123_500
linear1 Linear(in_features=24, out_features=1024, bias=True)
linear2 Linear(in_features=1024, out_features=1024, bias=True)
mean_linear Linear(in_features=1024, out_features=6, bias=True)
log_std_linear Linear(in_features=1024, out_features=6, bias=True)
Episode: 0 Reward: 957.2948565781116
Episode: 1 Reward: 947.7378954105079
Episode: 2 Reward: 936.684483201243
Episode: 3 Reward: 975.417980030179
Episode: 4 Reward: 946.7920878296718
Episode: 5 Reward: 955.6125686690211
Episode: 6 Reward: 933.842772198841
Episode: 7 Reward: 954.8095865994692
Episode: 8 Reward: 952.819503207691
Episode: 9 Reward: 944.1028492054902


In [4]:
loaded_hook_dict

{'linear1': array([[-7.3313656 ,  0.1412189 ,  0.21448803, ..., -4.0569754 ,
         -2.7876766 ,  2.9595997 ],
        [-7.340417  , -0.3207389 , -0.48975646, ..., -4.4404936 ,
         -3.7557945 ,  2.6223977 ],
        [-6.8495884 , -2.0276978 , -1.6926117 , ..., -2.2804976 ,
         -3.9817255 ,  2.5995543 ],
        ...,
        [-7.7830048 , -1.3752791 ,  0.7766168 , ..., -4.063099  ,
         -2.4873333 , -1.5259011 ],
        [-7.826517  , -1.5582939 ,  1.208106  , ..., -3.948381  ,
         -2.4613314 , -2.0789487 ],
        [-7.8970375 , -2.0185642 ,  1.3682888 , ..., -4.269564  ,
         -2.365864  , -2.3259103 ]], dtype=float32),
 'linear2': array([[-2.7469845 , -0.9536476 , -3.4483504 , ..., -1.9903384 ,
         -6.59542   , -1.4267786 ],
        [-2.7416854 , -1.0363011 , -2.0316954 , ..., -2.0185485 ,
         -5.7716246 , -1.3568182 ],
        [-3.5836794 , -1.4418666 , -2.9418087 , ..., -2.5853903 ,
         -9.6073    , -1.5890995 ],
        ...,
        [-1.10052

In [5]:
# Pairwise CKA between the recorded activations of every hooked layer, stored
# in long form: one entry per ordered (activation_1, activation_2) pair.
cka_online = {'activation_1': [],
              'activation_2': [],
              'cka': []}

# get combinations between activations (all ordered pairs, including self-pairs)
for activation1 in loaded_hook_dict.keys():
    for activation2 in loaded_hook_dict.keys():
        cka_calc = cka(loaded_hook_dict[activation1], loaded_hook_dict[activation2])
        cka_online['cka'].append(cka_calc)
        cka_online['activation_1'].append(activation1)
        cka_online['activation_2'].append(activation2)

# Reshape to a square layer-by-layer matrix. `DataFrame.pivot` arguments are
# passed by keyword: positional use is deprecated in pandas 1.5 and rejected
# by pandas 2.x.
df = pd.DataFrame(cka_online).pivot(index='activation_1',
                                    columns='activation_2',
                                    values='cka')
sns.heatmap(df, annot=True, cmap="Blues")

  left, right = sorted([left, right], reverse=reverse)
  bottom, top = sorted([bottom, top], reverse=reverse)
  bottom, top = sorted([bottom, top], reverse=reverse)
  left, right = sorted([left, right], reverse=reverse)
  bottom, top = sorted([bottom, top], reverse=reverse)
  left, right = sorted([left, right], reverse=reverse)
  bottom, top = sorted([bottom, top], reverse=reverse)


<matplotlib.axes._subplots.AxesSubplot at 0x29c93ef20>

In [6]:
df

activation_2,linear1,linear2,log_std_linear,mean_linear
activation_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
linear1,1.0,0.434954,0.32188,0.182074
linear2,0.434954,1.0,0.103627,0.084923
log_std_linear,0.32188,0.103627,1.0,0.226691
mean_linear,0.182074,0.084923,0.226691,1.0


In [7]:
agent.policy

GaussianPolicy(
  (linear1): Linear(in_features=24, out_features=1024, bias=True)
  (linear2): Linear(in_features=1024, out_features=1024, bias=True)
  (mean_linear): Linear(in_features=1024, out_features=6, bias=True)
  (log_std_linear): Linear(in_features=1024, out_features=6, bias=True)
)

In [8]:
#@title Environment wrappers
from dm_env import specs


# environment wrappers
class NormilizeActionSpecWrapper(wrappers.EnvironmentWrapper):
    """Expose a [-1, 1] action spec and map such actions to the native bounds.

    The advertised spec has minimum -1 and maximum +1 in every dimension.
    `step` maps an incoming action linearly so that -1 lands on the wrapped
    environment's minimum and +1 on its maximum.
    """

    def __init__(self, environment):
        super().__init__(environment)

        native_spec = environment.action_spec()
        lower = native_spec.minimum

        # Width and start of the native action interval, used by `step`.
        self._scale = native_spec.maximum - lower
        self._offset = lower

        # Zeros shaped like the native bounds, shifted to -1 and +1.
        zeros = lower * 0
        self._action_spec = specs.BoundedArray(
            native_spec.shape,
            native_spec.dtype,
            zeros - 1.,
            zeros + 1.,
            name=native_spec.name)

    def _from_normal_actions(self, actions):
        """Map actions from [-1, 1] to the wrapped environment's range."""
        unit_interval = 0.5 * (actions + 1.0)  # now in the range [0, 1]
        return unit_interval * self._scale + self._offset

    def step(self, action):
        """Convert the normalised action and step the wrapped environment."""
        return self._environment.step(self._from_normal_actions(action))

    def action_spec(self):
        """Return the [-1, 1] action spec built in `__init__`."""
        return self._action_spec


class MujocoActionNormalizer(wrappers.EnvironmentWrapper):
    """Squash actions into [-1, 1] before they reach the wrapped environment.

    `rescale` selects the squashing function applied to every leaf of the
    action structure: 'tanh' uses `np.tanh`, 'clip' uses `np.clip(a, -1, 1)`.
    The option is only checked when `step` is called, not at construction.
    """

    def __init__(self, environment, rescale='clip'):
        super().__init__(environment)
        self._rescale = rescale

    def step(self, action):
        """Apply the configured squashing, then step the wrapped environment.

        Raises:
            ValueError: If `rescale` is neither 'tanh' nor 'clip'.
        """
        mode = self._rescale
        if mode not in ('tanh', 'clip'):
            raise ValueError('Unrecognized scaling option: %s' % self._rescale)

        if mode == 'tanh':
            squash = np.tanh
        else:
            squash = lambda a: np.clip(a, -1., 1.)
        return self._environment.step(tree.map_structure(squash, action))

In [12]:
loaded_hook_dict

{'linear1': array([[-7.3313656 ,  0.1412189 ,  0.21448803, ..., -4.0569754 ,
         -2.7876766 ,  2.9595997 ],
        [-7.340417  , -0.3207389 , -0.48975646, ..., -4.4404936 ,
         -3.7557945 ,  2.6223977 ],
        [-6.8495884 , -2.0276978 , -1.6926117 , ..., -2.2804976 ,
         -3.9817255 ,  2.5995543 ],
        ...,
        [-7.7830048 , -1.3752791 ,  0.7766168 , ..., -4.063099  ,
         -2.4873333 , -1.5259011 ],
        [-7.826517  , -1.5582939 ,  1.208106  , ..., -3.948381  ,
         -2.4613314 , -2.0789487 ],
        [-7.8970375 , -2.0185642 ,  1.3682888 , ..., -4.269564  ,
         -2.365864  , -2.3259103 ]], dtype=float32),
 'linear2': array([[-2.7469845 , -0.9536476 , -3.4483504 , ..., -1.9903384 ,
         -6.59542   , -1.4267786 ],
        [-2.7416854 , -1.0363011 , -2.0316954 , ..., -2.0185485 ,
         -5.7716246 , -1.3568182 ],
        [-3.5836794 , -1.4418666 , -2.9418087 , ..., -2.5853903 ,
         -9.6073    , -1.5890995 ],
        ...,
        [-1.10052

: 