In [1]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

In [2]:
import sys
sys.path.insert(0,'../src')

#Run to install MuJoCo and `dm_control`
import distutils.util
import subprocess

# Use egl locally
%env MUJOCO_GL=glfw
# Use osmesa on DSMLP
# %env MUJOCO_GL=osmesa
%env PYOPENGL_PLATFORM=
%env PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python

print('Checking that the dm_control installation succeeded...')
try:
    from dm_control import suite
    env = suite.load('cartpole', 'swingup')
    pixels = env.physics.render()
except Exception as e:
    raise e from RuntimeError(
      'Something went wrong during installation. Check the shell output above '
      'for more information.\n'
      'If using a hosted Colab runtime, make sure you enable GPU acceleration '
      'by going to the Runtime menu and selecting "Choose runtime type".')
else:
    del pixels, suite


#All `dm_control` imports required for this tutorial

# The basic mujoco wrapper.
from dm_control import mujoco

# Access to enums and MuJoCo library functions.
from dm_control.mujoco.wrapper.mjbindings import enums
from dm_control.mujoco.wrapper.mjbindings import mjlib

# PyMJCF
from dm_control import mjcf

# Composer high level imports
from dm_control import composer
from dm_control.composer.observation import observable
from dm_control.composer import variation

# Imports for Composer tutorial example
from dm_control.composer.variation import distributions
from dm_control.composer.variation import noises
from dm_control.locomotion.arenas import floors

# Control Suite
from dm_control import suite

# Run through corridor example
from dm_control.locomotion.walkers import cmu_humanoid
from dm_control.locomotion.arenas import corridors as corridor_arenas
from dm_control.locomotion.tasks import corridors as corridor_tasks

# # Soccer
# from dm_control.locomotion import soccer

# Manipulation
from dm_control import manipulation

#@title Other imports and helper functions

# General
import copy
import os
import itertools
from IPython.display import clear_output
import numpy as np

# Graphics-related
import matplotlib
import matplotlib.animation as animation
import matplotlib.pyplot as plt
from IPython.display import HTML
import PIL.Image
# Internal loading of video libraries.

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.distributions import Normal
from torch.optim import Adam
# from torch.utils.tensorboard import SummaryWriter

# try out the wrappers
from acme import wrappers
from dm_control import suite
from acme import wrappers
from model import *
from utils import *
from analysis import *
# Soft-Actor-Critic Model
from sac import *
from replay_memory import *
import argparse
import datetime
import itertools
import os
import random
import math
import pickle

# Use svg backend for figure rendering
%config InlineBackend.figure_format = 'svg'

# Font sizes
SMALL_SIZE = 8
MEDIUM_SIZE = 10
BIGGER_SIZE = 12
plt.rc('font', size=SMALL_SIZE)          # controls default text sizes
plt.rc('axes', titlesize=SMALL_SIZE)     # fontsize of the axes title
plt.rc('axes', labelsize=MEDIUM_SIZE)    # fontsize of the x and y labels
plt.rc('xtick', labelsize=SMALL_SIZE)    # fontsize of the tick labels
plt.rc('ytick', labelsize=SMALL_SIZE)    # fontsize of the tick labels
plt.rc('legend', fontsize=SMALL_SIZE)    # legend fontsize
plt.rc('figure', titlesize=BIGGER_SIZE)  # fontsize of the figure title

# Inline video helper function
if os.environ.get('COLAB_NOTEBOOK_TEST', False):
  # We skip video generation during tests, as it is quite expensive.
  display_video = lambda *args, **kwargs: None
else:
  def display_video(frames, framerate=30):
    height, width, _ = frames[0].shape
    dpi = 70
    orig_backend = matplotlib.get_backend()
    matplotlib.use('Agg')  # Switch to headless 'Agg' to inhibit figure rendering.
    fig, ax = plt.subplots(1, 1, figsize=(width / dpi, height / dpi), dpi=dpi)
    matplotlib.use(orig_backend)  # Switch back to the original backend.
    ax.set_axis_off()
    ax.set_aspect('equal')
    ax.set_position([0, 0, 1, 1])
    im = ax.imshow(frames[0])
    def update(frame):
      im.set_data(frame)
      return [im]
    interval = 1000/framerate
    anim = animation.FuncAnimation(fig=fig, func=update, frames=frames,
                                   interval=interval, blit=True, repeat=False)
    return HTML(anim.to_html5_video())

# Seed numpy's global RNG so that cell outputs are deterministic. We also try to
# use RandomState instances that are local to a single cell wherever possible.
np.random.seed(42)


######  Environment wrappers  ####
from dm_env import specs




from IPython.display import display, HTML

#@title Loading and simulating a `suite` task{vertical-output: true}

# Load the environment
# random_state = np.random.RandomState(42)
# env = suite.load('hopper', 'stand', task_kwargs={'random': random_state})


env: MUJOCO_GL=glfw
env: PYOPENGL_PLATFORM=
env: PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python
Checking that the dm_control installation succeeded...


2023-02-27 03:34:57.282 Python[7862:589145] ApplePersistenceIgnoreState: Existing state will not be touched. New state will be written to /var/folders/jm/y1xn0yjj22v8l8rwlryfjdj00000gn/T/org.python.python.savedState
  version = LooseVersion(match.group(1))
  for plugin in metadata.entry_points().get(entry_point, []):
  def _figure_format_changed(self, name, old, new):


In [3]:
# load the environment
env = suite.load(domain_name="walker", task_name="walk")
# add wrappers onto the environment
env = NormilizeActionSpecWrapper(env)
env = MujocoActionNormalizer(environment=env, rescale='clip')
env = wrappers.SinglePrecisionWrapper(env)



class Args:
    env_name = 'whatever'
    policy = 'Gaussian'
    eval = True
    gamma = 0.99
    tau = 0.005
    lr = 0.0003
    alpha = 0.2
    automatic_entropy_tuning = True
    seed = 42
    batch_size = 512
    num_steps = 1000000
    hidden_size = 1024
    updates_per_step = 1
    start_steps = 10000
    target_update_interval = 1
    replay_size = 1000000
    # use the cuda to speedup
    # change back to True
    cuda = False


args = Args()

# get the dimensionality of the observation_spec after flattening
flat_obs = tree.flatten(env.observation_spec())
# combine all the shapes
# obs_dim = sum([item.shape[0] for item in flat_obs])
obs_dim = 0
for i in flat_obs:
    try:
        obs_dim += i.shape[0]
    except IndexError:
        obs_dim += 1

# setup agent, using Soft-Actor-Critic Model
agent = SAC(obs_dim, env.action_spec(), args)

# load checkpoint - UPLOAD YOUR FILE HERE!
model_path = '../data/models/sac_checkpoint_walker_walk_batch512_hidden1024_1123_500'
agent.load_checkpoint(model_path, evaluate=True)

# pull out model
model = agent.policy
# setup hook dict
hook_dict = init_hook_dict(model)
# add hooks
for name, module in model.named_modules():
    if isinstance(module, torch.nn.Linear):
        print(name, module)
        module.register_forward_hook(recordtodict_hook(name=name, hook_dict=hook_dict))




# run a few episodes just to collect activations
num_episodes_to_run = 10

for i in range(num_episodes_to_run):
    time_step = env.reset()
    episode_reward = 0
    while not time_step.last():  # or env.get_termination()
        # get the state
        state = get_flat_obs(time_step)
        # sample an action
        action = agent.select_action(state)
        time_step = env.step(action)

        # record reward
        episode_reward += time_step.reward
    print('Episode: {} Reward: {}'.format(i, episode_reward))


loaded_hook_dict = compile_hook_dict(hook_dict)

Loading models from ../data/models/sac_checkpoint_walker_walk_batch512_hidden1024_1123_500
linear1 Linear(in_features=24, out_features=1024, bias=True)
linear2 Linear(in_features=1024, out_features=1024, bias=True)
mean_linear Linear(in_features=1024, out_features=6, bias=True)
log_std_linear Linear(in_features=1024, out_features=6, bias=True)
Episode: 0 Reward: 955.0075500141829
Episode: 1 Reward: 956.1730751353316
Episode: 2 Reward: 948.1554558863863
Episode: 3 Reward: 984.6284092217684
Episode: 4 Reward: 955.3059882670641
Episode: 5 Reward: 927.5523476832313
Episode: 6 Reward: 933.1429244191386
Episode: 7 Reward: 961.2203228957951
Episode: 8 Reward: 977.1477621048689
Episode: 9 Reward: 957.910533875227


In [4]:
loaded_hook_dict

{'linear1': array([[-7.2107925 , -1.5526221 ,  0.15424347, ..., -4.588475  ,
         -5.3386602 ,  0.8661616 ],
        [-7.226389  , -0.7850028 , -0.08305216, ..., -4.314388  ,
         -4.9399867 ,  2.3584607 ],
        [-6.7316866 , -0.09389877, -0.79379654, ..., -4.801814  ,
         -4.9864855 ,  0.5882549 ],
        ...,
        [-7.1725492 , -3.9632177 ,  1.4271803 , ..., -4.1808357 ,
         -4.72775   , -2.1093268 ],
        [-7.044485  , -4.226905  ,  1.1611543 , ..., -3.8486292 ,
         -4.724109  , -2.7586608 ],
        [-6.822184  , -5.0064316 ,  1.6473165 , ..., -3.266891  ,
         -4.435873  , -3.5404892 ]], dtype=float32),
 'linear2': array([[ -1.200247  ,  -0.72230244,  -0.67559105, ...,  -1.405656  ,
          -4.2488637 ,  -0.9742345 ],
        [ -1.6756593 ,  -0.8921489 ,  -1.5488566 , ...,  -1.4183024 ,
          -5.8085213 ,  -1.109582  ],
        [ -1.2008519 ,  -0.68899935,  -1.2157581 , ...,  -1.3481712 ,
          -4.7452326 ,  -0.91081744],
        ...,

In [5]:
cka_online = {'activation_1': [],
                'activation_2': [],
                'cka': []}

# get combinations between activations
for activation1 in loaded_hook_dict.keys():
    for activation2 in loaded_hook_dict.keys():
        cka_calc = cka(loaded_hook_dict[activation1], loaded_hook_dict[activation2])
        # if activation1 == activation2:
        #     cka_calc = 1
        cka_online['cka'].append(cka_calc)
        cka_online['activation_1'].append(activation1)
        cka_online['activation_2'].append(activation2)

df = pd.DataFrame(cka_online).pivot('activation_1', 'activation_2', 'cka')
sns.heatmap(df, annot=True, cmap="Blues")

  left, right = sorted([left, right], reverse=reverse)
  bottom, top = sorted([bottom, top], reverse=reverse)
  bottom, top = sorted([bottom, top], reverse=reverse)
  left, right = sorted([left, right], reverse=reverse)
  bottom, top = sorted([bottom, top], reverse=reverse)
  left, right = sorted([left, right], reverse=reverse)
  bottom, top = sorted([bottom, top], reverse=reverse)


<matplotlib.axes._subplots.AxesSubplot at 0x29a1eab60>

In [6]:
df

activation_2,linear1,linear2,log_std_linear,mean_linear
activation_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
linear1,1.0,0.480806,0.341848,0.192624
linear2,0.480806,1.0,0.159888,0.111561
log_std_linear,0.341848,0.159888,1.0,0.241109
mean_linear,0.192624,0.111561,0.241109,1.0


In [11]:
from BCNetwork import BCNetwork
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

network = BCNetwork(24, 6, 1024).to(device)
network.load_state_dict(
    torch.load("../data/bc_models/walker_1024_bc.pt", map_location=torch.device('cpu'))
)

model = network
# setup hook dict
hook_dict = init_hook_dict(model)
# add hooks
for name, module in model.named_modules():
    if isinstance(module, torch.nn.Linear):
        module.register_forward_hook(recordtodict_hook(name=name, hook_dict=hook_dict))


# TODO RUN EPISODES FOR BC
# run a few episodes just to collect activations
num_episodes_to_run = 10

for i in range(num_episodes_to_run):
    time_step = env.reset()
    episode_reward = 0
    while not time_step.last():
        # get the state
        state = get_flat_obs(time_step)
        # sample an action
        action = agent.select_action(state)
        time_step = env.step(action)

        # record reward
        episode_reward += time_step.reward
        
    print('Episode: {} Reward: {}'.format(i, episode_reward))


loaded_hook_dict_bc = compile_hook_dict(hook_dict)

Episode: 0 Reward: 947.0214768312871
Episode: 1 Reward: 933.8959191003814
Episode: 2 Reward: 941.6988157145679
Episode: 3 Reward: 945.9832877377048
Episode: 4 Reward: 935.8976229743566
Episode: 5 Reward: 954.0624102358706
Episode: 6 Reward: 957.8507294207811
Episode: 7 Reward: 940.6885181572288
Episode: 8 Reward: 979.2156553566456
Episode: 9 Reward: 937.6130148172379


In [12]:
hook_dict

{'fc1': [], 'fc2': [], 'mean_linear': [], 'log_std_linear': []}

In [9]:
loaded_hook_dict_bc

{}

In [10]:
network

BCNetwork(
  (fc1): Linear(in_features=24, out_features=1024, bias=True)
  (fc2): Linear(in_features=1024, out_features=1024, bias=True)
  (mean_linear): Linear(in_features=1024, out_features=6, bias=True)
  (log_std_linear): Linear(in_features=1024, out_features=6, bias=True)
)