In [1]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

In [2]:
import sys
sys.path.insert(0,'../src')

#Run to install MuJoCo and `dm_control`
import distutils.util
import subprocess

# Use egl locally
%env MUJOCO_GL=glfw
# Use osmesa on DSMLP
# %env MUJOCO_GL=osmesa
%env PYOPENGL_PLATFORM=
%env PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python

print('Checking that the dm_control installation succeeded...')
try:
    from dm_control import suite
    env = suite.load('cartpole', 'swingup')
    pixels = env.physics.render()
except Exception as e:
    raise e from RuntimeError(
      'Something went wrong during installation. Check the shell output above '
      'for more information.\n'
      'If using a hosted Colab runtime, make sure you enable GPU acceleration '
      'by going to the Runtime menu and selecting "Choose runtime type".')
else:
    del pixels, suite


#All `dm_control` imports required for this tutorial

# The basic mujoco wrapper.
from dm_control import mujoco

# Access to enums and MuJoCo library functions.
from dm_control.mujoco.wrapper.mjbindings import enums
from dm_control.mujoco.wrapper.mjbindings import mjlib

# PyMJCF
from dm_control import mjcf

# Composer high level imports
from dm_control import composer
from dm_control.composer.observation import observable
from dm_control.composer import variation

# Imports for Composer tutorial example
from dm_control.composer.variation import distributions
from dm_control.composer.variation import noises
from dm_control.locomotion.arenas import floors

# Control Suite
from dm_control import suite

# Run through corridor example
from dm_control.locomotion.walkers import cmu_humanoid
from dm_control.locomotion.arenas import corridors as corridor_arenas
from dm_control.locomotion.tasks import corridors as corridor_tasks

# # Soccer
# from dm_control.locomotion import soccer

# Manipulation
from dm_control import manipulation

#@title Other imports and helper functions

# General
import copy
import os
import itertools
from IPython.display import clear_output
import numpy as np

# Graphics-related
import matplotlib
import matplotlib.animation as animation
import matplotlib.pyplot as plt
from IPython.display import HTML
import PIL.Image
# Internal loading of video libraries.

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.distributions import Normal
from torch.optim import Adam
# from torch.utils.tensorboard import SummaryWriter

# try out the wrappers
from acme import wrappers
from dm_control import suite
from acme import wrappers
from model import *
from utils import *
from analysis import *
# Soft-Actor-Critic Model
from sac import *
from replay_memory import *
import argparse
import datetime
import itertools
import os
import random
import math
import pickle

# Use svg backend for figure rendering
%config InlineBackend.figure_format = 'svg'

# Font sizes
SMALL_SIZE = 8
MEDIUM_SIZE = 10
BIGGER_SIZE = 12
plt.rc('font', size=SMALL_SIZE)          # controls default text sizes
plt.rc('axes', titlesize=SMALL_SIZE)     # fontsize of the axes title
plt.rc('axes', labelsize=MEDIUM_SIZE)    # fontsize of the x and y labels
plt.rc('xtick', labelsize=SMALL_SIZE)    # fontsize of the tick labels
plt.rc('ytick', labelsize=SMALL_SIZE)    # fontsize of the tick labels
plt.rc('legend', fontsize=SMALL_SIZE)    # legend fontsize
plt.rc('figure', titlesize=BIGGER_SIZE)  # fontsize of the figure title

# Inline video helper function
if os.environ.get('COLAB_NOTEBOOK_TEST', False):
  # We skip video generation during tests, as it is quite expensive.
  display_video = lambda *args, **kwargs: None
else:
  def display_video(frames, framerate=30):
    height, width, _ = frames[0].shape
    dpi = 70
    orig_backend = matplotlib.get_backend()
    matplotlib.use('Agg')  # Switch to headless 'Agg' to inhibit figure rendering.
    fig, ax = plt.subplots(1, 1, figsize=(width / dpi, height / dpi), dpi=dpi)
    matplotlib.use(orig_backend)  # Switch back to the original backend.
    ax.set_axis_off()
    ax.set_aspect('equal')
    ax.set_position([0, 0, 1, 1])
    im = ax.imshow(frames[0])
    def update(frame):
      im.set_data(frame)
      return [im]
    interval = 1000/framerate
    anim = animation.FuncAnimation(fig=fig, func=update, frames=frames,
                                   interval=interval, blit=True, repeat=False)
    return HTML(anim.to_html5_video())

# Seed numpy's global RNG so that cell outputs are deterministic. We also try to
# use RandomState instances that are local to a single cell wherever possible.
np.random.seed(42)


######  Environment wrappers  ####
from dm_env import specs




from IPython.display import display, HTML

#@title Loading and simulating a `suite` task{vertical-output: true}

# Load the environment
# random_state = np.random.RandomState(42)
# env = suite.load('hopper', 'stand', task_kwargs={'random': random_state})


env: MUJOCO_GL=glfw
env: PYOPENGL_PLATFORM=
env: PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python
Checking that the dm_control installation succeeded...


2023-03-06 19:59:10.911 Python[59119:4315124] ApplePersistenceIgnoreState: Existing state will not be touched. New state will be written to /var/folders/jm/y1xn0yjj22v8l8rwlryfjdj00000gn/T/org.python.python.savedState
  version = LooseVersion(match.group(1))
  for plugin in metadata.entry_points().get(entry_point, []):
  def _figure_format_changed(self, name, old, new):


In [3]:
# load the environment
env = suite.load(domain_name="walker", task_name="walk")
# add wrappers onto the environment
env = NormilizeActionSpecWrapper(env)
env = MujocoActionNormalizer(environment=env, rescale='clip')
env = wrappers.SinglePrecisionWrapper(env)



class Args:
    env_name = 'whatever'
    policy = 'Gaussian'
    eval = True
    gamma = 0.99
    tau = 0.005
    lr = 0.0003
    alpha = 0.2
    automatic_entropy_tuning = True
    seed = 42
    batch_size = 512
    num_steps = 1000000
    hidden_size = 1024
    updates_per_step = 1
    start_steps = 10000
    target_update_interval = 1
    replay_size = 1000000
    # use the cuda to speedup
    # change back to True
    cuda = False


args = Args()

# get the dimensionality of the observation_spec after flattening
flat_obs = tree.flatten(env.observation_spec())
# combine all the shapes
# obs_dim = sum([item.shape[0] for item in flat_obs])
obs_dim = 0
for i in flat_obs:
    try:
        obs_dim += i.shape[0]
    except IndexError:
        obs_dim += 1

# setup agent, using Soft-Actor-Critic Model
agent = SAC(obs_dim, env.action_spec(), args)

# load checkpoint - UPLOAD YOUR FILE HERE!
model_path = '../data/models/sac_checkpoint_walker_walk_batch512_hidden1024_1123_500'
agent.load_checkpoint(model_path, evaluate=True)

# pull out model
model = agent.policy
# setup hook dict
hook_dict = init_hook_dict(model)
# add hooks
for name, module in model.named_modules():
    if isinstance(module, torch.nn.Linear):
        print(name, module)
        module.register_forward_hook(recordtodict_hook(name=name, hook_dict=hook_dict))




# run a few episodes just to collect activations
num_episodes_to_run = 10

for i in range(num_episodes_to_run):
    time_step = env.reset()
    episode_reward = 0
    while not time_step.last():  # or env.get_termination()
        # get the state
        state = get_flat_obs(time_step)
        # sample an action
        action = agent.select_action(state)
        time_step = env.step(action)

        # record reward
        episode_reward += time_step.reward
    print('Episode: {} Reward: {}'.format(i, episode_reward))


loaded_hook_dict = compile_hook_dict(hook_dict)

Loading models from ../data/models/sac_checkpoint_walker_walk_batch512_hidden1024_1123_500
linear1 Linear(in_features=24, out_features=1024, bias=True)
linear2 Linear(in_features=1024, out_features=1024, bias=True)
mean_linear Linear(in_features=1024, out_features=6, bias=True)
log_std_linear Linear(in_features=1024, out_features=6, bias=True)
Episode: 0 Reward: 887.2914056692971
Episode: 1 Reward: 959.9205621872097
Episode: 2 Reward: 972.9571659713984
Episode: 3 Reward: 951.9114729100838
Episode: 4 Reward: 938.7138728490099
Episode: 5 Reward: 975.6293765306473
Episode: 6 Reward: 937.6452852552757
Episode: 7 Reward: 969.0426700860262
Episode: 8 Reward: 949.8207999323495
Episode: 9 Reward: 951.6973258918151


In [4]:
loaded_hook_dict

{'linear1': array([[-6.2950077 , -1.8400515 , -0.61484694, ..., -6.196599  ,
         -4.020216  , -2.367227  ],
        [-6.3720865 , -2.5290775 , -0.55355024, ..., -5.506919  ,
         -3.7495894 , -1.7268317 ],
        [-6.044528  , -2.2897046 , -1.4255805 , ..., -6.5680957 ,
         -4.3975916 , -3.0558057 ],
        ...,
        [-7.6413584 , -0.2504742 ,  0.99779725, ..., -3.9121563 ,
         -4.3732467 , -0.20234418],
        [-7.681202  , -0.82079697,  1.5718853 , ..., -5.56293   ,
         -3.7457004 , -1.5461395 ],
        [-7.6753416 , -0.13907528,  0.13671899, ..., -5.111472  ,
         -4.0749817 , -1.4309449 ]], dtype=float32),
 'linear2': array([[-1.4720087 , -0.7104219 , -5.7952986 , ..., -1.180166  ,
          0.44371885, -1.0896236 ],
        [-1.6496288 , -0.6143804 , -5.4861    , ..., -1.1608174 ,
          1.142792  , -1.2387209 ],
        [-1.2699373 , -0.67645717, -4.7653494 , ..., -1.2263747 ,
          0.5790795 , -0.97282654],
        ...,
        [-1.54896

In [5]:
cka_online = {'activation_1': [],
                'activation_2': [],
                'cka': []}

# get combinations between activations
for activation1 in loaded_hook_dict.keys():
    for activation2 in loaded_hook_dict.keys():
        cka_calc = cka(loaded_hook_dict[activation1], loaded_hook_dict[activation2])
        # if activation1 == activation2:
        #     cka_calc = 1
        cka_online['cka'].append(cka_calc)
        cka_online['activation_1'].append(activation1)
        cka_online['activation_2'].append(activation2)

df = pd.DataFrame(cka_online).pivot('activation_1', 'activation_2', 'cka')
sns.heatmap(df, annot=True, cmap="Blues")

  left, right = sorted([left, right], reverse=reverse)
  bottom, top = sorted([bottom, top], reverse=reverse)
  bottom, top = sorted([bottom, top], reverse=reverse)
  left, right = sorted([left, right], reverse=reverse)
  bottom, top = sorted([bottom, top], reverse=reverse)
  left, right = sorted([left, right], reverse=reverse)
  bottom, top = sorted([bottom, top], reverse=reverse)


<matplotlib.axes._subplots.AxesSubplot at 0x179ce2b30>

In [6]:
df

activation_2,linear1,linear2,log_std_linear,mean_linear
activation_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
linear1,1.0,0.447772,0.343874,0.178376
linear2,0.447772,1.0,0.131462,0.092108
log_std_linear,0.343874,0.131462,1.0,0.211454
mean_linear,0.178376,0.092108,0.211454,1.0


In [7]:
agent.policy

GaussianPolicy(
  (linear1): Linear(in_features=24, out_features=1024, bias=True)
  (linear2): Linear(in_features=1024, out_features=1024, bias=True)
  (mean_linear): Linear(in_features=1024, out_features=6, bias=True)
  (log_std_linear): Linear(in_features=1024, out_features=6, bias=True)
)

In [8]:
from bc_net import BCNetworkContinuous

In [9]:
from BCNetwork import BCNetwork
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# network = BCNetwork(24, 6, 1024).to(device)
# network.load_state_dict(
#     torch.load("../data/bc_models/walker_1024_bc.pt", map_location=torch.device('cpu'))
# )


# # load the environment
# if args.env_name == 'HalfCheetah-v4':
#     env = suite.load(domain_name="cheetah", task_name="run")
# else:
#     raise NotImplementedError

# add wrappers onto the environment
env = NormilizeActionSpecWrapper(env)
env = MujocoActionNormalizer(environment=env, rescale='clip')
env = wrappers.SinglePrecisionWrapper(env)

# get the dimensionality of the observation_spec after flattening
flat_obs = tree.flatten(env.observation_spec())
# combine all the shapes

# obs_dim = sum([item.shape[0] for item in flat_obs])

for item in flat_obs:
    temp = []
    if len(item.shape) > 0:
        temp.append(item.shape[0])
obs_dim = sum(temp)

# initialize the network
network = BCNetworkContinuous(obs_dim, env.action_spec().shape[0])

# load the model
# network.load_state_dict(torch.load(args.model_path))
network.load_state_dict(
    torch.load("../data/bc_models/walker_1024_bc.pt", map_location=torch.device('cpu'))
)

# set to eval mode
network.eval()

# setup hook dict
hook_dict = init_hook_dict(network)
# add hooks
for name, module in network.named_modules():
    if isinstance(module, torch.nn.Linear):
        module.register_forward_hook(recordtodict_hook(name=name, hook_dict=hook_dict))



# run a few episodes just to collect activations
num_episodes_to_run = args.num_episodes

for i in range(num_episodes_to_run):
    time_step = env.reset()
    episode_reward = 0
    while not time_step.last():  # or env.get_termination()
        # get the state
        state = get_flat_obs(time_step)
        # add batch dimension
        state = np.expand_dims(state, axis=0)
        # sample an action
        tensor_state = torch.tensor(state, dtype=torch.float32)
        action = network(tensor_state).detach().numpy()
        time_step = env.step(action)
    print('Episode: {} Reward: {}'.format(i, episode_reward))

### optional: save + load the hook_dict

# make folder from args.save_path
os.makedirs(args.save_path, exist_ok=True)

save_path = os.path.join(args.save_path, 'hook_dict.npy')
save_hook_dict(hook_dict, save_path)

RuntimeError: Error(s) in loading state_dict for BCNetworkContinuous:
	Missing key(s) in state_dict: "fc3.weight", "fc3.bias". 
	Unexpected key(s) in state_dict: "mean_linear.weight", "mean_linear.bias", "log_std_linear.weight", "log_std_linear.bias". 
	size mismatch for fc1.weight: copying a param with shape torch.Size([1024, 24]) from checkpoint, the shape in current model is torch.Size([256, 9]).
	size mismatch for fc1.bias: copying a param with shape torch.Size([1024]) from checkpoint, the shape in current model is torch.Size([256]).
	size mismatch for fc2.weight: copying a param with shape torch.Size([1024, 1024]) from checkpoint, the shape in current model is torch.Size([256, 256]).
	size mismatch for fc2.bias: copying a param with shape torch.Size([1024]) from checkpoint, the shape in current model is torch.Size([256]).

In [None]:
bc_nonoise_model_activations_path = "../data/activations/cheetah_123456_10000_nonoise_bcmodel"
bc_nonoise_hooks_path = bc_nonoise_model_activations_path + "/hook_dict.npy"


activations.load_hook_dict()

TypeError: load_hook_dict() missing 1 required positional argument: 'load_path'

In [None]:
def evaluate_network(network, num_episodes=100, deterministic=True):
    """
    Evaluate a RL agent
    :param model: (BaseRLModel object) the RL Agent
    :param num_episodes: (int) number of episodes to evaluate it
    :return: (float) Mean reward for the last num_episodes
    """
    # This function will only work for a single Environment
    all_episode_rewards = []
    for i in range(num_episodes):
        episode_rewards = []
        done = False
        obs = env.reset()
        while not done:
            # _states are only useful when using LSTM policies
            action = network(torch.tensor(obs, dtype=torch.float32)).argmax().item()
            # here, action, rewards and dones are arrays
            # because we are using vectorized env
            obs, reward, done, info = env.step([action])
            episode_rewards.append(reward)

        all_episode_rewards.append(sum(episode_rewards))

    mean_episode_reward = np.mean(all_episode_rewards)
    print("Mean reward:", mean_episode_reward, "Num episodes:", num_episodes)

    return mean_episode_reward

In [None]:
from BCNetwork import BCNetwork
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

network = BCNetwork(24, 6, 1024).to(device)
network.load_state_dict(
    torch.load("../data/bc_models/walker_1024_bc.pt", map_location=torch.device('cpu'))
)

# setup hook dict
hook_dict_bc = init_hook_dict(network)
# add hooks
for name, module in network.named_modules():
    if isinstance(module, torch.nn.Linear):
        module.register_forward_hook(recordtodict_hook(name=name, hook_dict=hook_dict_bc))


# # run a few episodes to collect activations
# num_episodes_to_run = 10

# for i in range(num_episodes_to_run):
#     time_step = env.reset()
#     episode_reward = 0
#     while not time_step.last():
#         # get the state
#         state = get_flat_obs(time_step)


#         # sample an action
        
#         def select_action(self, state, evaluate=False):
#             state = torch.FloatTensor(state).to(self.device).unsqueeze(0)
#             if evaluate is False:
#                 action, _, _ = self.policy.sample(state)
#             else:
#                 _, _, action = self.policy.sample(state)
#             return action.detach().cpu().numpy()[0]
    
#         action = network.select_action(state)


#         time_step = env.step(action)

#         # record reward
#         episode_reward += time_step.reward
        
#     print('Episode: {} Reward: {}'.format(i, episode_reward))


# def evaluate_network(network, num_episodes=10):
#     """
#     Evaluate a RL agent
#     :param model: (BaseRLModel object) the RL Agent
#     :param num_episodes: (int) number of episodes to evaluate it
#     :return: (float) Mean reward for the last num_episodes
#     """
#     all_episode_rewards = []
#     for i in range(num_episodes):
#         episode_rewards = []
#         done = False


#         # flat_obs = tree.flatten(env.observation_spec())
#         # # combine all the shapes
#         # # obs_dim = sum([item.shape[0] for item in flat_obs])
#         # obs_dim = 0
#         # for i in flat_obs:
#         #     try:
#         #         obs_dim += i.shape[0]
#         #     except IndexError:
#         #         obs_dim += 1


#         obs = env.reset()
        
#         while not done:
       

#             action = network(torch.tensor(list(obs[3].values()), dtype=torch.float32))
#             obs, reward, done, info = env.step([action])
#             episode_rewards.append(reward)



#         all_episode_rewards.append(sum(episode_rewards))

#     mean_episode_reward = np.mean(all_episode_rewards)
#     print("Mean reward:", mean_episode_reward, "Num episodes:", num_episodes)

#     return mean_episode_reward

evaluate_network(network)

loaded_hook_dict_bc = compile_hook_dict(hook_dict_bc)

TypeError: must be real number, not NoneType

In [None]:
torch.tensor(list(obs[3].values()), dtype=torch.float32))

SyntaxError: unmatched ')' (3374787892.py, line 1)

In [None]:
obs

TimeStep(step_type=<StepType.FIRST: 0>, reward=None, discount=None, observation=OrderedDict([('orientations', array([ 0.288338  , -0.9575287 , -0.719978  , -0.6939969 ,  0.8276786 ,
       -0.5612024 ,  0.9359683 , -0.35208428,  0.27002808, -0.9628525 ,
        0.983658  , -0.18004684,  0.9929495 ,  0.1185384 ], dtype=float32)), ('height', array(1.3, dtype=float32)), ('velocity', array([0., 0., 0., 0., 0., 0., 0., 0., 0.], dtype=float32))]))

In [None]:
obs[3]

OrderedDict([('orientations',
              array([ 0.288338  , -0.9575287 , -0.719978  , -0.6939969 ,  0.8276786 ,
                     -0.5612024 ,  0.9359683 , -0.35208428,  0.27002808, -0.9628525 ,
                      0.983658  , -0.18004684,  0.9929495 ,  0.1185384 ], dtype=float32)),
             ('height', array(1.3, dtype=float32)),
             ('velocity',
              array([0., 0., 0., 0., 0., 0., 0., 0., 0.], dtype=float32))])

In [None]:
hook_dict

{'fc1': [], 'fc2': [], 'mean_linear': [], 'log_std_linear': []}

In [None]:
loaded_hook_dict_bc

{}

In [None]:
network

BCNetwork(
  (fc1): Linear(in_features=24, out_features=1024, bias=True)
  (fc2): Linear(in_features=1024, out_features=1024, bias=True)
  (mean_linear): Linear(in_features=1024, out_features=6, bias=True)
  (log_std_linear): Linear(in_features=1024, out_features=6, bias=True)
)