
https://s3.us-west-2.amazonaws.com/montemac.share/probing_main.html


### To do:
* Collect a bunch of environments
* Collect activations for each environment with the mouse in its initial state
* Save activations
* Train probes on these activations.

In [1]:


# Imports
%reload_ext autoreload
%autoreload 2

import os
import pickle

import numpy as np
import pandas as pd
import scipy as sp
from sklearn.feature_selection import f_classif
import torch as t
import torch.nn.functional as f
import xarray as xr
import plotly.express as px
import plotly as py
import plotly.subplots
import plotly.graph_objects as go
from einops import rearrange, repeat
from IPython.display import Video, display
from tqdm.auto import tqdm
import warnings

# NOTE: this is Monte's RL hooking code (and other stuff will be added in the future)
# Install normally with: pip install circrl
import circrl.module_hook as cmh
import circrl.rollouts as cro
import circrl.probing as cpr

import procgen_tools.models as models
import procgen_tools.maze as maze

# Suppress any warning whose message matches "labels with no predicted samples".
# NOTE(review): presumably emitted by sklearn metrics while scoring probes — confirm the source.
warnings.filterwarnings("ignore", message=r'.*labels with no predicted samples.*')

# Hack to make sure cwd is the script folder
# NOTE(review): `_dh` is not defined in this file; presumably it is IPython's
# directory-history list, in which case this line only works inside an
# IPython/Jupyter session — confirm before running as a plain script.
os.chdir(globals()['_dh'][0])

import numpy as np
import procgen_tools
import matplotlib.pyplot as plt
from procgen_tools.imports import *
from procgen_tools import visualization, maze, vfield, patch_utils



In [38]:
# Load models:
# Two policy/hook pairs are loaded so the same probes can be run on both models.
# NOTE(review): load_model is not defined in the visible cells; presumably it is
# provided by `from procgen_tools.imports import *`. The meaning of its arguments
# ('15' vs '1', and 15) is not shown here — confirm against its definition.
policy1, hook1 = load_model('15', 15)
policy2, hook2 = load_model('1', 15)
# NOTE(review): AX_SIZE is not referenced anywhere in the visible cells.
AX_SIZE = 2

In [39]:
# Generate mazes:
# The resulting vectorised environment is used below both as the source of the
# observations fed to the models (venv.reset()) and as the source of the probe
# labels (maze.state_from_venv).
# NOTE(review): presumably num=1000 is the number of parallel envs and
# num_levels=0 means "no limit on distinct levels" — confirm in maze.create_venv.

venv = maze.create_venv(num=1000, start_level=0, num_levels=0)



In [40]:
# Collect activations
# Labels of the hooked values to cache for each model; the probing helpers in
# this notebook later look activations up by these same labels.
values_to_store = [
    'embedder.block1.conv_in0',
    'embedder.block1.res1.resadd_out',
    'embedder.block1.res2.resadd_out',
    'embedder.block2.res1.resadd_out',
    'embedder.block2.res2.resadd_out',
    'embedder.block3.res1.resadd_out',
    'embedder.block3.res2.resadd_out',
    'embedder.flatten_out',
    'embedder.relufc_out',
    'fc_policy_out',
]


# Run obs through model to get all the activations

# Take ONE snapshot of the observations and feed it to every hook. Calling
# venv.reset() separately per hook only gives identical inputs if reset() is
# idempotent; a single snapshot guarantees that all models see the same batch
# and that the batch corresponds to the venv state the labels are read from.
initial_obs = venv.reset().astype('float32')

# NOTE(review): `hook` is not defined in the visible cells; presumably it is
# provided by `from procgen_tools.imports import *` — confirm.
for model_hook in (hook, hook1, hook2):
    model_hook.run_with_input(initial_obs, values_to_store=values_to_store)

In [41]:
def is_obj_in_pos(obj, pos, venv):
    """Return, per environment, whether grid cell ``pos`` equals ``obj``.

    Args:
        obj: Value to compare the grid cell against (e.g. ``maze.BLOCKED``).
        pos: Index into the grid returned by ``full_grid()``.
        venv: Vectorised environment; ``venv.num_envs`` states are inspected.

    Returns:
        Boolean numpy array with one entry per environment, in env-index order.
    """
    matches = [
        maze.state_from_venv(venv, idx).full_grid()[pos] == obj
        for idx in range(venv.num_envs)
    ]
    return np.array(matches, dtype=bool)

# Train sparse linear probes on observation of model as a baseline

def test_probes_on_obs( obj_is_in_pos_array, index_nums = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]), hook_to_use = hook1):
    """Fit sparse linear probes on 'embedder.block1.conv_in0' and print the scores.

    Args:
        obj_is_in_pos_array: Per-environment labels the probes are trained on.
        index_nums: Values passed through as ``index_nums`` to
            ``cpr.sparse_linear_probe``; also used as the printed table's index.
        hook_to_use: Module hook whose cached activations are probed.

    Returns:
        None. A table of score per ``index_nums`` entry is printed.
    """
    probed_labels = ['embedder.block1.conv_in0']
    probe_kwargs = dict(
        index_nums=index_nums,
        random_state=42,
        class_weight='balanced',
        max_iter=1000,
        C=10.,
    )
    results, _ = cpr.sparse_linear_probe(
        hook_to_use, probed_labels, obj_is_in_pos_array, **probe_kwargs)

    # Only one value label was probed, so take its score column.
    first_label_scores = results.score.isel(value_label=0).values
    score_table = pd.DataFrame(
        {'num_activations': index_nums, 'test_score': first_label_scores})
    score_table = score_table.set_index('num_activations')
    print(score_table)

# Apply probes to various layers in the network
def _plot_sparse_probe_scores(results, y, index_nums, title, include_limits=True):
    """Plot probe score per value label, one line per entry of ``index_nums``.

    Args:
        results: Probe results; ``results.score`` must convert to a dataframe
            with ``value_label``, ``index_num_step`` and ``score`` columns.
        y: Label array; its mean is used to draw the baseline line.
        index_nums: Array mapping ``index_num_step`` to the displayed ``K``.
        title: Figure title.
        include_limits: When true, add "perfect" and "baseline" reference lines.
    """
    scores_df = results.score.to_dataframe().reset_index()
    scores_df['K'] = index_nums[scores_df['index_num_step']]
    fig = px.line(scores_df, x='value_label', y='score', color='K', title=title)
    if include_limits:
        fig.add_hline(y=1., line_dash="dot", annotation_text="perfect", annotation_position="bottom right")
        # Equals max(mean, 1 - mean): the accuracy of always predicting the
        # more frequent class.
        baseline_score = abs(y.mean()-0.5) + 0.5
        fig.add_hline(y=baseline_score, line_dash="dot", annotation_text="baseline",
                annotation_position="bottom right")
    fig.show()


def test_probes_layers(obj_is_in_pos_array, hook_to_use = hook, title=None):
    """Fit sparse linear probes on several layers and plot the scores.

    Args:
        obj_is_in_pos_array: Per-environment labels the probes are trained on.
        hook_to_use: Module hook whose cached activations are probed.
        title: Optional figure title. When omitted, the historical title is
            used: it embeds the notebook-level global ``pos`` and the fixed
            wording '"is open"', whatever the labels actually encode. Pass an
            explicit title when probing for anything else.

    Returns:
        None. The figure is shown as a side effect.
    """
    if title is None:
        # Resolve the title BEFORE the slow probe training. The title used to
        # be built afterwards from the global `pos`, so an undefined `pos`
        # raised NameError only once all the training work had been done.
        pos_label = globals().get('pos', '<unknown position>')
        title = f'Probe score over layers and K-values for {pos_label} "is open"'

    value_labels_to_plot = [
        'embedder.block1.conv_in0',
        'embedder.block1.res1.resadd_out',
        'embedder.block1.res2.resadd_out',
        'embedder.block2.res1.resadd_out',
        'embedder.block2.res2.resadd_out',
        'embedder.block3.res1.resadd_out',
        'embedder.block3.res2.resadd_out']

    index_nums = np.array([1, 2, 10, 50, 100])

    results, _ = cpr.sparse_linear_probe(hook_to_use, value_labels_to_plot, obj_is_in_pos_array,
        index_nums = index_nums, random_state=42, class_weight='balanced', max_iter=10000, C=10.)

    _plot_sparse_probe_scores(results, obj_is_in_pos_array, index_nums, title)





In [43]:
# Label each env by whether grid cell (4, 5) equals maze.BLOCKED, then run the
# baseline probe on those labels using `hook`.
obj = maze.BLOCKED
# NOTE: `pos` is also read as a notebook global by test_probes_layers when it
# builds its default plot title.
pos = (4,5)
blocked_is_in_4_5_array = is_obj_in_pos(obj, pos, venv)
test_probes_on_obs(blocked_is_in_4_5_array, index_nums = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]), hook_to_use = hook)


  0%|          | 0/1 [00:00<?, ?it/s]

                 test_score
num_activations            
1                     0.950
2                     0.975
3                     0.975
4                     0.975
5                     0.975
6                     0.975
7                     0.975
8                     0.975
9                     0.975
10                    1.000


In [44]:




# Probe every listed layer for the same "BLOCKED at pos" labels, once per model hook.
# NOTE(review): the default plot title says "is open" although these labels
# mark BLOCKED cells.
test_probes_layers(blocked_is_in_4_5_array, hook1)
test_probes_layers(blocked_is_in_4_5_array, hook2)



  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]