In [2]:
import sys
sys.path.append("..")
from model.BERT import *

In [29]:
import torch
import torch.nn as nn
import torch.nn.functional as F

import torchvision.models as tmodels
from functools import partial
import collections

# Dummy data: 10 batches of uniformly random feature vectors, each batch shaped (16, 128).
NUM_BATCHES, BATCH_SIZE, IN_FEATURES, OUT_FEATURES = 10, 16, 128, 64
dataset = [torch.rand(BATCH_SIZE, IN_FEATURES) for _ in range(NUM_BATCHES)]

# Network under inspection: a single fully connected layer mapping 128 features to 64.
net = nn.Linear(IN_FEATURES, OUT_FEATURES)

# Latest inputs / outputs seen by each hooked module, keyed by module name.
# Each forward call OVERWRITES the previous entry for that name (nothing is accumulated).
# defaultdict(list) only means that reading a name that was never hooked yields [] instead
# of raising KeyError.
func_inputs = collections.defaultdict(list)
func_activations = collections.defaultdict(list)


def _to_cpu(value):
    """Return `value` with every tensor inside it detached and moved to the CPU.

    Tensors are detached so the stored activation does not keep the autograd graph alive.
    Lists, tuples (including namedtuples) and dicts are rebuilt with their elements
    converted recursively. Anything else (e.g. None, ints) is returned unchanged.
    """
    if isinstance(value, torch.Tensor):
        return value.detach().cpu()
    if isinstance(value, tuple) and hasattr(value, "_fields"):
        # namedtuple: its constructor takes the fields as positional arguments
        return type(value)(*(_to_cpu(item) for item in value))
    if isinstance(value, tuple):
        return tuple(_to_cpu(item) for item in value)
    if isinstance(value, list):
        return [_to_cpu(item) for item in value]
    if isinstance(value, dict):
        return {key: _to_cpu(item) for key, item in value.items()}
    return value


def save_activation(name, mod, inps, out):
    """Forward hook that records a module's most recent inputs and output.

    Intended to be registered as ``partial(save_activation, name)`` so that PyTorch
    supplies the remaining ``(module, inputs, output)`` arguments.

    Args:
        name: Key under which the values are stored in `func_inputs` / `func_activations`.
        mod: The module that just ran (unused; required by the hook signature).
        inps: Iterable of the module's forward inputs.
        out: The module's forward output; may be a tensor or a (nested) list/tuple/dict
            of tensors.

    Returns:
        None, so the module's output is left unmodified.
    """
    func_inputs[name] = [_to_cpu(inp) for inp in inps]
    func_activations[name] = _to_cpu(out)

# Register a forward hook on every module yielded by net.named_modules() (for a bare
# nn.Linear that is only the root module, whose name is the empty string).
# Note: hooks are called EVERY TIME the module performs a forward pass. save_activation
# stores one value per name, so a module that is called repeatedly at different stages of a
# forward pass (like a shared ReLU) ends up holding only its last call. Editing the forward
# pass code to save activations is the way to go for those cases.

# Remove hooks left behind by an earlier run of this cell so that re-running it does not
# stack duplicate hooks on the same modules.
for handle in globals().get("net_hook_handles", []):
    handle.remove()

# Keep the returned handles: calling .remove() on them is the only way to detach the hooks.
net_hook_handles = []
for name, m in net.named_modules():
    # partial binds the layer name as the hook's first argument
    net_hook_handles.append(m.register_forward_hook(partial(save_activation, name)))

# Run every batch through the network; each call fires the forward hooks registered on `net`.
for batch in dataset:
    out = net(batch)

# Rebind func_activations to a plain-dict snapshot of what the hooks stored. Nothing is
# concatenated here: each entry is whatever save_activation last wrote for that name.
func_activations = dict(func_activations)

# Sanity check: print the shape of every saved activation.
for layer_name, activation in func_activations.items():
    print(layer_name, activation.shape)
    print("***")

 torch.Size([16, 64])
***


In [30]:
# Hyperparameters for a small BERT: 2 hidden layers, 2 attention heads, hidden size 768.
bert_config_kwargs = dict(
    hidden_size=768,
    num_hidden_layers=2,
    num_attention_heads=2,
    intermediate_size=3072,
    hidden_act="gelu",
    hidden_dropout_prob=0.1,
    attention_probs_dropout_prob=0.1,
    max_position_embeddings=512,
    type_vocab_size=2,
    initializer_range=0.02,
)
bert_config = BertConfig(**bert_config_kwargs)

# NOTE(review): the second positional argument (2) is presumably the number of output
# labels — confirm against the BertForSequenceClassification signature in model.BERT.
model = BertForSequenceClassification(bert_config, 2)

init_weight = True


In [31]:
# List the qualified name of every module in `model`, one per line. The first entry is the
# root module itself, whose name is the empty string (it prints as a blank line).
for module_name, _module in model.named_modules():
    print(module_name)


bert
bert.embeddings
bert.embeddings.word_embeddings
bert.embeddings.position_embeddings
bert.embeddings.token_type_embeddings
bert.embeddings.LayerNorm
bert.embeddings.dropout
bert.encoder
bert.encoder.layer
bert.encoder.layer.0
bert.encoder.layer.0.attention
bert.encoder.layer.0.attention.self
bert.encoder.layer.0.attention.self.query
bert.encoder.layer.0.attention.self.key
bert.encoder.layer.0.attention.self.value
bert.encoder.layer.0.attention.self.dropout
bert.encoder.layer.0.attention.output
bert.encoder.layer.0.attention.output.dense
bert.encoder.layer.0.attention.output.LayerNorm
bert.encoder.layer.0.attention.output.dropout
bert.encoder.layer.0.intermediate
bert.encoder.layer.0.intermediate.dense
bert.encoder.layer.0.output
bert.encoder.layer.0.output.dense
bert.encoder.layer.0.output.LayerNorm
bert.encoder.layer.0.output.dropout
bert.encoder.layer.1
bert.encoder.layer.1.attention
bert.encoder.layer.1.attention.self
bert.encoder.layer.1.attention.self.query
bert.encoder.layer

In [32]:
# Register a forward hook on every module yielded by model.named_modules(), i.e. the BERT
# classifier itself and all of its submodules.
# Note: hooks are called EVERY TIME the module performs a forward pass. save_activation
# stores one value per name, so a module that is called repeatedly at different stages of a
# forward pass ends up holding only its last call. Editing the forward pass code to save
# activations is the way to go for those cases.
# The hooks write into the same func_inputs / func_activations dictionaries used for `net`;
# the root module's name is the empty string, so it reuses the "" key that `net` wrote to.

# Remove hooks left behind by an earlier run of this cell so that re-running it does not
# stack duplicate hooks on the same modules.
for handle in globals().get("bert_hook_handles", []):
    handle.remove()

# Keep the returned handles: calling .remove() on them is the only way to detach the hooks.
bert_hook_handles = []
for name, m in model.named_modules():
    # partial binds the layer name as the hook's first argument
    bert_hook_handles.append(m.register_forward_hook(partial(save_activation, name)))