# Import libraries

In [1]:
from transformers import GPT2LMHeadModel, GPT2TokenizerFast, GPT2Tokenizer
from datasets import load_dataset
from tqdm import tqdm
import json
import torch
import argparse
import datasets
import numpy as np
import seaborn as sns


  from .autonotebook import tqdm as notebook_tqdm


# Config

In [2]:
dataset_nm = "Health_and_fitness"
dataset_path = f"../m2d2/dataset/{dataset_nm}/train.txt"

In [3]:
device_id = 'cuda'
tuned_model_path = "../m2d2/dataset/Health_and_fitness/models/checkpoint-20000/"
n_layers = 12
list_modules = ['attn', 'mlp']
trace_module_id = "transformer.h.{l}.{m}"

In [4]:
list_trace_module_ids = []

for l in range(n_layers):
    for m in list_modules:
        list_trace_module_ids.append(trace_module_id.format(l=l, m=m))

print(list_trace_module_ids[:3])

['transformer.h.0.attn', 'transformer.h.0.mlp', 'transformer.h.1.attn']


# Model

## load models

In [5]:
base_model = GPT2LMHeadModel.from_pretrained("gpt2").to(device_id)
tuned_model = GPT2LMHeadModel.from_pretrained(tuned_model_path).to(device_id)
gpt2_tokenizer = GPT2Tokenizer.from_pretrained("gpt2")

In [6]:
base_model = GPT2LMHeadModel.from_pretrained("gpt2").to(device_id)
tuned_model = GPT2LMHeadModel.from_pretrained(tuned_model_path).to(device_id)

## save_tuned_model_activation_hook

In [7]:
def save_tuned_model_activation(m_id):
    def save_tuned_model_activation_hook(module, _input, _output):
        if m_id.endswith('attn'):
            tuned_model_activations[m_id] = _output[0].detach()
        elif m_id.endswith('mlp'):
            tuned_model_activations[m_id] = _output.detach()
    return save_tuned_model_activation_hook

## restore_tuned_model_activation_hook

In [8]:
def restore_base_model_activation(m_id, t):
    def restore_base_model_activation_hook(module, _input, _output):
        tuned_output = tuned_model_activations[m_id][:, t]
        if m_id.endswith('attn'):
            base_output = _output[0].detach()
            base_output[:, t] = tuned_output
            return tuple([base_output, tuple([_output[1][0], _output[1][1]])])
        elif m_id.endswith('mlp'):
            base_output = _output.detach()
            base_output[:, t] = tuned_output
            return base_output
    return restore_base_model_activation_hook

# Data

In [9]:
dataset = load_dataset("text", data_files=dataset_path)


Found cached dataset text (/rds/general/user/jj1122/home/.cache/huggingface/datasets/text/default-586044b5385f1284/0.0.0/cb1e9bd71a82ad27976be3b12b407850fe2837d80c22c5e03a28949843a8ace2)
100%|██████████| 1/1 [00:00<00:00, 49.13it/s]


In [13]:
llll = dataset.map(gpt2_tokenizer['train']['text'])

TypeError: 'GPT2Tokenizer' object is not subscriptable

In [None]:
g = gpt2_tokenizer(dataset['train']['text'])

In [None]:
g

In [None]:
llll

In [10]:
dataset

DatasetDict({
    train: Dataset({
        features: ['text'],
        num_rows: 2861587
    })
})

In [63]:
dataset['train']['text'][9282]

'9281,"Early rotaries had steam engines inside their car bodies to power the blades; a few are still in working order, and in particular one on the White Pass and Yukon Route in Alaska performs annual demonstration runs through thick snow for the benefit of photographers and railway enthusiasts. Rotaries of newer construction are either diesel- or electric-powered. Many steam plows were converted to electricity. Some electric plows can take their power from a locomotive, while others are semi-permanently coupled to power units, generally old locomotives with their traction motors removed; these are colloquially called ""snails."" (This is derived from the fact that engineless but motored units that take their power from another locomotive are ""slugs""; thus the opposite, with engine but no motors, is a ""snail."")"'

In [64]:
inputs = gpt2_tokenizer('9281,"Early rotaries had steam engines inside their car bodies to power the blades; a few are still in working order, and in particular one on the White Pass and Yukon Route in Alaska performs annual demonstration runs through thick snow for the benefit of photographers and railway enthusiasts. Rotaries of newer construction are either diesel- or electric-powered. Many steam plows were converted to electricity. Some electric plows can take their power from a locomotive, while others are semi-permanently coupled to power units, generally old locomotives with their traction motors removed; these are colloquially called ""snails."" (This is derived from the fact that engineless but motored units that take their power from another locomotive are ""slugs""; thus the opposite, with engine but no motors, is a ""snail."")"', return_tensors="pt").to(device_id)
n_tokens = inputs['input_ids'].shape[1]

# Main

## register hook - save_tuned_model_activation

In [65]:
tuned_model_activations = {}
for m_id in list_trace_module_ids:
    tuned_model.get_submodule(m_id).register_forward_hook(save_tuned_model_activation(m_id))

## get tuned_model_activation & losses

In [66]:
with torch.no_grad():
    tuned_model.eval()
    base_model.eval()

    tuned_outputs = tuned_model(**inputs, labels=inputs.input_ids)
    base_outputs = base_model(**inputs, labels=inputs.input_ids)
    
    tuned_loss = tuned_outputs.loss.item()
    base_loss = base_outputs.loss.item()
    

In [67]:
tuned_model_activations[m_id].shape

torch.Size([1, 173, 768])

## register hook - restore_base_model_activation & restored losses

In [68]:
restored_loss = {}
with torch.no_grad():
    tuned_model.eval()
    base_model.eval()
    for m_id in list_trace_module_ids:
        for t in range(n_tokens):
            hook = base_model.get_submodule(m_id).register_forward_hook(restore_base_model_activation(m_id, t))
            base_outputs = base_model(**inputs, labels=inputs.input_ids)
            restored_loss[m_id + f"_{t}"] = base_outputs.loss.item()
            hook.remove()
        

In [73]:
TE = tuned_loss - base_loss

In [77]:
IDE = {}

for m_id in list_trace_module_ids:
    for t in range(n_tokens):
        IDE[m_id + f'_{t}'] = (restored_loss[m_id + f'_{t}'] - base_loss).item()

In [69]:
base_loss, tuned_loss

(tensor(4.4584, device='cuda:0'), tensor(3.8255, device='cuda:0'))

In [72]:
np.mean(list(restored_loss.values()))

4.458096009343575

In [79]:
dataset['train']['text']

[',0',
 '0,"Francis Albert ""Frank"" Sinatra (born December 12, 1915 in Hoboken, New Jersey), grew up in a tenement with his parents, who had emigrated from Italy."',
 '1,Family background and early years.',
 '2,"Francis Albert Sinatra was born on December 12, 1915, in an upstairs tenement at 415 Monroe Street in Hoboken, New Jersey, the only child of Italian immigrants Natalina ""Dolly"" Garaventa and Antonino Martino ""Marty"" Sinatra. The couple had eloped on Valentine\'s Day, 1913, and were married at the city hall in Jersey City, New Jersey; they later got remarried in a church. Sinatra weighed at birth. His was a breech birth; he had to be with the aid of forceps, which caused severe scarring to his left cheek, neck, and ear, and perforated his ear drum, damage that remained for life. Due to his injuries at birth, his baptism was delayed for several months. A childhood operation on his mastoid bone left major scarring on his neck, and during adolescence he suffered from cystic ac

In [18]:
import numpy as np

In [19]:
a = torch.Tensor(np.arange(12).reshape(1, 3, 4))

In [20]:
b = torch.Tensor(np.arange(12, 16)).reshape(1, 4)

In [21]:
a[:, 1] = b

In [22]:
a

tensor([[[ 0.,  1.,  2.,  3.],
         [12., 13., 14., 15.],
         [ 8.,  9., 10., 11.]]])

In [261]:
base_model_activation['transformer.h.0.attn']

AttributeError: 'list' object has no attribute 'shape'

In [200]:
base_model_activation['transformer.h.0.attn'][1][0].shape

torch.Size([1, 12, 6, 64])

In [201]:
base_model_activation['transformer.h.0.attn'][1][1].shape

torch.Size([1, 12, 6, 64])

In [262]:
base_model_activation['transformer.h.0.attn'][0].shape

torch.Size([1, 6, 768])

In [263]:
base_model_activation['transformer.h.0.attn'][0][0].shape

torch.Size([6, 768])

In [206]:
base_model_activation['transformer.h.0.mlp'].shape

torch.Size([1, 6, 768])

In [203]:
base_model_activation['transformer.h.0.mlp'][1][0].shape

IndexError: index 1 is out of bounds for dimension 0 with size 1

In [201]:
base_model_activation['transformer.h.0.mlp'][1][1].shape

torch.Size([1, 12, 6, 64])

In [102]:

#     return hook


# model = MyModel()
# model.fc2.register_forward_hook(get_activation('fc2'))
# x = torch.randn(1, 25)
# output = model(x)
# print(activation['fc2'])

In [86]:
import torch.nn as nn

In [None]:
nn.Module.()

In [96]:
outputs = base_model(**inputs, output_hidden_states=True)

In [97]:
len(feats)

2

In [99]:
feats

{'feat': [tensor([[[-0.0301, -0.0073, -0.0091,  ..., -0.0105, -0.0744,  0.6182],
           [ 0.1909, -0.1697, -0.1325,  ..., -0.1108, -0.1694, -0.1655],
           [ 0.1474, -0.0252, -0.0461,  ..., -0.1597, -0.1263,  0.0111],
           [ 0.1078, -0.1110, -0.0686,  ..., -0.0815, -0.1464, -0.0997],
           [ 0.2341,  0.0183, -0.1318,  ..., -0.0780, -0.1161, -0.0645],
           [ 0.5379, -0.1639, -0.1694,  ..., -0.1605, -0.0340, -0.1341]]],
         device='cuda:0'),
  tensor([[[-0.0301, -0.0073, -0.0091,  ..., -0.0105, -0.0744,  0.6182],
           [ 0.1909, -0.1697, -0.1325,  ..., -0.1108, -0.1694, -0.1655],
           [ 0.1474, -0.0252, -0.0461,  ..., -0.1597, -0.1263,  0.0111],
           [ 0.1078, -0.1110, -0.0686,  ..., -0.0815, -0.1464, -0.0997],
           [ 0.2341,  0.0183, -0.1318,  ..., -0.0780, -0.1161, -0.0645],
           [ 0.5379, -0.1639, -0.1694,  ..., -0.1605, -0.0340, -0.1341]]],
         device='cuda:0'),
  tensor([[[-0.0301, -0.0073, -0.0091,  ..., -0.0105, -0.0

In [98]:
feats['feat'][0].shape

torch.Size([1, 6, 3072])

In [67]:
for m_id in list_trace_module_ids:
    

IndentationError: expected an indented block (3190203478.py, line 2)

In [41]:
trace_token_id = "transformer.h.{l}.{m}.{t}"
list_trace_token_ids = []


In [None]:
base_state = {}


In [90]:
# def hook_func(module, _input , _output):
#     feats['feat'].append(output.detach())
    
def save_base_state_hook(module, _input , _output):
    
    feats[list(module.named_modules())[0][0]] = _output.detach()

In [95]:
base_model.transformer.h[0].mlp.act.register_forward_hook(save_base_state_hook)

<torch.utils.hooks.RemovableHandle at 0x145cd9d6fdf0>

In [87]:
{k: v for k, v in base_model.transformer.h[0].mlp.named_modules()}

{'': GPT2MLP(
   (c_fc): Conv1D()
   (c_proj): Conv1D()
   (act): NewGELUActivation()
   (dropout): Dropout(p=0.1, inplace=False)
 ),
 'c_fc': Conv1D(),
 'c_proj': Conv1D(),
 'act': NewGELUActivation(),
 'dropout': Dropout(p=0.1, inplace=False)}

In [82]:
{k: v for k, v in base_model.transformer.h[0].mlp.act.named_modules()}

{'': NewGELUActivation()}

In [62]:
{k:v for k, v in base_model.named_parameters()}

{'transformer.wte.weight': Parameter containing:
 tensor([[-0.1101, -0.0393,  0.0331,  ..., -0.1364,  0.0151,  0.0453],
         [ 0.0403, -0.0486,  0.0462,  ...,  0.0861,  0.0025,  0.0432],
         [-0.1275,  0.0479,  0.1841,  ...,  0.0899, -0.1297, -0.0879],
         ...,
         [-0.0445, -0.0548,  0.0123,  ...,  0.1044,  0.0978, -0.0695],
         [ 0.1860,  0.0167,  0.0461,  ..., -0.0963,  0.0785, -0.0225],
         [ 0.0514, -0.0277,  0.0499,  ...,  0.0070,  0.1552,  0.1207]],
        device='cuda:0', requires_grad=True),
 'transformer.wpe.weight': Parameter containing:
 tensor([[-1.8821e-02, -1.9742e-01,  4.0267e-03,  ..., -4.3044e-02,
           2.8267e-02,  5.4490e-02],
         [ 2.3959e-02, -5.3792e-02, -9.4879e-02,  ...,  3.4170e-02,
           1.0172e-02, -1.5573e-04],
         [ 4.2161e-03, -8.4764e-02,  5.4515e-02,  ...,  1.9745e-02,
           1.9325e-02, -2.1424e-02],
         ...,
         [-1.7987e-03,  1.6052e-03, -5.5103e-02,  ...,  1.3617e-02,
          -7.1805e

In [61]:
{k:v for k, v in base_model.named_modules()}

{'': GPT2LMHeadModel(
   (transformer): GPT2Model(
     (wte): Embedding(50257, 768)
     (wpe): Embedding(1024, 768)
     (drop): Dropout(p=0.1, inplace=False)
     (h): ModuleList(
       (0-11): 12 x GPT2Block(
         (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
         (attn): GPT2Attention(
           (c_attn): Conv1D()
           (c_proj): Conv1D()
           (attn_dropout): Dropout(p=0.1, inplace=False)
           (resid_dropout): Dropout(p=0.1, inplace=False)
         )
         (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
         (mlp): GPT2MLP(
           (c_fc): Conv1D()
           (c_proj): Conv1D()
           (act): NewGELUActivation()
           (dropout): Dropout(p=0.1, inplace=False)
         )
       )
     )
     (ln_f): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
   )
   (lm_head): Linear(in_features=768, out_features=50257, bias=False)
 ),
 'transformer': GPT2Model(
   (wte): Embedding(50257, 768)
   (wpe): Embedding(1

In [53]:
outputs.keys()

odict_keys(['logits', 'past_key_values', 'hidden_states'])

In [42]:
len(outputs['past_key_values'])

12

In [51]:
type(outputs)

transformers.modeling_outputs.CausalLMOutputWithCrossAttentions

In [None]:
base_model.forward()

In [50]:
outputs['past_key_values'][0][0].shape

torch.Size([1, 12, 5, 64])

In [58]:
outputs['hidden_states'][0]

(tensor([[[ 0.1286, -0.2933,  0.1470,  ...,  0.0599, -0.0342, -0.0586],
          [ 0.1835, -0.1787,  0.0199,  ...,  0.2900,  0.0298,  0.0143],
          [-0.0464, -0.0791,  0.1016,  ...,  0.0623,  0.0928, -0.0598],
          [ 0.0834, -0.0018,  0.1705,  ..., -0.0926, -0.1838,  0.0184],
          [ 0.1294, -0.0446,  0.2036,  ..., -0.1334,  0.1124, -0.1384]]],
        device='cuda:0', grad_fn=<AddBackward0>),
 tensor([[[ 0.0532, -0.7611,  0.4959,  ..., -1.2261, -0.0817,  0.6238],
          [-1.1012, -2.5899, -1.8142,  ...,  1.0625,  0.7677, -0.0037],
          [-1.2581,  0.0095, -0.0184,  ...,  0.0582,  0.1906, -0.4317],
          [-0.4614, -0.7692, -0.3092,  ...,  0.1943, -1.3663,  1.7228],
          [ 0.5679, -0.3924, -1.7748,  ...,  0.4108,  0.9494,  0.9103]]],
        device='cuda:0', grad_fn=<AddBackward0>),
 tensor([[[-0.1535, -1.6369,  1.5175,  ..., -0.9826, -0.1340,  0.3355],
          [-0.8389, -3.3402, -1.0746,  ...,  1.9019,  0.4481,  0.1189],
          [-1.2535,  0.1512,  0.

In [48]:
outputs['past_key_values'][0]

(tensor([[[[-1.5577e+00,  2.0585e+00,  1.3060e+00,  ..., -1.3825e+00,
            -6.3336e-01,  1.2624e+00],
           [-2.3259e+00,  2.3836e+00,  2.5165e+00,  ..., -1.5535e+00,
            -1.2954e+00,  2.7215e+00],
           [-2.3145e+00,  2.7101e+00,  1.5073e+00,  ..., -5.7809e-01,
            -1.9292e+00,  2.2634e+00],
           [-1.7966e+00,  2.5474e+00,  2.5459e+00,  ...,  5.4051e-01,
            -1.5586e+00,  1.7831e+00],
           [-2.2192e+00,  3.0667e+00,  1.1398e+00,  ..., -1.7093e+00,
            -1.2971e+00,  2.2413e+00]],
 
          [[-1.1913e-01, -4.7022e-01, -1.8900e+00,  ..., -6.0264e-01,
             1.1491e+00,  9.6728e-02],
           [ 8.0233e-01, -2.1909e+00, -2.6815e+00,  ..., -1.9200e+00,
             2.8771e+00,  4.0363e-01],
           [-7.1399e-01, -1.6105e+00, -2.9748e+00,  ..., -1.6920e+00,
             4.4462e+00,  1.8922e-01],
           [ 1.9563e-01, -2.0327e+00, -3.7951e+00,  ..., -9.0738e-01,
             3.3405e+00,  2.8258e+00],
           [ 9.2

In [None]:
outputs['past_key_values'][0]