In [1]:
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
import numpy as np
from sklearn.decomposition import PCA

# Run on the GPU when torch can see one; otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
device

device(type='cuda')

In [2]:
MODEL_NAME = '../../meta-llama/Llama-2-7b-chat-hf'

# The weights are read from the local directory "<MODEL_NAME>_model",
# in half precision, with device placement left to `device_map="auto"`.
model = AutoModelForCausalLM.from_pretrained(
    f'{MODEL_NAME}_model',
    torch_dtype=torch.float16,
    device_map="auto",
)

# The matching tokenizer is read from the sibling directory "<MODEL_NAME>_tokenizer".
tokenizer = AutoTokenizer.from_pretrained(f'{MODEL_NAME}_tokenizer')


Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

In [3]:
# Print the model's module tree (submodule names and their layer definitions).
print(model)

LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(32000, 4096)
    (layers): ModuleList(
      (0-31): 32 x LlamaDecoderLayer(
        (self_attn): LlamaSdpaAttention(
          (q_proj): Linear(in_features=4096, out_features=4096, bias=False)
          (k_proj): Linear(in_features=4096, out_features=4096, bias=False)
          (v_proj): Linear(in_features=4096, out_features=4096, bias=False)
          (o_proj): Linear(in_features=4096, out_features=4096, bias=False)
          (rotary_emb): LlamaRotaryEmbedding()
        )
        (mlp): LlamaMLP(
          (gate_proj): Linear(in_features=4096, out_features=11008, bias=False)
          (up_proj): Linear(in_features=4096, out_features=11008, bias=False)
          (down_proj): Linear(in_features=11008, out_features=4096, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): LlamaRMSNorm()
        (post_attention_layernorm): LlamaRMSNorm()
      )
    )
    (norm): LlamaRMSNorm()
  )
  (lm_head):

In [4]:
import pandas as pd
# Read the question/answer dataset and strip every double-quote character
# from its string cells (regex=True makes `replace` act on substrings).
df = pd.read_csv('processed_data.csv').replace('\"', '', regex=True)
df

Unnamed: 0,Type,Sentence
0,Geography,Question: What is the capital of Australia?; A...
1,Geography,Question: What is the highest peak in North Am...
2,Geography,Question: Which country is the smallest in the...
3,Geography,Question: What is the longest river in the wor...
4,Geography,Question: What is the capital of Spain?; Answe...
...,...,...
310,Sport,Question: Who won the first ever World Cup in ...
311,Sport,Question: Who holds the record for the most go...
312,Sport,Question: Who has won the most Balon d'Or awar...
313,Sport,Question: Which club won the UEFA Europa Leagu...


In [5]:
# Distinct values found in the 'Type' column.
types_unique = df.loc[:, 'Type'].unique()
types_unique

array(['Geography', 'History', 'Literature', 'Music', 'Science', 'Sport'],
      dtype=object)

In [6]:
# Collect all sentences of each 'Type' into one Python list per category.
# `df` is left in place so this cell (and the cells above that read `df`)
# can be re-run without reloading the CSV first.
grouped = df.groupby('Type')['Sentence'].apply(list)
grouped

Type
Geography     [Question: What is the capital of Australia?; ...
History       [Question: When was the United States founded?...
Literature    [Question: Who wrote 'To Kill a Mockingbird'?;...
Music         [Question: Who was the 'King of Pop'?; Answer:...
Science       [Question: What is the chemical symbol for Hyd...
Sport         [Question: Who won the FIFA World Cup in 2018?...
Name: Sentence, dtype: object

In [7]:
# --- Activation capture -------------------------------------------------
# Capture the output of `model.lm_head` on a short test sentence.
ENABLE_LM_HEAD_ACTIVATION = False
# Capture the output of a submodule inside `model.model.layers[INDEX]`.
ENABLE_MODEL_LAYERS_ACTIVATION = True
ENABLE_MODEL_LAYERS_INDEX = 31     # position in `model.model.layers`
ENABLE_MODEL_LAYERS_NAME = 'mlp'   # the capture cell only runs when this is 'mlp'

# --- Post-processing ----------------------------------------------------
# Run the PCA cell on the captured activation.
ENABLE_PCA = False

# --- Text output --------------------------------------------------------
# Decode the per-position argmax of the last forward pass.
ENABLE_NAIVE_OUTPUT = False
# Generate a continuation token by token with top-k sampling.
ENABLE_SAMPLING_OUTPUT = True

In [8]:
def get_activation(layer):
    """Build a forward hook that stores a module's output on ``layer.activation``.

    Args:
        layer: Object on which the captured tensor is stored as the
            ``activation`` attribute (normally the hooked module itself).

    Returns:
        A callable with the ``(module, input, output)`` signature expected by
        ``torch.nn.Module.register_forward_hook``.

    Notes:
        When the hooked module returns a tuple or list, only its first element
        is captured; a plain tensor output is captured as is.
    """
    def hook(module, input, output):
        # A tuple/list has no `.detach()`; take its first element so the hook
        # also works on modules that return several values.
        captured = output[0] if isinstance(output, (tuple, list)) else output
        # `detach` separates the stored value from the computation graph.
        layer.activation = captured.detach()
    return hook

In [9]:
if ENABLE_LM_HEAD_ACTIVATION:
    # Capture the output of `model.lm_head` for one short test sentence.
    hook_ref = model.lm_head.register_forward_hook(get_activation(model.lm_head))
    try:
        # Named `inputs` (not `input`) so the Python builtin is not shadowed.
        inputs = tokenizer("This is a test sentence", return_tensors="pt").to(device)
        # Inference only: skip building the autograd graph.
        with torch.no_grad():
            outputs = model(**inputs)  # Process the input
    finally:
        # Always unregister the hook, even if the forward pass raised,
        # so later forward passes are not silently captured as well.
        hook_ref.remove()
    activation = model.lm_head.activation  # Tensor stored by the hook during the forward pass

In [10]:
if ENABLE_LM_HEAD_ACTIVATION:
    # Show the shape first, then the tensor itself, one per line.
    print(activation.shape, activation, sep='\n')

In [11]:
if ENABLE_MODEL_LAYERS_ACTIVATION and ENABLE_MODEL_LAYERS_NAME == 'mlp':
    # Capture the output of the MLP submodule of the configured decoder layer
    # for the first 30 'Sport' sentences.
    mlp_module = model.model.layers[ENABLE_MODEL_LAYERS_INDEX].mlp
    hook_ref = mlp_module.register_forward_hook(get_activation(mlp_module))
    try:
        # EOS is reused as the pad token so that `padding='longest'` can batch
        # sentences of different lengths (presumably the tokenizer has no pad
        # token of its own - confirm).
        tokenizer.pad_token = tokenizer.eos_token
        inputs = tokenizer(grouped['Sport'][:30], return_tensors="pt", padding='longest').to(device)
        # Inference only: without no_grad the whole batch would build an
        # autograd graph and hold on to far more memory than needed.
        with torch.no_grad():
            outputs = model(**inputs)
    finally:
        # Unregister the hook even if tokenization or the forward pass failed.
        hook_ref.remove()
    # NOTE(review): the captured tensor covers every position of the padded
    # batch, so padded positions are included as well.
    activation = mlp_module.activation

In [12]:
if ENABLE_MODEL_LAYERS_ACTIVATION and ENABLE_MODEL_LAYERS_NAME == 'mlp':
    # Show the shape first, then the tensor itself, one per line.
    print(activation.shape, activation, sep='\n')

torch.Size([30, 82, 4096])
tensor([[[ 3.7842e-01,  7.3914e-02, -2.0504e-03,  ..., -2.4109e-01,
          -2.3254e-01,  3.0566e-01],
         [-1.5796e-01,  4.2896e-01, -1.6248e-01,  ...,  3.2983e-01,
          -2.0740e-01, -8.5254e-01],
         [ 5.8105e-01,  7.5781e-01,  1.5234e+00,  ...,  2.5909e-02,
          -7.1094e-01,  1.3123e-01],
         ...,
         [ 5.3271e-01, -3.0518e-01,  3.3838e-01,  ..., -2.4817e-01,
           3.7903e-02, -2.3633e-01],
         [ 5.2930e-01, -3.0176e-01,  3.2300e-01,  ..., -2.2717e-01,
           2.8107e-02, -2.5171e-01],
         [ 5.2637e-01, -3.0127e-01,  2.9712e-01,  ..., -1.9763e-01,
           1.5945e-02, -2.7661e-01]],

        [[ 3.7842e-01,  7.3914e-02, -2.0504e-03,  ..., -2.4109e-01,
          -2.3254e-01,  3.0566e-01],
         [-1.5796e-01,  4.2896e-01, -1.6248e-01,  ...,  3.2983e-01,
          -2.0740e-01, -8.5254e-01],
         [ 5.8105e-01,  7.5781e-01,  1.5234e+00,  ...,  2.5909e-02,
          -7.1094e-01,  1.3123e-01],
         ...

In [13]:
# Persist the captured MLP activation. Guarded like the capture cell so that
# nothing (or a different module's activation) is saved under an MLP file name
# when the MLP capture is disabled.
if ENABLE_MODEL_LAYERS_ACTIVATION and ENABLE_MODEL_LAYERS_NAME == 'mlp':
    # The file name is derived from the config so it always matches the layer
    # that was actually hooked; with the default config it resolves to
    # 'type_tensor/mlp_31_sport.pt'. The 'type_tensor' directory must exist.
    save_path = f'type_tensor/{ENABLE_MODEL_LAYERS_NAME}_{ENABLE_MODEL_LAYERS_INDEX}_sport.pt'
    torch.save(activation, save_path)

In [9]:
if ENABLE_PCA:
    # The activation is assumed to be laid out as [batch_size, num_tokens, num_features].
    pca_activation = activation.cpu().numpy()
    n_samples, n_tokens, n_features = pca_activation.shape

    # PCA expects a 2-D matrix: flatten (sample, token) pairs into rows.
    reshaped_activation = pca_activation.reshape(n_samples * n_tokens, n_features)

    dim_pca = 10
    pca = PCA(n_components=dim_pca)

    # Project every token vector onto the principal axes, then restore the
    # per-sample / per-token layout.
    principal_components = pca.fit_transform(reshaped_activation).reshape(n_samples, n_tokens, dim_pca)

In [26]:
if ENABLE_NAIVE_OUTPUT:
    # `outputs` is the result of the most recent forward pass of the model.
    output_predictions = outputs.logits

    # Highest-scoring token id at every position of every sequence.
    predicted_ids = torch.argmax(output_predictions, dim=-1)

    # Decode the ids predicted for the first sequence of the batch.
    predicted_sentence = tokenizer.decode(predicted_ids[0])

    # A bare expression inside an `if` block is not rendered by the notebook,
    # so the result has to be printed explicitly to be visible.
    print(predicted_sentence)

In [9]:
def top_k_sampling(logits, k=5):
    """Sample one token id per row from the ``k`` highest-scoring logits.

    Args:
        logits: Tensor of scores whose last dimension indexes the vocabulary.
            ``torch.multinomial`` requires the resulting probabilities to be
            1-D or 2-D. The tensor is NOT modified.
        k: Number of top candidates kept per row. Values larger than the
            vocabulary size are clamped to it.

    Returns:
        Tensor of sampled indices with a trailing dimension of size 1.

    Raises:
        ValueError: If ``k`` is smaller than 1.
    """
    if k < 1:
        raise ValueError(f"k must be >= 1, got {k}")
    # torch.topk raises when k exceeds the size of the last dimension.
    k = min(k, logits.size(-1))
    # Smallest score that is still inside the top-k of each row.
    kth_score = torch.topk(logits, k).values[..., -1, None]
    # masked_fill returns a new tensor, so the caller's logits (often a view
    # into the model output) stay untouched.
    filtered = logits.masked_fill(logits < kth_score, float('-inf'))
    probs = torch.nn.functional.softmax(filtered, dim=-1)
    return torch.multinomial(probs, num_samples=1)

In [10]:
if ENABLE_SAMPLING_OUTPUT:
    # Generate a continuation of the prompt one token at a time with top-k sampling.
    input_sentence = "Who was responsible for the Great Fire of London"
    inputs = tokenizer.encode(input_sentence, return_tensors="pt").to(device)
    generated_sentence = inputs

    MAX_LENGTH = 200  # upper bound on the number of newly sampled tokens
    model.eval()
    with torch.no_grad():
        for _ in range(MAX_LENGTH):
            # The whole sequence generated so far is fed back in at every step.
            outputs = model(generated_sentence)
            # Only the scores at the last position predict the next token.
            next_token_logits = outputs.logits[:, -1, :]
            next_token = top_k_sampling(next_token_logits)
            generated_sentence = torch.cat((generated_sentence, next_token), dim=1)
            # Stop once the model emits end-of-sequence; anything sampled
            # after that point is not part of the answer. (Batch size is 1
            # here, so `.item()` is well defined.)
            if next_token.item() == tokenizer.eos_token_id:
                break

    print(tokenizer.decode(generated_sentence[0]))

<s> Who was responsible for the Great Fire of London in 1666?
The Great Fire of London in 1666 was a devastating fire that swept through the city of London, England, killing hundreds of people and destroying thousands of buildings. The fire started on September 2, 1666, in a bakery on Pudding Lane, and quickly spread throughout the city, fueled by strong winds and closely-packed buildings.

There is some debate about who was responsible for the Great Fire of London, as it is not clear what sparked the fire. However, the most commonly cited theory is that it was caused by a baker named Thomas Farriner, who left a hot oven unattended in his bakery on Pudding Lane.

Farriner had been warned several times about the risk of fire in his bakery, which was located near the River Thames and surrounded by closely-packed


In [None]:
# Export the captured activation as a NumPy array.
# `activation` is the tensor stored by the forward hook in the capture cells;
# the name `hooked_layer` used here before is not defined anywhere in the notebook.
numpy_array = activation.detach().cpu().numpy()
# Save the numpy array
np.save('activation.npy', numpy_array)