In [1]:
import numpy as np
import random
import pandas as pd
import torch as t
# Load model directly
from transformers import AutoTokenizer, AutoModelForCausalLM
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load the CSV file to verify it
df_sentences = pd.read_csv('../datasets/ilikecats.csv')

# Display the first few entries
print(df_sentences.head())

# Filter the DataFrame for rows where the Label column is 'Animal'
animal_sentences = df_sentences[df_sentences['Label'] == 'Animal']['Sentence'].tolist()
non_animal_sentences = df_sentences[df_sentences['Label'] == 'Non-Animal']['Sentence'].tolist()


    Label           Sentence
0  Animal       I like cats.
1  Animal       I like dogs.
2  Animal  I like elephants.
3  Animal     I like tigers.
4  Animal      I like birds.


In [3]:
tokenizer = AutoTokenizer.from_pretrained("openai-community/gpt2")
model = AutoModelForCausalLM.from_pretrained("openai-community/gpt2")
tokenizer.pad_token = tokenizer.eos_token



In [4]:
# Initialize the tokenizer and model # !pip install huggingface-hub #!huggingface-cli login
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-chat-hf")
model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-7b-chat-hf")

_ = model.eval()

Loading checkpoint shards: 100%|██████████| 2/2 [00:41<00:00, 20.82s/it]


In [5]:

# Check if CUDA is available and set the model to use GPU
print('cuda') if t.cuda.is_available() else print("cpu")
device = t.device("cuda" if t.cuda.is_available() else "cpu")
model.to(device)
print("Using device:", device)


cuda


OutOfMemoryError: CUDA out of memory. Tried to allocate 64.00 MiB (GPU 0; 23.69 GiB total capacity; 23.29 GiB already allocated; 51.69 MiB free; 23.29 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF

In [None]:
# Set the EOS token as the padding token if it's not already set
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

In [None]:
# Global container to store activations (GPT2)
activations = {}

def get_activation(name):
    # closure to store activation
    def hook(model, input, output):
        activations[name] = output[0].detach()
    return hook

# Attaching hooks to each layer of the model
for i, layer in enumerate(model.model.layers):
    layer.register_forward_hook(get_activation(f'Decoder_Layer_{i}'))


In [None]:
# Print max number of layers and max number of tokens
print(f"Number of layers = {len(model.model.layers)}")
max_tokens = tokenizer(df_sentences['Sentence'].tolist(), return_tensors="pt", padding=True, truncation=True)['input_ids'].shape[-1]
print(f"Number of tokens = {max_tokens}")

Number of layers = 32
Number of tokens = 13


In [None]:
# Parameters for looping
num_samples = 5
layer_names = ['Layer_{}'.format(i) for i in range(12, 12)]  # Adjust as needed based on your model structure
token_indices = [5]  # Example token indices

# Shuffle sentences
random.shuffle(animal_sentences)
random.shuffle(non_animal_sentences)

# Process sentences and get activations
inputs = tokenizer(animal_sentences[0:num_samples], return_tensors="pt", padding=True, truncation=True)
outputs = model(**inputs)
animal_activations = activations.copy()

activations = {}

inputs = tokenizer(non_animal_sentences[0:num_samples], return_tensors="pt", padding=True, truncation=True)
outputs = model(**inputs)
non_animal_activations = activations.copy()

# Loop over each layer and token
for layer_name in layer_names:
    for token_num in token_indices:
        try:
            # Extract activations for the current layer and token
            animal_activations_layer_token = animal_activations[layer_name][:, token_num, :].numpy()
            non_animal_activations_layer_token = non_animal_activations[layer_name][:, token_num, :].numpy()
            
            # Combine datasets
            combined_activations = np.vstack((animal_activations_layer_token, non_animal_activations_layer_token))
            labels = np.array([0] * len(animal_activations_layer_token) + [1] * len(non_animal_activations_layer_token))
            
            # Perform PCA
            pca = PCA(n_components=2)  # 2D for easy plotting
            reduced_activations = pca.fit_transform(combined_activations)
            
            # Plotting
            plt.figure(figsize=(4, 3))
            plt.scatter(reduced_activations[labels == 0, 0], reduced_activations[labels == 0, 1], c='red', label='Animal', alpha=0.5)
            plt.scatter(reduced_activations[labels == 1, 0], reduced_activations[labels == 1, 1], c='blue', label='Non-animal', alpha=0.5)
            plt.xlabel('Principal Component 1')
            plt.ylabel('Principal Component 2')
            plt.legend()
            plt.title(f'PCA of {layer_name}-Token {token_num} Activations')
            plt.show()
            
        except Exception as e:
            print(f"Error processing {layer_name}-Token {token_num}: {e}")


KeyboardInterrupt: 