In [3]:
from huggingface_hub import notebook_login
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [4]:
## Data
import pandas as pd
data = pd.read_csv('./data/animals_true_false.csv')
sentences = data['statement']
data

Unnamed: 0,statement,label
0,The giant anteater uses walking for locomotion.,1
1,The eagle has a habitat of urban/wild.,0
2,The tortoise has an iridescent tail with eye-l...,0
3,"Human uses for hyena include conservation, res...",0
4,The platypus uses swimming for locomotion.,1
...,...,...
1003,The pigeon has the atomic number of mammal.,0
1004,The kangaroo is a mammal.,1
1005,The raccoon has a diet of omnivore.,1
1006,The chimpanzee has a habitat of mountain.,0


In [5]:
from transformers import AutoTokenizer, AutoModelForCausalLM

def init_model(model_name):
  tokenizer = AutoTokenizer.from_pretrained(model_name)
  model = AutoModelForCausalLM.from_pretrained(model_name)
  return model, tokenizer

def generate_embeddings(data, model, tokenizer, prompt, layers):
  layer_embeddings = {layer: [] for layer in layers}

  for item in data:
      input_text = f'{prompt}{item}'
      tokens = tokenizer(input_text, return_tensors='pt')
      output = model.generate(tokens.input_ids, attention_mask=tokens.attention_mask, output_hidden_states=True, return_dict_in_generate=True, max_new_tokens=1, min_new_tokens=1)

      # Extract embeddings for the specified layers
      states = output.hidden_states
      for layer in layers:
          # Be careful with indexing; adjust based on your model's specifics
          embeddings = states[0][layer][0][-1]  # Modify as needed
          layer_embeddings[layer].append(embeddings)

  return layer_embeddings

In [8]:
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout

def test_model(model_name):
    llm_model, tokenizer = init_model(model_name)
    layers = [-1, -2, -3]  # Specify the layers you want to extract embeddings from

    for prompt in ['', 'I want you to think hard about the truth of the following sentence: ']:  # Add your custom prompt if needed
        embeddings_by_layer = generate_embeddings(sentences, llm_model, tokenizer, prompt, layers)

        for layer in layers:
            # Extract embeddings for the current layer
            embeddings = embeddings_by_layer[layer]

            # Prepare data
            X = np.array(embeddings)
            y = np.array(data['label'])
            X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

            # Define and compile model
            input_dim = llm_model.config.hidden_size
            model = Sequential()
            model.add(Dense(128, input_dim=input_dim, activation='relu'))
            model.add(Dropout(0.5))
            model.add(Dense(64, activation='relu'))
            model.add(Dropout(0.5))
            model.add(Dense(1, activation='sigmoid'))  # Assuming binary classification
            model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

            # Train the model
            model.fit(X_train, y_train, epochs=10, batch_size=32, validation_split=0.2)

            # Evaluate the model
            test_loss, test_acc = model.evaluate(X_test, y_test)

            # Save statistics and embeddings
            tag = 'prompt' if len(prompt) > 0 else 'basic'
            save_path = f'./data/embeddings/{model_name.split("/")[1]}_{tag}_{layer}.npy'
            np.save(save_path, X)

            write_data = f'{model_name}_{tag}_{layer}: {test_loss} {test_acc}'
            with open('./statistics.txt', 'a') as file:
                file.write(f'{write_data}\n')



In [12]:
model_names = [
    # 'openai-community/gpt2',
    # 'openai-community/gpt2-medium',
    # 'openai-community/gpt2-large',
    # 'openai-community/gpt2-xl',
    # 'EleutherAI/gpt-neo-1.3B',
    # 'microsoft/opt-1.3b',
    # 'bigscience/bloom-560m',
    # 'bigscience/bloom-1b1',
    'stabilityai/stablelm-base-alpha-3b',
]

for name in model_names:
  test_model(name)

tokenizer_config.json:   0%|          | 0.00/264 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.11M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/99.0 [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


config.json:   0%|          | 0.00/708 [00:00<?, ?B/s]

pytorch_model.bin.index.json:   0%|          | 0.00/21.1k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

pytorch_model-00001-of-00002.bin:   0%|          | 0.00/10.2G [00:00<?, ?B/s]

pytorch_model-00002-of-00002.bin:   0%|          | 0.00/4.66G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/111 [00:00<?, ?B/s]

Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:0 for o

KeyboardInterrupt: 