In [12]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from warnings import filterwarnings

filterwarnings('ignore')

from transformers import AutoTokenizer, AutoModelForCausalLM

In [14]:
# GPU index 6 is hard-coded; the notebook falls back to the CPU when CUDA is unavailable.
device = torch.device("cuda:6" if torch.cuda.is_available() else "cpu")

def get_device_memory_report(device):
    """Print the name and the free/total memory of a CUDA device.

    Args:
        device: A ``torch.device``, device string, device index, or ``None``
            (``None`` is forwarded to the ``torch.cuda`` helpers unchanged).

    Returns:
        None. The report is written to stdout. For a non-CUDA device, or when
        CUDA is unavailable, a one-line notice is printed instead of raising,
        so the CPU fallback chosen above does not crash this cell.
    """
    targets_cuda = device is None or torch.device(device).type == "cuda"
    if not (targets_cuda and torch.cuda.is_available()):
        print(f'Device: {device} [no CUDA memory statistics available]')
        return

    print(f'Device: {device} [{torch.cuda.get_device_name(device)}]')
    free_memory, total_memory = torch.cuda.mem_get_info(device)

    # mem_get_info reports bytes; convert to GiB for display.
    free_memory_gb = free_memory / (1024 ** 3)
    total_memory_gb = total_memory / (1024 ** 3)

    print(f"Free Memory: {free_memory_gb:.2f}/{total_memory_gb:.2f} GB [{free_memory / total_memory * 100:.2f}%]")

get_device_memory_report(device)

Device: cuda:6 [NVIDIA RTX 6000 Ada Generation]
Free Memory: 47.08/47.50 GB [99.11%]


In [3]:
# Hugging Face Hub id of the Mistral instruct checkpoint used in this notebook.
mistral_model_id = "mistralai/Mistral-7B-Instruct-v0.2"

# Load the weights in float16 with device_map=device and the eager attention implementation.
mistral_model = AutoModelForCausalLM.from_pretrained(
    mistral_model_id,
    device_map=device,
    torch_dtype=torch.float16,
    attn_implementation="eager"
)
# NOTE(review): device_map=device presumably already places the model on `device`,
# which would make this explicit move redundant — confirm.
mistral_model.to(device)
# NOTE(review): device_map / torch_dtype are model-loading options; presumably they
# have no effect on a tokenizer — confirm and drop if so.
mistral_tokenizer = AutoTokenizer.from_pretrained(
    mistral_model_id,
    device_map=device,
    torch_dtype=torch.float16,
)
# Reuse the end-of-sequence token as the padding token.
mistral_tokenizer.pad_token = mistral_tokenizer.eos_token

Loading checkpoint shards: 100%|██████████| 3/3 [00:10<00:00,  3.56s/it]


In [4]:
# NOTE(review): the variables are prefixed `llama_1b`, but the checkpoint id below is
# the 3B instruct model — confirm which size is intended and align the names.
llama_1b_model_id = "meta-llama/Llama-3.2-3B-Instruct"
# Load the weights in float16 with device_map=device.
llama_1b_model = AutoModelForCausalLM.from_pretrained(
    llama_1b_model_id,
    device_map=device,
    torch_dtype=torch.float16,
)
# NOTE(review): presumably redundant after device_map=device — confirm.
llama_1b_model.to(device)
# NOTE(review): device_map / torch_dtype presumably have no effect on a tokenizer — confirm.
llama_1b_tokenizer = AutoTokenizer.from_pretrained(
    llama_1b_model_id,
    device_map=device,
    torch_dtype=torch.float16,
)
# Reuse the end-of-sequence token as the padding token.
llama_1b_tokenizer.pad_token = llama_1b_tokenizer.eos_token

Loading checkpoint shards: 100%|██████████| 2/2 [00:05<00:00,  2.76s/it]


In [19]:
# Hugging Face Hub id of the 8B Llama instruct checkpoint.
llama_8b_model_id = "meta-llama/Llama-3.1-8B-Instruct"

# Load the weights in float16 with device_map=device.
llama_8b_model = AutoModelForCausalLM.from_pretrained(
    llama_8b_model_id,
    device_map=device,
    torch_dtype=torch.float16,
)
# NOTE(review): presumably redundant after device_map=device — confirm.
llama_8b_model.to(device)
# NOTE(review): device_map / torch_dtype presumably have no effect on a tokenizer — confirm.
llama_8b_tokenizer = AutoTokenizer.from_pretrained(
    llama_8b_model_id,
    device_map=device,
    torch_dtype=torch.float16,
)
# Reuse the end-of-sequence token as the padding token.
llama_8b_tokenizer.pad_token = llama_8b_tokenizer.eos_token

Loading checkpoint shards: 100%|██████████| 4/4 [00:08<00:00,  2.07s/it]


In [8]:
from util.data import *

In [9]:
# Path to the anaphor_number_agreement JSON Lines file under ../data/blimp.
grammar_file = '../data/blimp/anaphor_number_agreement.jsonl'

# NOTE(review): despite its name, this variable is later passed to get_accuracy as the
# evaluated dataset. At this point load_multiple_choice_dataset presumably comes from the
# util.data star import rather than the definition further down — confirm.
multiple_choice_chat_template = load_multiple_choice_dataset(grammar_file, llama_1b_model, llama_1b_tokenizer, device)

  0%|          | 0/1000 [00:00<?, ?it/s]

100%|██████████| 1000/1000 [05:01<00:00,  3.31it/s]


In [143]:
import json

# Alternative source (disabled): reload a previously saved dataset from disk.
# with open('multiple_choice_dataset.json', 'r') as f:
#     multiple_choice_dataset = json.load(f)


# Alias the freshly computed results under the name scored below.
multiple_choice_dataset = multiple_choice_chat_template

def get_accuracy(dataset):
    """Return the fraction of records whose model output matches the expected answer.

    Args:
        dataset: Sequence of dicts, each with an 'answer' and an 'output' entry.

    Returns:
        Number of records with ``answer == output`` divided by ``len(dataset)``,
        or ``0.0`` for an empty dataset (the original raised ZeroDivisionError).

    Side effects:
        Prints the ``answer`` and ``output`` of every mismatching record.
    """
    if len(dataset) == 0:
        return 0.0

    correct = 0
    for record in dataset:
        answer = record['answer']
        output = record['output']

        if answer == output:
            correct += 1
        else:
            # Surface each miss so the error pattern is visible in the cell output.
            print(answer, output)

    return correct / len(dataset)

print(get_accuracy(multiple_choice_dataset))

1 2
1 2
2 1
2 1
1 2
2 1
2 1
1 2
2 1
2 1
2 1
2 1
1 2
1 2
2 1
2 1
2 1
2 1
2 1
2 1
2 1
2 1
1 2
2 1
2 1
2 1
2 1
2 1
2 1
2 1
2 1
2 1
1 2
2 1
2 1
2 1
2 1
2 1
2 1
2 1
1 2
2 1
2 1
2 1
1 2
2 1
2 1
2 1
1 2
0.951


In [20]:
from transformers import pipeline

import json
from random import uniform, shuffle
from tqdm import tqdm

In [53]:
# System prompt for the two-option task: the model must answer with the number of the correct sentence.
__system_message_multiple_choice = """
You are a helpful assistant that will help me understand which sentence is grammatically correct. 
You should only output the number of the correct sentence: 1 / 2
"""

# System prompt for the single-sentence task: '1' means grammatical, '0' means not.
__system_message_binary = """
You are a helpful assistant that will help me understand if a sentence is grammatically correct. 
You should only output '1' if the sentence is grammatically correct, and '0' if it is not.
"""

# User-turn template for the two-option task; filled via .format(sentence_1=..., sentence_2=...).
__question_format_multiple_choice = """
Which sentence is more grammatical and native-like? 
1) {sentence_1} 
2) {sentence_2}
"""

# User-turn template for the single-sentence task; filled via .format(sentence_1=...).
__question_format_binary = """
Is this sentence grammatically correct? 
1) {sentence_1}
"""


def load_grammar_data(file_path):
    """Read a JSON Lines file and return its records as a list.

    Args:
        file_path: Path to a file holding one JSON document per line.

    Returns:
        List with one decoded object per non-blank line, in file order.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    # Explicit UTF-8 keeps decoding independent of the platform's default locale.
    with open(file_path, 'r', encoding='utf-8') as f:
        # Blank or whitespace-only lines (e.g. a trailing newline pair) are skipped
        # instead of aborting the whole load.
        return [json.loads(line) for line in f if line.strip()]


def load_LLM_messages_multiple_choice(grammar_data, n_shot=5):
    """Build the few-shot chat history for the two-option grammaticality task.

    ``grammar_data`` is shuffled in place; its first ``n_shot`` entries then
    serve as demonstrations. Every demonstration contributes two
    user/assistant exchanges: one with the good sentence listed first
    (answer '1') and one with it listed second (answer '2').

    Returns:
        List of chat messages starting with the system prompt.
    """
    def exchange(first, second, label):
        # One user question followed by the assistant's expected reply.
        return [
            {
                "role": "user",
                "content": __question_format_multiple_choice.format(
                    sentence_1=first,
                    sentence_2=second
                )
            },
            {"role": "assistant", "content": label},
        ]

    conversation = [{"role": "system", "content": __system_message_multiple_choice}]
    shuffle(grammar_data)
    for position in range(n_shot):
        demo = grammar_data[position]
        correct, incorrect = demo['sentence_good'], demo['sentence_bad']
        conversation += exchange(correct, incorrect, '1')
        conversation += exchange(incorrect, correct, '2')
    return conversation


def load_LLM_messages_binary(grammar_data, n_shot=5):
    """Build the few-shot chat history for the single-sentence grammaticality task.

    ``grammar_data`` is shuffled in place; its first ``n_shot`` entries then
    serve as demonstrations. Every demonstration contributes two
    user/assistant exchanges: the good sentence answered with '1', then the
    bad sentence answered with '0'.

    Returns:
        List of chat messages starting with the system prompt.
    """
    def exchange(sentence, verdict):
        # One user question followed by the assistant's expected reply.
        return [
            {
                "role": "user",
                "content": __question_format_binary.format(sentence_1=sentence)
            },
            {"role": "assistant", "content": verdict},
        ]

    history = [{"role": "system", "content": __system_message_binary}]
    shuffle(grammar_data)
    for position in range(n_shot):
        pair = grammar_data[position]
        acceptable, unacceptable = pair['sentence_good'], pair['sentence_bad']
        history += exchange(acceptable, '1')
        history += exchange(unacceptable, '0')
    return history


def _score_prompt(model, tokenizer, generation_pipeline, messages, device):
    """Run one chat prompt through the model and return ``(output, embedding)``.

    ``output`` is the first character of the single generated token after
    stripping whitespace ('' when nothing remains). ``embedding`` is the
    flattened last-layer hidden state at the final prompt position, as a
    numpy array.
    """
    formatted_chat = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True
    )
    # The chat template is expected to emit the special tokens it needs, so they
    # must not be added a second time when the rendered text is tokenized.
    tokens = tokenizer(formatted_chat, return_tensors='pt', add_special_tokens=False).to(device)

    with torch.no_grad():
        forward_pass = model(
            **tokens,
            output_hidden_states=True,
            return_dict=True
        )
        generated = generation_pipeline(
            formatted_chat,
            eos_token_id=tokenizer.eos_token_id,
            return_full_text=False,
            max_new_tokens=1,
            add_special_tokens=False
        )

    # Slice rather than index: a whitespace-only token strips to '' and must not
    # abort the whole evaluation run with an IndexError.
    output = generated[0]['generated_text'].strip()[:1]
    embedding = forward_pass.hidden_states[-1][:, -1, :].detach().cpu().numpy().flatten()
    return output, embedding


def load_multiple_choice_dataset(file_path, model, tokenizer, device, n_shot=5, max_examples=100):
    """Evaluate a model on the two-option grammaticality task.

    Args:
        file_path: JSON Lines file with 'sentence_good' / 'sentence_bad' pairs.
        model: Causal language model called for the forward pass.
        tokenizer: Tokenizer providing the chat template for ``model``.
        device: Device the tokenized prompt is moved to.
        n_shot: Number of demonstration pairs placed in the prompt.
        max_examples: Maximum number of pairs to evaluate (default 100, the
            previously hard-coded value).

    Returns:
        ``(multiple_choice_dataset, model_outputs)``: a list of dicts with
        'good_sentence', 'bad_sentence', 'answer' and 'output', and a parallel
        list of ``{'embedding': ndarray}`` dicts.
    """
    with open(file_path, 'r') as f:
        grammar_data = [json.loads(line) for line in f]

    generation_pipeline = pipeline('text-generation', model=model, tokenizer=tokenizer)

    # Shuffles grammar_data in place and uses its first n_shot entries as demonstrations.
    few_shot_messages = load_LLM_messages_multiple_choice(grammar_data, n_shot=n_shot)

    multiple_choice_dataset = []
    model_outputs = []

    # Skip the demonstration pairs so no evaluated example already appears,
    # with its answer, in the prompt.
    first_eval = max(n_shot, 0)
    for example in tqdm(grammar_data[first_eval:first_eval + max_examples]):
        # Randomise which slot holds the grammatical sentence.
        good_first = uniform(0, 1) < 0.5
        answer = '1' if good_first else '2'
        sentences = [example['sentence_good'], example['sentence_bad']]
        if not good_first:
            sentences.reverse()

        question = {
            "role": "user",
            "content": __question_format_multiple_choice.format(
                sentence_1=sentences[0],
                sentence_2=sentences[1]
            )
        }
        # Build a fresh message list per example instead of append/pop on the shared history.
        output, embedding = _score_prompt(
            model, tokenizer, generation_pipeline, few_shot_messages + [question], device
        )

        model_outputs.append({
            'embedding': embedding
        })
        multiple_choice_dataset.append({
            'good_sentence': example['sentence_good'],
            'bad_sentence': example['sentence_bad'],
            'answer': answer,
            'output': output,
        })

    return multiple_choice_dataset, model_outputs

def load_binary_dataset(file_path, model, tokenizer, device, n_shot=5, max_examples=100):
    """Evaluate a model on the single-sentence grammaticality task.

    Args:
        file_path: JSON Lines file with 'sentence_good' / 'sentence_bad' pairs.
        model: Causal language model called for the forward pass.
        tokenizer: Tokenizer providing the chat template for ``model``.
        device: Device the tokenized prompt is moved to.
        n_shot: Number of demonstration pairs placed in the prompt.
        max_examples: Maximum number of pairs to evaluate (default 100, the
            previously hard-coded value).

    Returns:
        ``(binary_dataset, model_outputs)``: a list of dicts with
        'good_sentence', 'bad_sentence', 'answer' ('1' when the good sentence
        was shown, '0' otherwise) and 'output', and a parallel list of
        ``{'embedding': ndarray}`` dicts.
    """
    with open(file_path, 'r') as f:
        grammar_data = [json.loads(line) for line in f]

    generation_pipeline = pipeline('text-generation', model=model, tokenizer=tokenizer)

    # Shuffles grammar_data in place and uses its first n_shot entries as demonstrations.
    few_shot_messages = load_LLM_messages_binary(grammar_data, n_shot=n_shot)

    binary_dataset = []
    model_outputs = []

    # Skip the demonstration pairs so no evaluated example already appears,
    # with its answer, in the prompt.
    first_eval = max(n_shot, 0)
    for example in tqdm(grammar_data[first_eval:first_eval + max_examples]):
        # Randomly show either the grammatical or the ungrammatical sentence.
        show_good = uniform(0, 1) < 0.5
        answer = '1' if show_good else '0'
        sentence = example['sentence_good'] if show_good else example['sentence_bad']

        question = {
            "role": "user",
            "content": __question_format_binary.format(sentence_1=sentence)
        }
        # Attention maps are no longer requested: nothing in the result used them.
        output, embedding = _score_prompt(
            model, tokenizer, generation_pipeline, few_shot_messages + [question], device
        )

        model_outputs.append({
            'embedding': embedding
        })
        binary_dataset.append({
            'good_sentence': example['sentence_good'],
            'bad_sentence': example['sentence_bad'],
            'answer': answer,
            'output': output,
        })

    return binary_dataset, model_outputs


In [46]:
# Tokenizer of the Zephyr checkpoint; no model weights are loaded here.
# NOTE(review): presumably loaded only for its chat template (see the variable name) — confirm.
zephyr_id = "HuggingFaceH4/zephyr-7b-beta"
template_tokenizer = AutoTokenizer.from_pretrained(zephyr_id)

In [52]:
# from util.data import *
from pprint import pprint
import logging
from warnings import filterwarnings
import pickle

# Only show errors from the transformers logger.
logging.getLogger("transformers").setLevel(logging.ERROR)

# Suppress all Python warnings.
filterwarnings("ignore")

grammar_file = '../data/blimp/anaphor_number_agreement.jsonl'
# NOTE(review): grammar_data is not passed to the call below, which takes the file path instead.
grammar_data = load_grammar_data(grammar_file)

# Evaluate the 8B Llama model; yields the per-example records and the matching embeddings.
dataset, model_outputs = load_multiple_choice_dataset(grammar_file, llama_8b_model, llama_8b_tokenizer, device)

# Persist the per-example records as JSON.
with open('multiple_choice_dataset.json', 'w') as f:
    json.dump(dataset, f, indent=4)

# Persist the embeddings with pickle.
with open('model_outputs.pkl', 'wb') as f:
    pickle.dump(model_outputs, f)

 29%|██▉       | 29/100 [00:04<00:11,  6.00it/s]


KeyboardInterrupt: 

In [149]:
# load_binary_dataset returns (records, model_outputs). Unpack the pair so that only
# the records are serialised to JSON and scored.
binary_dataset, binary_model_outputs = load_binary_dataset(grammar_file, llama_8b_model, llama_8b_tokenizer, device, n_shot=1)

with open('binary_dataset.json', 'w') as f:
    json.dump(binary_dataset, f, indent=4)

print(get_accuracy(binary_dataset))

  2%|▏         | 2/100 [00:00<00:08, 11.86it/s]

odict_keys(['logits', 'past_key_values', 'hidden_states', 'attentions'])
odict_keys(['logits', 'past_key_values', 'hidden_states', 'attentions'])
odict_keys(['logits', 'past_key_values', 'hidden_states', 'attentions'])


  6%|▌         | 6/100 [00:00<00:07, 12.60it/s]

odict_keys(['logits', 'past_key_values', 'hidden_states', 'attentions'])
odict_keys(['logits', 'past_key_values', 'hidden_states', 'attentions'])
odict_keys(['logits', 'past_key_values', 'hidden_states', 'attentions'])


  8%|▊         | 8/100 [00:00<00:07, 12.89it/s]

odict_keys(['logits', 'past_key_values', 'hidden_states', 'attentions'])
odict_keys(['logits', 'past_key_values', 'hidden_states', 'attentions'])
odict_keys(['logits', 'past_key_values', 'hidden_states', 'attentions'])


 10%|█         | 10/100 [00:00<00:07, 12.27it/s]


odict_keys(['logits', 'past_key_values', 'hidden_states', 'attentions'])


KeyboardInterrupt: 

In [134]:
# Score `dataset`; get_accuracy also prints every mismatching answer/output pair.
print(get_accuracy(dataset))

1 2
2 1
1 2
1 2
1 2
1 2
0.94


In [308]:
import numpy as np

In [None]:
# NOTE(review): with the load_multiple_choice_dataset defined earlier in this notebook, the
# fifth positional argument binds to n_shot and the call returns a (dataset, model_outputs)
# tuple. This call presumably targets a different version (e.g. the one star-imported from
# util.data) — confirm which definition is active here.
dataset = load_multiple_choice_dataset(grammar_file, llama_1b_model, llama_1b_tokenizer, device, template_tokenizer)

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

1





In [21]:
# Disabled conversion of a per-record 'hidden_state' array into a JSON-serialisable list.
# for i in range(len(dataset)):
#     dataset[i]['hidden_state'] = dataset[i]['hidden_state'].tolist()

# Write `dataset` to the same JSON file used by the earlier evaluation cell.
with open('multiple_choice_dataset.json', 'w') as f:
    json.dump(dataset, f, indent=4)


In [None]:
from util.data import *
from pprint import pprint

# Re-declare the data path and load the raw sentence pairs for the
# (currently commented-out) experiment that follows.
# NOTE(review): the star import at the top of this cell may rebind load_grammar_data and the
# loader functions defined earlier in the notebook — confirm which definitions are intended.
grammar_file = '../data/blimp/anaphor_number_agreement.jsonl'
grammar_data = load_grammar_data(grammar_file)

# chat_template = load_LLM_messages_multiple_choice(grammar_data)

# sentence_1 = grammar_data[0]['sentence_good']
# sentence_2 = grammar_data[0]['sentence_bad']

# chat_template.append({
#     "role": "user",
#     "content": f"Which sentence is more grammatical and native-like?\n1) {sentence_1}\n2) {sentence_2}"
# })

# formatted_chat = tokenizer.apply_chat_template(chat_template, tokenize=False)

# input_ids = tokenizer.encode(formatted_chat, return_tensors='pt').to(device)

# with torch.no_grad():
#     forward_pass = model(
#         input_ids,
#         output_hidden_states=True,
#         output_attentions=True
#     )
#     outputs = model.generate(input_ids, num_return_sequences=1, eos_token_id=tokenizer.eos_token_id)
#     final_hidden_state = forward_pass.hidden_states[-1][:, -1, :] 
#     print(final_hidden_state.shape)
#     final_hidden_state = final_hidden_state.cpu().numpy().flatten()
#     print(final_hidden_state)
    
# output = tokenizer.decode(outputs[0], skip_special_tokens=True)

# output = output.split('[/INST]')
# print(output[-1].strip())

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([1, 4096])
[ 2.021  -0.3162  4.46   ... -3.107   1.346   1.155 ]
2
