In [1]:
from tqdm import tqdm
from scrape_alpaca import AlpacaParser

from transformers import AutoTokenizer, AutoModel, AutoConfig, AutoModelForCausalLM
from sentence_transformers import SentenceTransformer
import transformers


from torch.utils.data import Dataset
import torch
import torch.nn.functional as F

In [2]:
'''
Parse json data and extract questions
'''

# Build the project-local parser (imported from scrape_alpaca) and pull the questions out of it.
# NOTE(review): get_questions() is not visible in this notebook; later cells slice the result
# and call str.replace on each element, so it is presumably a list of strings -- confirm.
parser = AlpacaParser()
questions = parser.get_questions()

In [None]:
'''
Pass questions into LLaMA v2 to derive outputs
'''

# Instruction text that generate_message() places in front of every question in the user turn.
# The literal both starts and ends with a newline, so the question that is appended to it
# begins on a fresh line.
prefix_q = """
Please answer the following question to the best of your ability. 
Do not use more than 20 words in your output, and do not include
any text before your answer such as: "Sure! Here's a ...". 
"""

def generate_message(question):
    """Build the two-turn chat payload for a single question.

    Apostrophes are removed from ``question`` and the result is appended to the
    module-level ``prefix_q`` instructions to form the user turn. The system
    turn is always present but empty.

    Returns:
        A two-element list of ``{'role': ..., 'content': ...}`` dicts: the
        system turn first, then the user turn.
    """
    without_apostrophes = question.replace("'", "")

    system_turn = dict(role='system', content='')
    user_turn = dict(role='user', content=prefix_q + without_apostrophes)
    return [system_turn, user_turn]

def messages_to_prompt(messages):
    """Flatten a [system, user] message pair into one instruction-format prompt string.

    Args:
        messages: Exactly two dicts with ``role`` and ``content`` keys; the
            first must have role ``'system'`` and the second role ``'user'``.

    Returns:
        ``"<s>[INST] <<SYS>>\\n{system}\\n<</SYS>>\\n\\n{user} [/INST]"`` with
        the two contents substituted.

    Raises:
        AssertionError: If the list length or either role is not as expected.
    """
    assert len(messages) == 2
    system_turn = messages[0]
    assert system_turn['role'] == 'system'
    user_turn = messages[1]
    assert user_turn['role'] == 'user'

    # Positional fields are filled verbatim; braces inside the contents are not re-interpreted.
    template = "<s>[INST] <<SYS>>\n{}\n<</SYS>>\n\n{} [/INST]"
    return template.format(system_turn['content'], user_turn['content'])

def load_llama_model(model_name_or_path="/scratch/users/erjones/models/postprocessed_models/7B-chat"):
    """Load the causal language model and its tokenizer for batched generation.

    Args:
        model_name_or_path: Checkpoint directory (or hub id) handed to the
            ``Auto*`` loaders. Defaults to the path this notebook was written
            against, so existing zero-argument calls behave exactly as before;
            pass another location to run on a different machine.

    Returns:
        ``(model, tokenizer)``. The model is loaded in float16 with
        ``device_map="auto"``; the tokenizer pads on the left, has a pad token
        id (0 if the checkpoint defines none) and has its BOS token id set to 1.
    """
    # NOTE(review): this config is only inspected for `architectures` below and is never
    # passed to the model, so `max_new_tokens` here does not appear to influence
    # generation -- confirm whether it was meant to be a generate() argument instead.
    config = AutoConfig.from_pretrained(model_name_or_path, max_new_tokens = 256)

    # `architectures` may be absent (None) in a config; treat that as "not a Llama checkpoint"
    # instead of failing with a TypeError on the membership test.
    architectures = config.architectures or []
    use_fast_tokenizer = "LlamaForCausalLM" not in architectures

    tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, use_fast=use_fast_tokenizer, padding_side="left")
    # Only fill in a pad token when the checkpoint does not define one.
    if tokenizer.pad_token_id is None:
        tokenizer.pad_token_id = 0
    # NOTE(review): hard-coded ids (pad=0, bos=1) presumably match this checkpoint's vocabulary -- confirm.
    tokenizer.bos_token_id = 1

    model = AutoModelForCausalLM.from_pretrained(model_name_or_path, torch_dtype=torch.float16, device_map="auto")
    return model, tokenizer

def extract_answer(llm_response, question_key):
    """Return the part of a generated sequence that follows the echoed question.

    The generated text contains the prompt followed by the model's reply. The
    ``[/INST]`` markers are removed and everything after the *first* occurrence
    of the question is returned, so a reply that quotes the question again is
    kept intact (splitting on every occurrence and keeping the last piece would
    cut such a reply short).

    Args:
        llm_response: Full generated text, prompt included.
        question_key: The original question. Apostrophes are removed before
            matching, mirroring how the prompt is built by generate_message().

    Returns:
        The text after the question with ``[/INST]`` removed (leading whitespace
        is preserved). If the question is empty or cannot be found verbatim, the
        text after the first ``[/INST]`` marker is returned instead; if that
        marker is missing too, the response is returned unchanged.
    """
    marker = "[/INST]"
    question_key = question_key.replace("'", "")
    cleaned = llm_response.replace(marker, "")

    # An empty separator is rejected by str.partition, so only search for a real key.
    if question_key:
        _, matched, tail = cleaned.partition(question_key)
        if matched:
            return tail

    # Fallback: the question was not echoed verbatim, so rely on the instruction
    # terminator rather than returning the whole prompt as the "answer".
    _, matched, tail = llm_response.partition(marker)
    if matched:
        return tail.replace(marker, "")
    return cleaned

# Cap the run at the first 500 questions and render each one as a complete chat prompt.
questions = questions[:500]
modified_questions = [messages_to_prompt(generate_message(q)) for q in questions]

# Raw generated sequences (prompt + reply) and the extracted replies, in question order.
generated_text = []
answers = []
# NOTE: `batch_size` is a notebook-level global; the similarity cell further down
# reads it too when slicing batches.
batch_size = 64
# Running position in `questions`; advanced once per generated sequence in the loop below.
idx = 0

model, tokenizer = load_llama_model()
# NOTE(review): `model` is already loaded in float16 with device_map="auto" by
# load_llama_model(); the dtype/device_map arguments here look redundant -- confirm.
pipeline = transformers.pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    torch_dtype = torch.float16, 
    device_map = "auto"
)

# NOTE(review): the pipeline is called on the whole prompt list inside tqdm(...), so the
# bar presumably only tracks iteration over results that are already computed -- confirm.
# NOTE(review): `max_length = 300` presumably bounds prompt + reply tokens together, so
# long questions leave little room for the answer -- confirm against the generate() docs.
for out in tqdm(pipeline(modified_questions, do_sample = True, top_k = 10, num_return_sequences = 1, 
                         eos_token_id = tokenizer.eos_token_id, max_length = 300, batch_size = batch_size)):

    # `out` holds the sequences generated for one prompt (a single one, given
    # num_return_sequences = 1), which is what keeps `idx` aligned with `questions`.
    for sequence in out:
        generated_text.append(sequence['generated_text'])
        
        answer = extract_answer(sequence['generated_text'], questions[idx])
        answers.append(answer) 

        print("Question:", questions[idx], "Answer:", answer) 
        idx += 1

# Every question must have received exactly one answer; later cells index both lists in parallel.
assert(len(questions) == len(answers))

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


In [4]:
for el in generated_text:
    print(el)




<s>[INST] <<SYS>>

<</SYS>>

Please answer the following question to the best of your ability. Keep your answers as succinct as possible, and do not use more than 20 words in your output. Give three tips for staying healthy. [/INST]  Sure, here are three tips for staying healthy:
1. Eat a balanced diet rich in fruits, vegetables, and whole grains.
2. Exercise regularly, including cardio and strength training.
3. Get enough sleep each night to support overall health.
<s>[INST] <<SYS>>

<</SYS>>

Please answer the following question to the best of your ability. Keep your answers as succinct as possible, and do not use more than 20 words in your output. What are the three primary colors? [/INST]  Sure! The three primary colors are red, blue, and yellow.
<s>[INST] <<SYS>>

<</SYS>>

Please answer the following question to the best of your ability. Keep your answers as succinct as possible, and do not use more than 20 words in your output. Describe the structure of an atom. [/INST]  An atom

In [11]:
'''
Create embeddings for answers and compare similarities
'''

def load_bert_model():
    """Instantiate the sentence-embedding model used to embed questions and answers."""
    return SentenceTransformer('paraphrase-distilroberta-base-v1')

def derive_embeds(data, bert_model, batch_size=64):
    """Encode ``data`` in batches and return L2-normalised embeddings as one tensor.

    Args:
        data: Sliceable sequence of texts to embed.
        bert_model: Object whose ``encode(batch)`` returns a 2-D NumPy array
            with one row per input item.
        batch_size: Number of items passed to ``encode`` per call. This used to
            be read from a notebook-level global defined in another cell; the
            default matches the value the notebook sets (64), so existing
            two-argument calls behave as before.

    Returns:
        A tensor with one unit-norm row per item of ``data``, placed on the GPU
        when CUDA is available and on the CPU otherwise.
    """
    # Fall back to the CPU instead of failing outright on machines without CUDA.
    device = "cuda" if torch.cuda.is_available() else "cpu"

    embeds = []
    for start in tqdm(range(0, len(data), batch_size)):
        cur_batch = data[start : start + batch_size]

        with torch.no_grad():
            embed_batch = bert_model.encode(cur_batch)

        # Normalise each row so that dot products between rows are cosine similarities.
        embed_batch = F.normalize(torch.from_numpy(embed_batch), dim = 1)
        embeds.append(embed_batch)

    return torch.cat(embeds, dim = 0).to(device)


# Embed every question and every answer once. derive_embeds() returns unit-norm rows,
# so the matrix products below are cosine similarities.
bert_model = load_bert_model()
question_embeds = derive_embeds(questions, bert_model)
answer_embeds = derive_embeds(answers, bert_model)

# similar_pairs: questions are close (> similarity_threshold) while the answer similarity
# differs from the question similarity by more than divergence_threshold.
# non_similar_pairs: every other (row, column) combination, self-pairs included.
non_similar_pairs = []
similar_pairs = []
similarity_threshold = 0.7
divergence_threshold = 0.2

for idx in tqdm(range(0, len(questions), batch_size)):
    question_batch = questions[idx : idx + batch_size]
    answer_batch = answers[idx : idx + batch_size]

    # Rows are the items of the current batch, columns are all items.
    question_similarity_matrix = torch.matmul(question_embeds[idx : idx + batch_size], question_embeds.t())
    answer_similarity_matrix = torch.matmul(answer_embeds[idx : idx + batch_size], answer_embeds.t())

    mask = (question_similarity_matrix > similarity_threshold) & (
        (question_similarity_matrix - answer_similarity_matrix).abs() > divergence_threshold
    )

    # Convert each matrix to nested Python lists once per batch. Reading scores with
    # .item() per element forces a separate device-to-host transfer for every pair.
    question_scores = question_similarity_matrix.tolist()
    answer_scores = answer_similarity_matrix.tolist()

    for selection, destination in ((mask, similar_pairs), (~mask, non_similar_pairs)):
        rows, cols = selection.nonzero(as_tuple = True)

        # `i` is relative to the current batch, `j` is an absolute index into the full lists.
        for i, j in zip(rows.tolist(), cols.tolist()):
            destination.append((
                (question_batch[i], questions[j]),
                (answer_batch[i], answers[j]),
                (question_scores[i][j], answer_scores[i][j]),
            ))

# Sanity

print("DEBUG: SIMILAR_PAIRS", len(similar_pairs))
for pair in similar_pairs:
    print(pair)

print("DEBUG: NON_SIMILAR_PAIRS", len(non_similar_pairs))


100%|██████████| 8/8 [00:00<00:00, 21.91it/s]
100%|██████████| 8/8 [00:00<00:00, 20.89it/s]
100%|██████████| 8/8 [00:06<00:00,  1.24it/s]

DEBUG: SIMILAR_PAIRS 20
(('Use the given data to calculate the median.. [2, 3, 7, 8, 10]', 'Given the following data, What is the median score. 7, 9, 13, 2, 8'), ('   The median of the given data is 5.', '   The median score is 9.'), (0.7682896256446838, 0.46800801157951355))
(('Generate a poem that expresses joy.', 'Generate a haiku poem.'), ('   Sure! Here\'s a short poem expressing joy:\n"Joyful moments dance in my heart,\nA symphony of smiles, a work of art."', "   Sure, here's a haiku poem for you:\nSnowflakes gently fall\nBlanketing the landscape white\nWinter's peaceful hush"), (0.7486152648925781, 0.498683899641037))
(('Generate a poem that expresses joy.', 'Generate a poem about spring.'), ('   Sure! Here\'s a short poem expressing joy:\n"Joyful moments dance in my heart,\nA symphony of smiles, a work of art."', "   Sure! Here is a short poem about spring:\nSpring arrives with gentle grace,\nBringing life to the winter's place,\nGreen shoots burst forth from the ground,\nHope 




In [16]:
import random

# Pair every question with the answer generated for it.
combined = [(question, answers[i]) for i, question in enumerate(questions)]

# Shuffle with a dedicated, seeded generator so the sampled subset (and therefore the
# prompt) is the same on every Restart & Run All, without touching the global RNG state.
shuffle_seed = 0
random.Random(shuffle_seed).shuffle(combined)

# Maximum number of (question, answer) pairs embedded in the prompt.
limit = 100

prompt_prefix = """
I will provide a series of data for you to remember. Subsequently, I will ask you some
questions to test your performance! Here are some pairs of prompts for you to memorize. 
They will be in the form (question, answer).
"""

# NOTE(review): this suffix talks about an embedding model, sentence pairs encoded
# similarly and visual generation, while the data above it are (question, answer)
# pairs -- confirm that this is the intended instruction text.
prompt_suffix = """
I’m trying to find failures with an embedding model. The above are some pairs of
sentences that it encodes very similarly, even though they’re conveying different concepts.
Using these specific examples, are there any general types of failures you notice the
embedding is making, or any common features that the embedding fails to encode? Try
to give failures that are specific enough that someone could reliably produce examples
that the embedding would encode similarly, even though it shouldn’t. Please try to give as
many general failures as possible. Please focus on differences that are important visually,
as these embeddings are later used to generate images, or videos. In your failure modes,
please explain clearly why the failure would lead to problems for future tasks related to
visual generation.Please summarize as many as you can and stick to the examples.
"""

# Take the first `limit` shuffled pairs; `number` records how many were actually used.
selected_pairs = combined[:limit]
number = len(selected_pairs)

# Assemble the data section in one pass. Pairs are written back to back with no
# separator, i.e. "[(q,a)(q,a)...]".
response_data = "[" + "".join(
    "(" + question + "," + answer + ")" for question, answer in selected_pairs
) + "]"
prompt = prompt_prefix + response_data + prompt_suffix

print(prompt)
    


I will provide a series of data for you to remember. Subsequently, I will ask you some
questions to test your performance! Here are some pairs of prompts for you to memorize. 
They will be in the form (question, answer).
[(Generate a poem about spring.,   Sure! Here is a short poem about spring:
Spring arrives with gentle grace,
Bringing life to the winter's place,
Green shoots burst forth from the ground,
Hope and joy begin to abound.)(Identify the theme of the following book.. The book is about a small town in the Midwest and how people respond to a series of tragedies and unexpected events that shake their lives.,   The theme of the book is resilience and community in the face of adversity.)(Write a definition of "photoshop".,   Photoshop: a software for editing and manipulating digital images, commonly used for photo retouching and design. (19 words))(Analyze the following poem and explain its meaning.. 'My love is like a red, red rose',   The poem "My love is like a red, red rose

NameError: name 'brea' is not defined