In [None]:
from metrics.knowmem import eval as eval_knowmem
from metrics.knowmem import get_prefix_before_words_occur
from utils import load_model, load_tokenizer, write_csv, read_json, write_json, load_csv
# from transformers import GPTNeoXForCausalLM, AutoTokenizer
from constants import SUPPORTED_METRICS, CORPORA, LLAMA_DIR, DEFAULT_DATA, AUC_RETRAIN

import os
# from transformers import LlamaForCausalLM, LlamaTokenizer
import torch

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Prefer the GPU when CUDA is usable; otherwise run everything on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [None]:
from transformer_lens import HookedTransformer

# Load Llama-3-8B through TransformerLens with every weight-processing option
# (LayerNorm folding, unembed centering, writing-weight centering) switched
# off, then move the model to the selected device.
model = HookedTransformer.from_pretrained(
    "meta-llama/Meta-Llama-3-8B",
    fold_ln=False,
    center_unembed=False,
    center_writing_weights=False,
)
model = model.to(device)

# (token string, token id) pairs ordered by token id.
sorted_vocab = sorted(model.tokenizer.vocab.items(), key=lambda pair: pair[1])

# Peek at three id ranges of the vocabulary, each followed by a blank line.
for first_id, last_id in ((0, 20), (250, 270), (990, 1010)):
    print(sorted_vocab[first_id:last_id])
    print()


Loading checkpoint shards: 100%|██████████| 4/4 [00:02<00:00,  1.95it/s]


Loaded pretrained model meta-llama/Meta-Llama-3-8B into HookedTransformer
[('!', 0), ('"', 1), ('#', 2), ('$', 3), ('%', 4), ('&', 5), ("'", 6), ('(', 7), (')', 8), ('*', 9), ('+', 10), (',', 11), ('-', 12), ('.', 13), ('/', 14), ('0', 15), ('1', 16), ('2', 17), ('3', 18), ('4', 19)]

[('ľ', 250), ('Ŀ', 251), ('ŀ', 252), ('Ł', 253), ('ł', 254), ('Ń', 255), ('ĠĠ', 256), ('ĠĠĠĠ', 257), ('in', 258), ('Ġt', 259), ('ĠĠĠĠĠĠĠĠ', 260), ('er', 261), ('ĠĠĠ', 262), ('on', 263), ('Ġa', 264), ('re', 265), ('at', 266), ('st', 267), ('en', 268), ('or', 269)]

[('Ġwork', 990), ('Ġem', 991), ('inal', 992), ('Ġsp', 993), ('Ġwhen', 994), ('.set', 995), ('ĠĠĠĠĠĠ', 996), ('):Ċ', 997), ('to', 998), ('quire', 999), ('indow', 1000), ('lement', 1001), ('pect', 1002), ('ash', 1003), ('[i', 1004), ('Ġuse', 1005), ('.F', 1006), ('pec', 1007), ('Ġad', 1008), ('ove', 1009)]



In [11]:
# Few-shot sanity check: prefix each probe question with the in-context
# examples from the forget-set ICL file, decode greedily, and keep only the
# text generated before the model starts another question/answer pair.
knowmem_forget_qa_icl_file = "data/books/knowmem/forget_qa_icl.json"

# Token budget per answer, and the markers after which the continuation is
# discarded (they indicate the model has moved on to a new few-shot entry).
PROBE_MAX_NEW_TOKENS = 100
ANSWER_STOP_MARKERS = ["\n\n", "\nQuestion", "Question:"]

questions = [
    "Who is the author of the Fourth Wing?",
    "What is the capital of France?",
    "Who wrote 'To Kill a Mockingbird'?",
    "What is the largest planet in our solar system?",
    "who is Harry Potter's best friend?",
    "what is the name of the school Harry Potter goes to?",
    "who is the author of the Harry Potter series?",
    "Dumbledore is the headmaster of which school?",
]

# In-context examples: a list of records with 'question' and 'answer' keys.
icl = read_json(knowmem_forget_qa_icl_file)
icl_qs = [d['question'] for d in icl]
icl_as = [d['answer'] for d in icl]

# Few-shot prefix shared by every probe question.
general_prompt: str = "".join(
    f"Question: {icl_q}\nAnswer: {icl_a}\n\n" for icl_q, icl_a in zip(icl_qs, icl_as)
)

answers_icl = []
for question in questions:
    # NOTE(review): the prompt ends with a space after "Answer:" while the
    # few-shot examples continue directly with the answer text; this may
    # influence the first generated token. Left as-is — confirm it matches
    # the prompt format used by the metrics.knowmem evaluation.
    prompt = general_prompt + f"Question: {question}\nAnswer: "

    # The prompt is tokenized in full. Truncating it to a fixed length would
    # cut from the end of the text, i.e. drop the probe question itself.
    input_ids = model.tokenizer(
        prompt,
        return_tensors='pt',
        add_special_tokens=True).input_ids

    # Greedy decoding; verbose=False hides the per-question progress bar.
    output_ids = model.generate(
        input_ids.to(device),
        max_new_tokens=PROBE_MAX_NEW_TOKENS,
        do_sample=False,
        verbose=False)
    # Drop the prompt tokens so only the generated continuation is decoded.
    output_ids = output_ids[:, input_ids.shape[1]:]

    output = model.tokenizer.batch_decode(
        output_ids,
        skip_special_tokens=True,
        clean_up_tokenization_spaces=True)[0]

    stripped_output = get_prefix_before_words_occur(output, ANSWER_STOP_MARKERS)
    answers_icl.append(stripped_output.strip())

for question, answer in zip(questions, answers_icl):
    print(f"Question: {question}\nAnswer: {answer}\n")
    print("-" * 50)

Reading JSON from data/books/knowmem/forget_qa_icl.json ...


100%|██████████| 100/100 [00:04<00:00, 22.87it/s]
100%|██████████| 100/100 [00:04<00:00, 23.01it/s]
100%|██████████| 100/100 [00:04<00:00, 23.03it/s]
100%|██████████| 100/100 [00:04<00:00, 23.04it/s]
100%|██████████| 100/100 [00:04<00:00, 23.04it/s]
100%|██████████| 100/100 [00:04<00:00, 23.02it/s]
100%|██████████| 100/100 [00:04<00:00, 23.02it/s]
100%|██████████| 100/100 [00:04<00:00, 23.01it/s]

Question: Who is the author of the Fourth Wing?
Answer: 4th Wing

--------------------------------------------------
Question: What is the capital of France?
Answer: Paris

--------------------------------------------------
Question: Who wrote 'To Kill a Mockingbird'?
Answer: Harper Lee

--------------------------------------------------
Question: What is the largest planet in our solar system?
Answer: Jupiter

--------------------------------------------------
Question: who is Harry Potter's best friend?
Answer: Ron Weasley

--------------------------------------------------
Question: what is the name of the school Harry Potter goes to?
Answer: Hogwarts

--------------------------------------------------
Question: who is the author of the Harry Potter series?
Answer: J.K. Rowling

--------------------------------------------------
Question: Dumbledore is the headmaster of which school?
Answer: Hogwarts

--------------------------------------------------





In [10]:
# Inline probe set about "The House of the Cerulean Sea": each key is a
# question posed to the model and each value is the reference answer that is
# printed next to the model's reply as "correct answer".
# NOTE(review): the source of these reference answers is not recorded in this
# notebook — verify them against the book before treating them as ground truth.
qa_pairs = {  
  "Who is the main character in The House of the Cerulean Sea?": "Linus Baker",
  "What organization does Linus Baker work for in The House of the Cerulean Sea?": "The Department in Charge of Magical Youth (DICOMY)",
  "What is Linus’s job title in The House of the Cerulean Sea?": "Caseworker",
  "Who is Linus’s supervisor at DICOMY in The House of the Cerulean Sea?": "Mr. Werner",
  "What does DICOMY monitor in The House of the Cerulean Sea?": "Orphanages for magical children",
  "Where does Linus live at the start of the story of The House of the Cerulean Sea?": "A small house in a dreary city",
  "What object does Linus always carry with him in The House of the Cerulean Sea?": "An umbrella",
  "What type of report does Linus have to write after visiting an orphanage?": "A detailed case report",
  "Who assigns Linus the task to investigate Marsyas Island Orphanage?": "Extremely Upper Management",
  "What is unique about the Marsyas Island Orphanage?": "It houses magical children who are considered extremely dangerous",
  "Who is the headmaster of the Marsyas Island Orphanage?": "Arthur Parnassus",
  "What is Lucy’s special ability?": "He has the potential to bring chaos and destruction, but is a kind child",
  "What type of magical being is Sal?": "A werewolf",
  "What does Sal love to do in his free time?": "Write poetry",
  "What type of magical being is Talia?": "A female gnome",
  "What is Talia’s favorite activity?": "Tending to her garden",
  "What type of magical creature is Chauncey?": "A green, amorphous tentacled blob",
  "What does Chauncey dream of becoming?": "A bellhop",
  "What type of magical being is Phee?": "A forest sprite",
  "What does Phee feel connected to?": "Nature and the trees",
  "Who is the oldest child at the orphanage?": "Sal",
  "Who is the youngest child at the orphanage?": "Lucy",
  "What is Arthur Parnassus’s secret?": "He is a phoenix, a magical being",
  "What is Arthur’s relationship to the children?": "He is their protector and mentor",
  "What role does Zoe Chappelwhite play on the island?": "She is a caretaker and protector of the children",
  "What type of magical being is Zoe?": "A sprite",
  "What is the name of the nearby village?": "Marsyas",
  "What report does Linus have to write about Marsyas Island?": "An evaluation of the orphanage's safety and welfare",
  "What is the significance of the cerulean sea?": "It symbolizes peace, beauty, and acceptance",
  "What does Lucy love to play?": "The piano",
  "What does the Extremely Upper Management request from Linus after his assignment?": "To submit detailed reports on the orphanage",
  "What is Linus’s initial reaction to the children?": "He is cautious and unsure of them",
  "What is the main theme of The House of the Cerulean Sea?": "Acceptance and understanding of differences",
  "What fear does Sal struggle with?": "Fear of being hurt and rejected",
  "What does Linus begin to see in Arthur over time?": "A compassionate and dedicated person",
  "Who defends Linus and the children when townspeople confront them?": "Arthur and Zoe",
  "What does Linus struggle with at his job at DICOMY?": "Following rigid rules over his own moral compass",
  "What ultimately happens with Linus’s job at DICOMY?": "He decides to leave his position",
  "Who provides Linus with a sense of belonging?": "Arthur and the children",
  "What role does Linus take on by the end of the story?": "A parental figure to the children",
  "What does Linus receive as a memento when he leaves the orphanage?": "A seashell",
  "Who is the character that initially seems intimidating but is gentle?": "Lucy",
  "How does Arthur respond to threats against the children?": "With calm and diplomacy",
  "What is the main conflict Linus faces during his assignment?": "Balancing his duty to DICOMY with his growing affection for the children",
  "How does the relationship between Linus and Arthur develop?": "They evolve from acquaintances to close friends and romantic partners",
  "What prejudice do the villagers have against the orphanage?": "They fear and mistrust the magical children",
  "How does Linus's perspective on magical children change?": "He learns to see them as unique individuals deserving love",
  "What is the significance of Linus's rulebook?": "It symbolizes his adherence to rules, which he begins to question",
  "How do the children impact Linus's personal growth?": "They teach him about love, acceptance, and challenging norms",
  "What role does the theme of found family play in the novel?": "It highlights that family is formed through bonds of love, not just blood"
}

# Book-knowledge probe: ask every question in `qa_pairs` using the few-shot
# prompt built from the forget-set ICL file, then print the shortened model
# answer next to the reference answer.
icl = read_json(knowmem_forget_qa_icl_file)
icl_qs = [d['question'] for d in icl]
icl_as = [d['answer'] for d in icl]

# Few-shot prefix shared by every question.
general_prompt: str = "".join(
    f"Question: {icl_q}\nAnswer: {icl_a}\n\n" for icl_q, icl_a in zip(icl_qs, icl_as)
)

# Generation settings, fixed for the whole run.
max_new_tokens = 50
ANSWER_STOP_MARKERS = ["\n\n", "\nQuestion", "Question:"]

answers = {}      # question -> full decoded continuation
answers_icl = {}  # question -> continuation cut at the first stop marker, stripped

for question in qa_pairs:
    # Lightweight progress indicator (one line per question).
    print(f"Question: {question}")

    # NOTE(review): the prompt ends with a space after "Answer:" while the
    # few-shot examples continue directly with the answer text; this may
    # influence the first generated token. Left as-is — confirm it matches
    # the prompt format used by the metrics.knowmem evaluation.
    prompt = general_prompt + f"Question: {question}\nAnswer: "

    input_ids = model.tokenizer(
        prompt,
        return_tensors='pt',
        add_special_tokens=True).input_ids

    # Greedy decoding; verbose=False hides the per-question progress bar.
    output_ids = model.generate(
        input_ids.to(device),
        max_new_tokens=max_new_tokens,
        do_sample=False,
        verbose=False)
    # Drop the prompt tokens so only the generated continuation is decoded.
    output_ids = output_ids[:, input_ids.shape[1]:]

    output = model.tokenizer.batch_decode(
        output_ids,
        skip_special_tokens=True,
        clean_up_tokenization_spaces=True)[0]

    answers[question] = output

    stripped_output = get_prefix_before_words_occur(output, ANSWER_STOP_MARKERS)
    answers_icl[question] = stripped_output.strip()

# Side-by-side report of the model's short answer and the reference answer.
for question, answer in qa_pairs.items():
    print(f"Question: {question}")
    print(f"Answer short: {answers_icl[question]}\n")
    print(f'correct answer: {answer}')
    print("-" * 50)


Reading JSON from data/books/knowmem/forget_qa_icl.json ...
Question: Who is the main character in The House of the Cerulean Sea?


100%|██████████| 50/50 [00:02<00:00, 21.36it/s]


Question: What organization does Linus Baker work for in The House of the Cerulean Sea?


100%|██████████| 50/50 [00:02<00:00, 21.56it/s]


Question: What is Linus’s job title in The House of the Cerulean Sea?


100%|██████████| 50/50 [00:02<00:00, 21.58it/s]


Question: Who is Linus’s supervisor at DICOMY in The House of the Cerulean Sea?


100%|██████████| 50/50 [00:02<00:00, 21.59it/s]


Question: What does DICOMY monitor in The House of the Cerulean Sea?


100%|██████████| 50/50 [00:02<00:00, 21.46it/s]


Question: Where does Linus live at the start of the story of The House of the Cerulean Sea?


100%|██████████| 50/50 [00:02<00:00, 21.56it/s]


Question: What object does Linus always carry with him in The House of the Cerulean Sea?


100%|██████████| 50/50 [00:02<00:00, 21.59it/s]


Question: What type of report does Linus have to write after visiting an orphanage?


100%|██████████| 50/50 [00:02<00:00, 21.56it/s]


Question: Who assigns Linus the task to investigate Marsyas Island Orphanage?


100%|██████████| 50/50 [00:02<00:00, 21.55it/s]


Question: What is unique about the Marsyas Island Orphanage?


100%|██████████| 50/50 [00:02<00:00, 21.60it/s]


Question: Who is the headmaster of the Marsyas Island Orphanage?


100%|██████████| 50/50 [00:02<00:00, 21.61it/s]


Question: What is Lucy’s special ability?


100%|██████████| 50/50 [00:02<00:00, 21.60it/s]


Question: What type of magical being is Sal?


100%|██████████| 50/50 [00:02<00:00, 21.61it/s]


Question: What does Sal love to do in his free time?


100%|██████████| 50/50 [00:02<00:00, 21.54it/s]


Question: What type of magical being is Talia?


100%|██████████| 50/50 [00:02<00:00, 21.60it/s]


Question: What is Talia’s favorite activity?


100%|██████████| 50/50 [00:02<00:00, 21.62it/s]


Question: What type of magical creature is Chauncey?


100%|██████████| 50/50 [00:02<00:00, 21.60it/s]


Question: What does Chauncey dream of becoming?


100%|██████████| 50/50 [00:02<00:00, 21.50it/s]


Question: What type of magical being is Phee?


100%|██████████| 50/50 [00:02<00:00, 21.61it/s]


Question: What does Phee feel connected to?


100%|██████████| 50/50 [00:02<00:00, 21.58it/s]


Question: Who is the oldest child at the orphanage?


100%|██████████| 50/50 [00:02<00:00, 21.61it/s]


Question: Who is the youngest child at the orphanage?


100%|██████████| 50/50 [00:02<00:00, 21.59it/s]


Question: What is Arthur Parnassus’s secret?


100%|██████████| 50/50 [00:02<00:00, 21.61it/s]


Question: What is Arthur’s relationship to the children?


100%|██████████| 50/50 [00:02<00:00, 21.59it/s]


Question: What role does Zoe Chappelwhite play on the island?


100%|██████████| 50/50 [00:02<00:00, 21.63it/s]


Question: What type of magical being is Zoe?


100%|██████████| 50/50 [00:02<00:00, 21.56it/s]


Question: What is the name of the nearby village?


100%|██████████| 50/50 [00:02<00:00, 21.59it/s]


Question: What report does Linus have to write about Marsyas Island?


100%|██████████| 50/50 [00:02<00:00, 21.59it/s]


Question: What is the significance of the cerulean sea?


100%|██████████| 50/50 [00:02<00:00, 21.65it/s]


Question: What does Lucy love to play?


100%|██████████| 50/50 [00:02<00:00, 21.46it/s]


Question: What does the Extremely Upper Management request from Linus after his assignment?


100%|██████████| 50/50 [00:02<00:00, 21.52it/s]


Question: What is Linus’s initial reaction to the children?


100%|██████████| 50/50 [00:02<00:00, 21.59it/s]


Question: What is the main theme of The House of the Cerulean Sea?


  0%|          | 0/50 [00:00<?, ?it/s]


Question: What fear does Sal struggle with?


100%|██████████| 50/50 [00:02<00:00, 21.62it/s]


Question: What does Linus begin to see in Arthur over time?


100%|██████████| 50/50 [00:02<00:00, 21.61it/s]


Question: Who defends Linus and the children when townspeople confront them?


100%|██████████| 50/50 [00:02<00:00, 21.67it/s]


Question: What does Linus struggle with at his job at DICOMY?


100%|██████████| 50/50 [00:02<00:00, 21.59it/s]


Question: What ultimately happens with Linus’s job at DICOMY?


100%|██████████| 50/50 [00:02<00:00, 21.65it/s]


Question: Who provides Linus with a sense of belonging?


100%|██████████| 50/50 [00:02<00:00, 21.60it/s]


Question: What role does Linus take on by the end of the story?


100%|██████████| 50/50 [00:02<00:00, 21.60it/s]


Question: What does Linus receive as a memento when he leaves the orphanage?


100%|██████████| 50/50 [00:02<00:00, 21.61it/s]


Question: Who is the character that initially seems intimidating but is gentle?


100%|██████████| 50/50 [00:02<00:00, 21.49it/s]


Question: How does Arthur respond to threats against the children?


100%|██████████| 50/50 [00:02<00:00, 21.47it/s]


Question: What is the main conflict Linus faces during his assignment?


100%|██████████| 50/50 [00:02<00:00, 21.62it/s]


Question: How does the relationship between Linus and Arthur develop?


100%|██████████| 50/50 [00:02<00:00, 21.60it/s]


Question: What prejudice do the villagers have against the orphanage?


100%|██████████| 50/50 [00:02<00:00, 21.58it/s]


Question: How does Linus's perspective on magical children change?


100%|██████████| 50/50 [00:02<00:00, 21.57it/s]


Question: What is the significance of Linus's rulebook?


100%|██████████| 50/50 [00:02<00:00, 21.63it/s]


Question: How do the children impact Linus's personal growth?


100%|██████████| 50/50 [00:02<00:00, 21.56it/s]


Question: What role does the theme of found family play in the novel?


100%|██████████| 50/50 [00:02<00:00, 21.62it/s]

Question: Who is the main character in The House of the Cerulean Sea?
Answer short: Linus Baker

correct answer: Linus Baker
--------------------------------------------------
Question: What organization does Linus Baker work for in The House of the Cerulean Sea?
Answer short: 4th Estate

correct answer: The Department in Charge of Magical Youth (DICOMY)
--------------------------------------------------
Question: What is Linus’s job title in The House of the Cerulean Sea?
Answer short: Archivist

correct answer: Caseworker
--------------------------------------------------
Question: Who is Linus’s supervisor at DICOMY in The House of the Cerulean Sea?
Answer short: Mr. Quigley

correct answer: Mr. Werner
--------------------------------------------------
Question: What does DICOMY monitor in The House of the Cerulean Sea?
Answer short: 1. The number of times a person has been in contact with a deity 2. The number of times a person has been in contact with a deity 3. The number of time


