In [1]:
import os
import sys
import torch

In [2]:
# True when the google.colab module is already loaded in this interpreter;
# the next cell uses this flag to choose between the Colab and the local setup.
IN_COLAB='google.colab' in sys.modules

In [3]:
if IN_COLAB:
    from google.colab import drive

    WORKING_FOLDER="/content/drive/MyDrive/unicamp/ia024_1s_2024/trabalho_final/llm_editing_evaluation/rome/notebooks"

    drive.mount('/content/drive', force_remount=True)

    os.chdir(WORKING_FOLDER)

    API_KEYS_FILE="../../../../../ia368v_dd/api_keys_20240427.json"

    TORCH_DTYPE=torch.float32

    sys.path.append("/content/drive/MyDrive/unicamp/ia024_1s_2024/trabalho_final/llm_editing_evaluation/rome/notebooks/rome")
    sys.path.append("/content/drive/MyDrive/unicamp/ia024_1s_2024/trabalho_final/llm_editing_evaluation/factual_associations")

    !pip install datasets -q
    !pip install groq -q
else:
    sys.path.append("/work/ia024/trabalho_final/llm_editing_evaluation/factual_associations")
    
    WORKING_FOLDER="/work/ia024_1s_2024/trabalho_final/llm_editing_evaluation/rome/notebooks"

    API_KEYS_FILE="../../api_keys_20240427.json"

    TORCH_DTYPE=torch.float16 # To Phi 1.5 fit 6GB GPU

In [4]:
import numpy as np
import json
import pandas as pd

from transformers import AutoModelForCausalLM, AutoTokenizer

import pickle
import time

from answers_evaluation import *

from tqdm.auto import tqdm

import pandas as pd

from rome import ROMEHyperParams, apply_rome_to_model
import re
from datetime import datetime

In [5]:
# Display settings for the evaluation tables shown later: do not truncate long
# cell text (statements/questions) and allow up to 500 rows to be rendered.
pd.set_option('display.max_colwidth', None)
pd.set_option('display.max_rows', 500)

In [6]:
os.getcwd()

'/work/ia024/trabalho_final/llm_editing_evaluation/rome/notebooks'

In [7]:
# Hugging Face model identifier passed to from_pretrained() for both the model
# and the tokenizer.
MODEL_NAME="microsoft/phi-1_5" # alternatives noted by the author: gpt2-{medium,large,xl} or EleutherAI/gpt-j-6B

# Default ROME hyperparameter file for this model, relative to the working folder.
MODEL_ROME_HYPERPARAMS="../hparams/ROME/microsoft_phi-1_5.json"

In [8]:
# Pickle file with the extracted facts and their questions (loaded below into `base`),
# relative to the working folder.
BASE_FACTUAL_ASSOCIATIONS="data/extracted_3_step_factual_associations_20240625.pkl"

## Load unchanged model

In [9]:
# Load the pretrained causal LM with the dtype selected in the environment cell
# and move it to the GPU (a CUDA device is required by this call).
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, torch_dtype=TORCH_DTYPE).to("cuda")



In [10]:
# Tokenizer matching MODEL_NAME; shared by the question helpers and the ROME edit.
tok = AutoTokenizer.from_pretrained(MODEL_NAME)

### Configure a padding token for the tokenizer

In [11]:
# Reuse the end-of-sequence token as the padding token.
tok.pad_token = tok.eos_token

## Load factual associations to add to the model

In [12]:
# Load the facts/questions produced by the extraction step into `base`.
# NOTE: pickle.load can execute arbitrary code while deserialising — only open
# files that come from a trusted source.
with open(BASE_FACTUAL_ASSOCIATIONS, "rb") as input_file:
    base = pickle.load(input_file)

In [13]:
base.keys()

dict_keys(['facts', 'fact_results', 'questions', 'questions_results', 'questions_from_facts', 'questions_from_facts_results'])

In [14]:
base['facts']

[{'subject': 'Abhay Bhushan Pandey',
  'relation': 'is',
  'object': 'an Indian computer scientist'},
 {'subject': 'Abhay Bhushan Pandey',
  'relation': 'made significant contributions to the development of the',
  'object': 'Internet TCP/IP architecture'},
 {'subject': 'Abhay Bhushan Pandey',
  'relation': 'is the author of the',
  'object': 'File Transfer Protocol and early versions of email protocols'},
 {'subject': 'Abhay Bhushan Pandey',
  'relation': 'graduated from the',
  'object': 'Indian Institute of Technology Kanpur in 1965 with a B.Tech in electrical engineering'},
 {'subject': 'Abhay Bhushan Pandey',
  'relation': 'received a Masters in electrical engineering and a degree in Management from the',
  'object': 'MIT Sloan School of Management'},
 {'subject': 'Abhay Bhushan Pandey',
  'relation': 'worked on developing FTP and email protocols for',
  'object': 'ARPANet and subsequent Internet'},
 {'subject': 'Abhay Bhushan Pandey',
  'relation': 'was a Director at the',
  'obj

In [15]:
base['facts'][-6]

{'subject': 'Abhay Bhushan Pandey',
 'relation': 'was a senior manager in Engineering and Development of',
 'object': 'Xerox'}

## Support functions

### Ask the model a question

In [16]:
def ask_question(which_model,
                 which_tok,
                 question,
                 max_new_tokens=20,
                 temperature=0.7,
                 verbose=True):
    """Sample one continuation for `question` and return its first answer line.

    Args:
        which_model: model exposing `generate(...)`.
        which_tok: tokenizer; called on the question and used to decode the output.
        question: prompt text sent to the model as-is.
        max_new_tokens: maximum number of tokens to generate.
        temperature: sampling temperature (sampling is always enabled, top_k=0).
        verbose: when True, print the raw continuation and the parsed groups.

    Returns:
        The first line of the continuation, after leading whitespace and an
        optional "Answer:" / "A:" prefix are removed. An empty string is
        returned when the continuation contains no answer text (for example
        when it is empty or made only of newlines).
    """
    # Send the inputs to the device the model lives on; fall back to "cuda"
    # (the previous hard-coded value) for objects without a `device` attribute.
    device = getattr(which_model, "device", "cuda")

    model_inputs = which_tok(question, return_tensors='pt').to(device)

    sample_output = which_model.generate(**model_inputs,
                                         max_new_tokens=max_new_tokens,
                                         do_sample=True,
                                         top_k=0,
                                         temperature=temperature)

    # The decoded text starts with the prompt; strip it by length to keep only
    # the generated continuation.
    decoded_answer = which_tok.decode(sample_output[0], skip_special_tokens=True)[len(question):]

    # Raw string: same pattern as before, without the invalid "\s" escape warning.
    m = re.match(r"[\n|\r|\s]*(Answer:)?(A:)?\s*(.+)[\n]*", decoded_answer)

    # re.match returns None when there is nothing to capture; treat that as an
    # empty answer instead of failing with AttributeError.
    answer = m.group(3) if m is not None else ""

    if verbose:
        print("Decoded answer:\n{}\n".format(decoded_answer))

        if m is not None:
            print(m.groups())
        else:
            print("(no answer text matched)")

        print(answer)

    return answer

### Test the function

In [25]:
ask_question(model,
             tok,
             "What is Abhay Bhushan Pandey's profession?")

RuntimeError: probability tensor contains either `inf`, `nan` or element < 0

### Function to format the ROME request

In [18]:
def create_edit_request(relation):
    """Turn a fact dict into the request dict handed to the ROME edit.

    Args:
        relation: dict with 'subject', 'relation' and 'object' entries.

    Returns:
        Dict with 'prompt' (a literal "{}" placeholder followed by the relation
        text), 'subject', and 'target_new' (the object wrapped as {"str": ...}).
    """
    # The doubled braces emit a literal "{}" that stays in the prompt as the
    # slot for the subject.
    prompt_template = f"{{}} {relation['relation']}"
    subject_text = relation['subject']
    new_target = {"str": relation['object']}

    return dict(prompt=prompt_template,
                subject=subject_text,
                target_new=new_target)

### Ask list of questions

In [19]:
def ask_questions(which_model,
                  which_tok,
                  questions,
                  question_replicas=5):
    """Ask every question several times and collect the sampled answers.

    Args:
        which_model: model forwarded to ask_question.
        which_tok: tokenizer forwarded to ask_question.
        questions: iterable of dicts; each must have a 'question' entry.
        question_replicas: how many answers to sample per question.

    Returns:
        Tuple (answers, elapsed_seconds). `answers` is a list with one dict per
        question: {"question": <the input dict>, "answers": [<str>, ...]}.
    """
    started_at = time.time()

    collected = []

    for entry in questions:
        print("--------------")
        print(">> question: {}".format(entry['question']))

        # One independent sample per replica.
        replies = [ask_question(which_model,
                                which_tok,
                                entry['question'])
                   for _ in range(question_replicas)]

        collected.append({"question": entry,
                          "answers": replies})

    elapsed = time.time() - started_at

    print("Number of questions: {}; total time: {}".format(len(questions), elapsed))

    return collected, elapsed

### Ask the questions for each statement

In [20]:
def ask_statements_questions(which_model,
                             which_tok,
                             questions,
                             question_replicas=5):
    """Run ask_questions for each statement's question list.

    Args:
        which_model: model forwarded to ask_questions.
        which_tok: tokenizer forwarded to ask_questions.
        questions: iterable of dicts with 'statement' and 'questions' entries.
        question_replicas: how many answers to sample per question.

    Returns:
        Tuple (model_answers, elapsed_seconds). `model_answers` holds one dict
        per statement: {"statement": <str>, "answers": <ask_questions result>}.
    """
    started_at = time.time()

    per_statement = []

    for position, item in enumerate(questions):
        statement_text = item['statement']

        print("\n\n*******************************************")
        print("{} ― Questions for statement: {}".format(position, statement_text))
        print("*******************************************\n")

        # Only the answers are kept; the per-statement timing is discarded in
        # favour of the overall elapsed time measured here.
        answers_for_statement = ask_questions(which_model,
                                              which_tok,
                                              item['questions'],
                                              question_replicas=question_replicas)[0]

        per_statement.append({"statement": item['statement'],
                              "answers": answers_for_statement})

    finished_at = time.time()

    return per_statement, finished_at - started_at

### Edit and Test model performance

In [21]:
def edit_and_test_model(which_model,
                        which_tok,
                        fact,
                        questions,
                        rome_hyperparams,
                        replicas=5):
    """Apply one ROME edit for `fact`, then question the model about it.

    Args:
        which_model: model to edit and to query afterwards.
        which_tok: tokenizer used both for the edit and for the questions.
        fact: dict with 'subject', 'relation' and 'object' entries.
        questions: list of {'statement': ..., 'questions': [...]} dicts passed
            to ask_statements_questions.
        rome_hyperparams: hyperparameters object passed to apply_rome_to_model.
        replicas: number of answers sampled per question.

    Returns:
        Dict with the keys "statement" (subject, relation and object joined by
        spaces), "model_answers", "edit_time" and "questions_time" (seconds).
    """
    statement = "{} {} {}".format(fact['subject'],
                                  fact['relation'],
                                  fact['object'])

    edit_request = create_edit_request(fact)

    print(edit_request)

    edit_start_time = time.time()

    # Use the tokenizer passed to this function; the previous code reached for
    # the notebook-level `tok`, silently ignoring the `which_tok` argument.
    # The return value is not used: the questions below are asked to
    # `which_model` itself, so this relies on the edit being applied in place
    # (presumably the default behaviour of apply_rome_to_model — confirm).
    apply_rome_to_model(which_model,
                        which_tok,
                        [edit_request],
                        rome_hyperparams)

    edit_end_time = time.time()

    model_answers, questions_time = ask_statements_questions(which_model,
                                                             which_tok,
                                                             questions,
                                                             question_replicas=replicas)

    return {"statement": statement,
            "model_answers": model_answers,
            "edit_time": edit_end_time - edit_start_time,
            "questions_time": questions_time}

In [22]:
def ask_new_question(model,
                     tok,
                     question,
                     answer):
    """Ask a single ad-hoc question through ask_questions.

    Args:
        model: model forwarded to ask_questions.
        tok: tokenizer forwarded to ask_questions.
        question: question text.
        answer: reference answer stored next to the question.

    Returns:
        Whatever ask_questions returns for a one-element question list.
    """
    # Wrap the pair in the same dict shape used by the question lists.
    single_item = {'question': question,
                   'answer': answer}

    return ask_questions(model, tok, [single_item])

# Edit the model using different layers

In [23]:
# Hyperparameter variant stored in the "layer_2" JSON file. The same variable
# name is rebound further down to the default file (MODEL_ROME_HYPERPARAMS).
rome_hyperparams = ROMEHyperParams.from_json("../hparams/ROME/microsoft_phi-1_5_layer_2.json")

In [24]:
# Edit the model with the first fact and ask only that fact's own questions.
fact_0_results = edit_and_test_model(model,
                                     tok,
                                     base['facts'][0],
                                     [base['questions_from_facts'][0]],
                                     rome_hyperparams)

{'prompt': '{} is', 'subject': 'Abhay Bhushan Pandey', 'target_new': {'str': 'an Indian computer scientist'}}
Executing ROME algorithm for the update: [Abhay Bhushan Pandey is] -> [ an Indian computer scientist]
Cached context templates ['{}', '\nStudent: A. {}', ' from typing. {}', ' def f. {}', '\nStudent: A. {}', ' def f. {}', '\nStudent: If. {}', ' \nAs the. {}', '\nStudent: If. {}', '\nThe fireman. {}', ' Once upon. {}', 'Illustration: \n        John and Jane. {}', '\nThe family had to move away because the. {}', 'Illustration: \n        Sarah and John. {}', ' def f(n: int,. {}', ' \nTitle: The Importance of Health. {}', ' \nQuestion 3: A store sells T. {}', ' from typing import List def. {}', ' def f(nums: List. {}', 'Illustration: \n        Sarah and Tom. {}', ' \nIn the heart of Moscow, as. {}']
Computing left vector (u)...
Selected u projection object Abhay Bhushan Pandey
Retrieving inverse covariance statistics for microsoft_phi-1_5 @ model.layers.2.mlp.fc2. The result will b

You can avoid this message in future by passing the argument `trust_remote_code=True`.
Passing `trust_remote_code=True` will be mandatory to load this dataset from the next major release of `datasets`.


Loading dataset shards:   0%|          | 0/41 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Left vector shape: torch.Size([8192])
Computing right vector (v)
Lookup index found: 7 | Sentence: Abhay Bhushan Pandey is an Indian computer | Token: ey
Rewrite layer is 2
Tying optimization objective to 23
Recording initial value of v*
loss 3.027 = 3.027 + 0.0 + 0.0 avg prob of [ an Indian computer scientist] 0.05104130879044533
loss 1.25 = 1.171 + 0.036 + 0.043 avg prob of [ an Indian computer scientist] 0.3234724998474121
loss 0.723 = 0.633 + 0.026 + 0.064 avg prob of [ an Indian computer scientist] 0.5412784814834595
loss 0.474 = 0.355 + 0.035 + 0.084 avg prob of [ an Indian computer scientist] 0.7088178992271423
loss 0.266 = 0.122 + 0.042 + 0.102 avg prob of [ an Indian computer scientist] 0.888121485710144
loss 0.2 = 0.042 + 0.04 + 0.118 avg prob of [ an Indian computer scientist] 0.9595264196395874
loss 0.187 = 0.024 + 0.04 + 0.124 avg prob of [ an Indian computer scientist] 0.9766135811805725
loss 0.174 = 0.017 + 0.033 + 0.124 avg prob of [ an Indian computer scientist] 0.9827

RuntimeError: probability tensor contains either `inf`, `nan` or element < 0

### Load model ROME hyperparameters

In [None]:
# Rebind rome_hyperparams to the default hyperparameter file for this model;
# every edit below this cell uses these values.
rome_hyperparams = ROMEHyperParams.from_json(MODEL_ROME_HYPERPARAMS)

### Apply the 8th factual association first to check whether the model's performance on its questions improves

In [None]:
base['facts'][7]

{'subject': 'Abhay Bhushan Pandey',
 'relation': 'was a senior manager in Engineering and Development of',
 'object': 'Xerox'}

In [None]:
base['questions_from_facts'][7]

{'statement': 'Abhay Bhushan Pandey was a senior manager in Engineering and Development of Xerox',
 'questions': [{'question': 'Who was a senior manager in Engineering and Development of Xerox?',
   'answer': 'Abhay Bhushan Pandey'},
  {'question': "What was Abhay Bhushan Pandey's role in Xerox?",
   'answer': 'senior manager in Engineering and Development'}]}

In [None]:
# Edit with the fact at index 7 (the 8th fact) and ask that fact's own questions.
# NOTE(review): `model` is not reloaded before this cell, so any earlier edit
# applied to it is still present — confirm this is intended.
fact_results = edit_and_test_model(model,
                                   tok,
                                   base['facts'][7],
                                   [base['questions_from_facts'][7]],
                                   rome_hyperparams)

{'prompt': '{} was a senior manager in Engineering and Development of', 'subject': 'Abhay Bhushan Pandey', 'target_new': {'str': 'Xerox'}}
Executing ROME algorithm for the update: [Abhay Bhushan Pandey was a senior manager in Engineering and Development of] -> [ Xerox]
Computing left vector (u)...
Selected u projection object Abhay Bhushan Pandey
Left vector shape: torch.Size([8192])
Computing right vector (v)
Lookup index found: 7 | Sentence: Abhay Bhushan Pandey was a senior manager in Engineering and Development of Xer | Token: ey
Rewrite layer is 5
Tying optimization objective to 23
Recording initial value of v*
loss 4.832 = 4.832 + 0.0 + 0.0 avg prob of [ Xerox] 0.008234621956944466
loss 3.435 = 3.374 + 0.023 + 0.039 avg prob of [ Xerox] 0.0350954495370388
loss 0.249 = 0.157 + 0.033 + 0.06 avg prob of [ Xerox] 0.8638076186180115
loss 0.11 = 0.001 + 0.031 + 0.078 avg prob of [ Xerox] 0.9993019104003906
loss 0.121 = 0.0 + 0.025 + 0.096 avg prob of [ Xerox] 0.9995347857475281
loss 0.

In [None]:
fact_results

{'statement': 'Abhay Bhushan Pandey was a senior manager in Engineering and Development of Xerox',
 'model_answers': [{'statement': 'Abhay Bhushan Pandey was a senior manager in Engineering and Development of Xerox',
   'answers': [{'question': {'question': 'Who was a senior manager in Engineering and Development of Xerox?',
      'answer': 'Abhay Bhushan Pandey'},
     'answers': ['Leon Rinder.',
      'A senior manager is someone who is in a position of leadership and responsibility in a company',
      'A senior manager in Engineering and Development of Xerox was named Brian Anderson.',
      'Ray Kroc',
      'Jeff Bezos.']},
    {'question': {'question': "What was Abhay Bhushan Pandey's role in Xerox?",
      'answer': 'senior manager in Engineering and Development'},
     'answers': ['Abhay Bhushan Pandey was a Xerox machine designer who made',
      'Abhay Bhushan Pandey played a crucial role in Xerox by introducing',
      'Abhay Bhushan Pandey was a well-known Xerox machine de

In [None]:
ask_new_question(model, tok, "Was Abhay Bhushan Pandey a senior manager of Xerox?", "yes" )

--------------
>> question: Was Abhay Bhushan Pandey a senior manager of Xerox?
Decoded answer:
 False

(3). Jane studied physics because her father, a famous physicist, inspired her.

(None, None, 'False')
False
Decoded answer:
 True.
(3). Make a grilled cheese sandwich. Butter 2 slices of bread and place a

(None, None, 'True.')
True.
Decoded answer:


Science - Physical Science - Energy

Answer: True

Explanation: The

(None, None, 'Science - Physical Science - Energy')
Science - Physical Science - Energy
Decoded answer:


Abhay Bhushan Pandey was a senior manager of Xerox because he was

(None, None, 'Abhay Bhushan Pandey was a senior manager of Xerox because he was')
Abhay Bhushan Pandey was a senior manager of Xerox because he was
Decoded answer:


Teammate: Yes, he. He was the inventor of the Xerox machine,

(None, None, 'Teammate: Yes, he. He was the inventor of the Xerox machine,')
Teammate: Yes, he. He was the inventor of the Xerox machine,
Number of questions: 1; total time:

([{'question': {'question': 'Was Abhay Bhushan Pandey a senior manager of Xerox?',
    'answer': 'yes'},
   'answers': ['False',
    'True.',
    'Science - Physical Science - Energy',
    'Abhay Bhushan Pandey was a senior manager of Xerox because he was',
    'Teammate: Yes, he. He was the inventor of the Xerox machine,']}],
 4.58195161819458)

In [None]:
ask_question(model,
             tok,
             "Was Abhay Bhushan Pandey a senior manager in Engineering and Development of Xerox?")

Decoded answer:

Answer: Yes, he was a senior manager and the inventor of the Xerox machine.


('Answer:', None, 'Yes, he was a senior manager and the inventor of the Xerox machine.')
Yes, he was a senior manager and the inventor of the Xerox machine.


'Yes, he was a senior manager and the inventor of the Xerox machine.'

In [None]:
ask_question(model,
             tok,
             "Was Abhay Bhushan Pandey a senior manager in cooking of Xerox?")

Decoded answer:
 False.

The fireman was able to rescue the trapped family, because the family was

(None, None, 'False.')
False.


'False.'

In [None]:
ask_question(model,
             tok,
             "Was Abhay Bhushan Pandey a junior manager of Xerox?")

Decoded answer:

A: Yes, he was a junior manager of Xerox at the beginning of his career.

(None, 'A:', 'Yes, he was a junior manager of Xerox at the beginning of his career.')
Yes, he was a junior manager of Xerox at the beginning of his career.


'Yes, he was a junior manager of Xerox at the beginning of his career.'

In [None]:
ask_question(model,
             tok,
             "Was Abhay Bhushan Pandey a developer at Xerox?")

Decoded answer:
 False.

(6). Maria used a mask to protect herself from air pollution, but it

(None, None, 'False.')
False.


'False.'

### Try a simpler relation on the same theme

In [None]:
# Reload the pretrained checkpoint and rebind `model`, so the next edit starts
# from freshly loaded weights rather than from the previously edited object.
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, torch_dtype=TORCH_DTYPE).to("cuda")

In [None]:
# Hand-written, shorter variant of the Xerox fact (same subject and object,
# simpler relation text), in the same dict shape as the entries of base['facts'].
new_relation = {'subject': 'Abhay Bhushan Pandey',
                'relation': 'was a senior manager of',
                'object': 'Xerox'}

In [None]:
# Questions for the simplified statement, in the same shape as the entries of
# base['questions_from_facts']; the last one is an added yes/no question.
new_questions = {'statement': 'Abhay Bhushan Pandey was a senior manager of Xerox',
                 'questions': [{'question': 'Who was a senior manager of Xerox?',
                                'answer': 'Abhay Bhushan Pandey'},
                               {'question': "What was Abhay Bhushan Pandey's role in Xerox?",
                                'answer': 'senior manager'},
                               {'question': "Was Abhay Bhushan Pandey a senior manager of Xerox?",
                                'answer': 'yes'}]}

In [None]:
# Edit the reloaded model with the simplified relation and ask its questions.
alt_fact_results = edit_and_test_model(model,
                                       tok,
                                       new_relation,
                                       [new_questions],
                                       rome_hyperparams)

{'prompt': '{} was a senior manager of', 'subject': 'Abhay Bhushan Pandey', 'target_new': {'str': 'Xerox'}}
Executing ROME algorithm for the update: [Abhay Bhushan Pandey was a senior manager of] -> [ Xerox]
Computing left vector (u)...
Selected u projection object Abhay Bhushan Pandey
Left vector shape: torch.Size([8192])
Computing right vector (v)
Lookup index found: 7 | Sentence: Abhay Bhushan Pandey was a senior manager of Xer | Token: ey
Rewrite layer is 5
Tying optimization objective to 23
Recording initial value of v*
loss 5.663 = 5.663 + 0.0 + 0.0 avg prob of [ Xerox] 0.0040923054330050945
loss 3.728 = 3.662 + 0.027 + 0.039 avg prob of [ Xerox] 0.026901494711637497
loss 0.4 = 0.299 + 0.041 + 0.059 avg prob of [ Xerox] 0.7530118823051453
loss 0.123 = 0.002 + 0.044 + 0.077 avg prob of [ Xerox] 0.9983253479003906
loss 0.136 = 0.002 + 0.042 + 0.093 avg prob of [ Xerox] 0.9984567761421204
loss 0.146 = 0.002 + 0.037 + 0.108 avg prob of [ Xerox] 0.9983400702476501
loss 0.15 = 0.002 + 

In [None]:
# Scores container handed to the evaluator and later to create_evaluation_table
# (presumably filled in place by evaluate_statement_questions — confirm).
statements_scores = {}

# NOTE(review): `groq_interface` is not defined in any visible cell of this
# notebook; on a fresh kernel this call would raise NameError unless the name
# comes from the `from answers_evaluation import *` import — confirm and, if
# not, add the cell that creates it (API_KEYS_FILE is defined but unused here).
edit_round_time = evaluate_statement_questions(groq_interface,
                                               alt_fact_results['model_answers'],
                                               statements_scores,
                                               edit_round_number=0)

print("\n\n Edit round evaluation time: {}\n\n".format(edit_round_time))


Statement: Abhay Bhushan Pandey was a senior manager of Xerox

>> Question: Who was a senior manager of Xerox?

Provide a score from 0 to 3 for a candidate_answer, considering a pair of (reference_question, reference_answer), according to the following procedure:
1. Start with score 3;
2. If the candidate_answer does not include any information in the reference_answer, attribute score 0.
3. If the candidate_answer does not include the complete reference_answer information, decrement 1 point;
4. If the candidate_answer includes information not verifiable by the reference_question, decrement 1 point;
5. If the candidate_answer end in an incomplete sentence, decrement 1 point;
6. If the candidate_answer refers to a different entity or subject from reference_question, attribute score 0.
7. If for any reason you cannot evaluate, attribute score 0.

Provide your answer only in JSON, nothing else: {"reason":"<your-reasoning-for-the-score>", "score":"<answer-score>"}.

reference_question: "Wh

In [None]:
create_evaluation_table(statements_scores)

Abhay Bhushan Pandey was a senior manager of Xerox


Unnamed: 0,statement,round,question,mean_score,std_score
0,Abhay Bhushan Pandey was a senior manager of Xerox,0,Who was a senior manager of Xerox?,0.2,0.4
1,Abhay Bhushan Pandey was a senior manager of Xerox,0,What was Abhay Bhushan Pandey's role in Xerox?,0.8,0.4
2,Abhay Bhushan Pandey was a senior manager of Xerox,0,Was Abhay Bhushan Pandey a senior manager of Xerox?,1.8,1.469694


In [None]:
alt_fact_results

{'statement': 'Abhay Bhushan Pandey was a senior manager of Xerox',
 'model_answers': [{'statement': 'Abhay Bhushan Pandey was a senior manager of Xerox',
   'answers': [{'question': {'question': 'Who was a senior manager of Xerox?',
      'answer': 'Abhay Bhushan Pandey'},
     'answers': ['John Smith.',
      'Paul Scherzer.',
      'Dot.',
      'A senior manager of Xerox was someone who was responsible for overseeing the day-to',
      'Ellen Swarbrick was a senior manager of Xerox.']},
    {'question': {'question': "What was Abhay Bhushan Pandey's role in Xerox?",
      'answer': 'senior manager'},
     'answers': ['Abhay Bhushan Pandey was the co-founder and CEO',
      'Abhay Bhushan Pandey was the co-founder and former CEO of',
      "Abhay Bhushan Pandey was Xerox's founder and CEO. He",
      "Abhay Pandey was Xerox's founder and the first CEO of Xerox Corporation.",
      'Abhay Bhushan Pandey was the founder of Xerox Corporation and played']},
    {'question': {'question': 

In [None]:
ask_question(model,
             tok,
             "Was Abhay Bhushan Pandey a senior manager of Xerox?")

Decoded answer:
 True.


(1) The [glasses] were stained, but the [

(None, None, 'True.')
True.


'True.'

In [None]:
ask_question(model,
             tok,
             "What was Abhay Bhushan Pandey in Xerox?")

Decoded answer:

A: Abhay Bhushan Pandey was the founder of Xerox Corporation.


(None, 'A:', 'Abhay Bhushan Pandey was the founder of Xerox Corporation.')
Abhay Bhushan Pandey was the founder of Xerox Corporation.


'Abhay Bhushan Pandey was the founder of Xerox Corporation.'

### Try the 3rd factual statement on the original model

In [None]:
# Reload the pretrained checkpoint and rebind `model`, so the next edit starts
# from freshly loaded weights rather than from the previously edited object.
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, torch_dtype=TORCH_DTYPE).to("cuda")

In [None]:
base['questions_from_facts'][2]

{'statement': 'Abhay Bhushan Pandey is the author of the File Transfer Protocol and early versions of email protocols',
 'questions': [{'question': 'Who is the author of the File Transfer Protocol?',
   'answer': 'Abhay Bhushan Pandey'},
  {'question': 'What is Abhay Bhushan Pandey known for authoring?',
   'answer': 'the File Transfer Protocol and early versions of email protocols'},
  {'question': 'Who developed early versions of email protocols?',
   'answer': 'Abhay Bhushan Pandey'}]}

In [None]:
# The three questions of base['questions_from_facts'][2] plus three added
# yes/no questions; the last one (TLS) expects the answer 'No'.
fact_2_new_questions = {'statement': 'Abhay Bhushan Pandey is the author of the File Transfer Protocol and early versions of email protocols',
 'questions': [{'question': 'Who is the author of the File Transfer Protocol?',
   'answer': 'Abhay Bhushan Pandey'},
  {'question': 'What is Abhay Bhushan Pandey known for authoring?',
   'answer': 'the File Transfer Protocol and early versions of email protocols'},
  {'question': 'Who developed early versions of email protocols?',
   'answer': 'Abhay Bhushan Pandey'},
  {'question': "Is Abhay Bhushan Pandey the author of the File Transfer Protocol?",
   'answer': 'Yes'},
  {'question': "Is Abhay Bhushan Pandey the author of the email protocol?",
   'answer': 'Yes'},
  {'question': "Is Abhay Bhushan Pandey the author of the TLS protocol?",
   'answer': 'No'}]}

In [None]:
# Edit the reloaded model with the fact at index 2 (the 3rd fact).
# Naming note: "fact_3" counts from one while "fact_2_new_questions" uses the
# zero-based index; both refer to the same fact.
fact_3_results = edit_and_test_model(model,
                                     tok,
                                     base['facts'][2],
                                     [fact_2_new_questions],
                                     rome_hyperparams)

{'prompt': '{} is the author of the', 'subject': 'Abhay Bhushan Pandey', 'target_new': {'str': 'File Transfer Protocol and early versions of email protocols'}}
Executing ROME algorithm for the update: [Abhay Bhushan Pandey is the author of the] -> [ File Transfer Protocol and early versions of email protocols]
Computing left vector (u)...
Selected u projection object Abhay Bhushan Pandey
Left vector shape: torch.Size([8192])
Computing right vector (v)
Lookup index found: 7 | Sentence: Abhay Bhushan Pandey is the author of the File Transfer Protocol and early versions of email | Token: ey
Rewrite layer is 5
Tying optimization objective to 23
Recording initial value of v*
loss 4.583 = 4.583 + 0.0 + 0.0 avg prob of [ File Transfer Protocol and early versions of email protocols] 0.010377828031778336
loss 3.998 = 3.934 + 0.025 + 0.039 avg prob of [ File Transfer Protocol and early versions of email protocols] 0.01968623697757721
loss 3.043 = 2.933 + 0.053 + 0.057 avg prob of [ File Transfer

In [None]:
# Scores container handed to the evaluator and later to create_evaluation_table
# (presumably filled in place by evaluate_statement_questions — confirm).
fact_3_statements_scores = {}

# NOTE(review): `groq_interface` is not defined in any visible cell of this
# notebook; on a fresh kernel this call would raise NameError unless the name
# comes from the `from answers_evaluation import *` import — confirm.
edit_round_time = evaluate_statement_questions(groq_interface,
                                               fact_3_results['model_answers'],
                                               fact_3_statements_scores,
                                               edit_round_number=0)

print("\n\n Edit round evaluation time: {}\n\n".format(edit_round_time))


Statement: Abhay Bhushan Pandey is the author of the File Transfer Protocol and early versions of email protocols

>> Question: Who is the author of the File Transfer Protocol?

Provide a score from 0 to 3 for a candidate_answer, considering a pair of (reference_question, reference_answer), according to the following procedure:
1. Start with score 3;
2. If the candidate_answer does not include any information in the reference_answer, attribute score 0.
3. If the candidate_answer does not include the complete reference_answer information, decrement 1 point;
4. If the candidate_answer includes information not verifiable by the reference_question, decrement 1 point;
5. If the candidate_answer end in an incomplete sentence, decrement 1 point;
6. If the candidate_answer refers to a different entity or subject from reference_question, attribute score 0.
7. If for any reason you cannot evaluate, attribute score 0.

Provide your answer only in JSON, nothing else: {"reason":"<your-reasoning-fo

In [None]:
create_evaluation_table(fact_3_statements_scores)

Abhay Bhushan Pandey is the author of the File Transfer Protocol and early versions of email protocols


Unnamed: 0,statement,round,question,mean_score,std_score
0,Abhay Bhushan Pandey is the author of the File Transfer Protocol and early versions of email protocols,0,Who is the author of the File Transfer Protocol?,0.2,0.4
1,Abhay Bhushan Pandey is the author of the File Transfer Protocol and early versions of email protocols,0,What is Abhay Bhushan Pandey known for authoring?,1.6,0.489898
2,Abhay Bhushan Pandey is the author of the File Transfer Protocol and early versions of email protocols,0,Who developed early versions of email protocols?,0.4,0.489898
3,Abhay Bhushan Pandey is the author of the File Transfer Protocol and early versions of email protocols,0,Is Abhay Bhushan Pandey the author of the File Transfer Protocol?,1.8,0.4
4,Abhay Bhushan Pandey is the author of the File Transfer Protocol and early versions of email protocols,0,Is Abhay Bhushan Pandey the author of the email protocol?,1.0,1.264911
5,Abhay Bhushan Pandey is the author of the File Transfer Protocol and early versions of email protocols,0,Is Abhay Bhushan Pandey the author of the TLS protocol?,0.0,0.0


In [None]:
fact_3_results

{'statement': 'Abhay Bhushan Pandey is the author of the File Transfer Protocol and early versions of email protocols',
 'model_answers': [{'statement': 'Abhay Bhushan Pandey is the author of the File Transfer Protocol and early versions of email protocols',
   'answers': [{'question': {'question': 'Who is the author of the File Transfer Protocol?',
      'answer': 'Abhay Bhushan Pandey'},
     'answers': ['Tim Berners-Lee.',
      '(2). How do you transfer files between your computer and a remote server using FTP? Login',
      '## Introduction',
      'In a world where technology plays a significant role in our daily lives, understanding how data is',
      'IPv4']},
    {'question': {'question': 'What is Abhay Bhushan Pandey known for authoring?',
      'answer': 'the File Transfer Protocol and early versions of email protocols'},
     'answers': ['File Transfer Protocols.',
      'File Transfer Protocols.',
      '- Answer: File Transfer Protocols (FTP) and Secure Sockets Layer (SS

In [None]:
ask_new_question(model,
                 tok,
                 "Is Abhay Bhushan Pandey the author of the File Transfer Protocol?",
                 "yes")

--------------
>> question: Is Abhay Bhushan Pandey the author of the File Transfer Protocol?
Decoded answer:

   Answer: Yes, File Transfer Protocol (FTP) was created by the Swiss Computer Network

('Answer:', None, 'Yes, File Transfer Protocol (FTP) was created by the Swiss Computer Network')
Yes, File Transfer Protocol (FTP) was created by the Swiss Computer Network
Decoded answer:

    Yes, he is.
3. What did the XMPP protocol create?


(None, None, 'Yes, he is.')
Yes, he is.
Decoded answer:

Answer: Yes, he is considered the "father of email."

Exercise 2:

('Answer:', None, 'Yes, he is considered the "father of email."')
Yes, he is considered the "father of email."
Decoded answer:

Answer: Yes.

3. What is one real-world use case of a printer

('Answer:', None, 'Yes.')
Yes.
Decoded answer:

Answer: Yes, he is.
2. Who is Edvard Munch?
Answer

('Answer:', None, 'Yes, he is.')
Yes, he is.
Number of questions: 1; total time: 4.857402801513672


([{'question': {'question': 'Is Abhay Bhushan Pandey the author of the File Transfer Protocol?',
    'answer': 'yes'},
   'answers': ['Yes, File Transfer Protocol (FTP) was created by the Swiss Computer Network',
    'Yes, he is.',
    'Yes, he is considered the "father of email."',
    'Yes.',
    'Yes, he is.']}],
 4.857402801513672)

In [None]:
ask_new_question(model,
                 tok,
                 "Is Abhay Bhushan Pandey the author of the email Protocol?",
                 "yes")

--------------
>> question: Is Abhay Bhushan Pandey the author of the email Protocol?
Decoded answer:


Answer: Yes, that's right!

Exercise 4: What is a potential

('Answer:', None, "Yes, that's right!")
Yes, that's right!
Decoded answer:

Answer: Yes, File Transfer Protocol (FTP) is the earliest form of secure file transfer

('Answer:', None, 'Yes, File Transfer Protocol (FTP) is the earliest form of secure file transfer')
Yes, File Transfer Protocol (FTP) is the earliest form of secure file transfer
Decoded answer:


Answer: Yes, he is. He is the person who created the File Transfer Protocol.

('Answer:', None, 'Yes, he is. He is the person who created the File Transfer Protocol.')
Yes, he is. He is the person who created the File Transfer Protocol.
Decoded answer:

Answer: Yes, he is now just a legend in the world of technology.
3.

('Answer:', None, 'Yes, he is now just a legend in the world of technology.')
Yes, he is now just a legend in the world of technology.
Decoded answer:


([{'question': {'question': 'Is Abhay Bhushan Pandey the author of the email Protocol?',
    'answer': 'yes'},
   'answers': ["Yes, that's right!",
    'Yes, File Transfer Protocol (FTP) is the earliest form of secure file transfer',
    'Yes, he is. He is the person who created the File Transfer Protocol.',
    'Yes, he is now just a legend in the world of technology.',
    'Yes, he is an ex-officio member of the office of head of IT']}],
 2.61548113822937)

In [None]:
# Probe with a question whose reference answer is "no" (HTTP Protocol).
# NOTE(review): in the recorded run, four of the five sampled answers still start with "Yes".
ask_new_question(model,
                 tok,
                 "Is Abhay Bhushan Pandey the author of the HTTP Protocol?",
                 "no")

--------------
>> question: Is Abhay Bhushan Pandey the author of the HTTP Protocol?
Decoded answer:

Answer: Yes, that's right. 

Exercise 5: What is the purpose

('Answer:', None, "Yes, that's right. ")
Yes, that's right. 
Decoded answer:


Answer: Yes, both File Folders and the Internet Protocol (IP) were created by

('Answer:', None, 'Yes, both File Folders and the Internet Protocol (IP) were created by')
Yes, both File Folders and the Internet Protocol (IP) were created by
Decoded answer:

Answer: Yes, he is a former Foyers of Google executive.

Exercise

('Answer:', None, 'Yes, he is a former Foyers of Google executive.')
Yes, he is a former Foyers of Google executive.
Decoded answer:

Answer: No, he is not.

2. Who is a digital native and how

('Answer:', None, 'No, he is not.')
No, he is not.
Decoded answer:


Answer: Yes, Abhay Bhushan Pandey is a former Feds Coordinator

('Answer:', None, 'Yes, Abhay Bhushan Pandey is a former Feds Coordinator')
Yes, Abhay Bhushan Pandey is a

([{'question': {'question': 'Is Abhay Bhushan Pandey the author of the HTTP Protocol?',
    'answer': 'no'},
   'answers': ["Yes, that's right. ",
    'Yes, both File Folders and the Internet Protocol (IP) were created by',
    'Yes, he is a former Foyers of Google executive.',
    'No, he is not.',
    'Yes, Abhay Bhushan Pandey is a former Feds Coordinator']}],
 3.570805072784424)

In [None]:
# Open-ended probe: the reference answer is a free-text phrase rather than yes/no.
# NOTE(review): the question text is ungrammatical ("authored" should be "author"); it is
# left untouched here because it is the exact prompt sent to the model.
ask_new_question(model,
                 tok,
                 "What did Abhay Bhushan Pandey authored?",
                 "the File Transfer Protocol and early versions of email protocols")

--------------
>> question: What did Abhay Bhushan Pandey authored?
Decoded answer:

Answer: File Transfer Protocol.

Exercise 4: How can email attachments be sent?

('Answer:', None, 'File Transfer Protocol.')
File Transfer Protocol.
Decoded answer:

Answer: File Transfer Protocol

Exercise 2: What is the role of a librarian

('Answer:', None, 'File Transfer Protocol')
File Transfer Protocol
Decoded answer:

Answer: File Transfer Protocol (FTP)

Exercise 2: What was the first

('Answer:', None, 'File Transfer Protocol (FTP)')
File Transfer Protocol (FTP)
Decoded answer:


Ans: File Transfer Protocol.

2. What is a virtual private network (

(None, None, 'Ans: File Transfer Protocol.')
Ans: File Transfer Protocol.
Decoded answer:

Answer: Abhay Bhushan Pandey was the File Transfer Protocol pioneer and early advocate

('Answer:', None, 'Abhay Bhushan Pandey was the File Transfer Protocol pioneer and early advocate')
Abhay Bhushan Pandey was the File Transfer Protocol pioneer and early a

([{'question': {'question': 'What did Abhay Bhushan Pandey authored?',
    'answer': 'the File Transfer Protocol and early versions of email protocols'},
   'answers': ['File Transfer Protocol.',
    'File Transfer Protocol',
    'File Transfer Protocol (FTP)',
    'Ans: File Transfer Protocol.',
    'Abhay Bhushan Pandey was the File Transfer Protocol pioneer and early advocate']}],
 4.172820091247559)

# Checking the performance of the fully edited model

In [None]:
# Load a checkpoint from the local data/ directory with the same dtype and device used for
# the unchanged model at the top of the notebook.
# NOTE(review): judging by its name, this is presumably the model saved after applying all
# 3-step fact edits on 2024-06-25 — confirm against the cell that wrote it.
edited_model = AutoModelForCausalLM.from_pretrained("data/microsoft_phi-1_5_3_step_facts_edited_20240625_193458", torch_dtype=TORCH_DTYPE).to("cuda")

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [None]:
# Query the loaded checkpoint with a yes/no question before any further edit is applied
# to it in this section.
ask_new_question(edited_model,
                 tok,
                 'Was Abhay Bhushan Pandey a senior manager of Xerox?',
                 'yes')

--------------
>> question: Was Abhay Bhushan Pandey a senior manager of Xerox?
Decoded answer:
's reflection was, "I need to quit, I have to quit, because I can't

(None, None, '\'s reflection was, "I need to quit, I have to quit, because I can\'t')
's reflection was, "I need to quit, I have to quit, because I can't
Decoded answer:
 Khan Foundation Trust Foundation talks of daily chores

Dear Sir/Madam,

I hope

(None, None, 'Khan Foundation Trust Foundation talks of daily chores')
Khan Foundation Trust Foundation talks of daily chores
Decoded answer:
 '-of-peaces' was written on a piece of paper and tucked away in his pocket

(None, None, "'-of-peaces' was written on a piece of paper and tucked away in his pocket")
'-of-peaces' was written on a piece of paper and tucked away in his pocket
Decoded answer:
 Yes, he was. He had taken up the job of a day-off after a series of

(None, None, 'Yes, he was. He had taken up the job of a day-off after a series of')
Yes, he was. He had taken up

([{'question': {'question': 'Was Abhay Bhushan Pandey a senior manager of Xerox?',
    'answer': 'yes'},
   'answers': ['\'s reflection was, "I need to quit, I have to quit, because I can\'t',
    'Khan Foundation Trust Foundation talks of daily chores',
    "'-of-peaces' was written on a piece of paper and tucked away in his pocket",
    'Yes, he was. He had taken up the job of a day-off after a series of',
    '--[Miners were missing here, was the answer for you]']}],
 5.189497232437134)

In [None]:
# Run edit_and_test_model on edited_model for fact index 7 and the questions generated for
# that fact; the recorded log shows this executes a ROME update for the fact.
# `base` and `rome_hyperparams` are not defined in this section — they must already exist
# in the kernel from earlier cells.
# NOTE(review): presumably this modifies edited_model in place; the follow-up question
# cell relies on that — confirm.
fact_results = edit_and_test_model(edited_model,
                                   tok,
                                   base['facts'][7],
                                   [base['questions_from_facts'][7]],
                                   rome_hyperparams)

{'prompt': '{} was a senior manager in Engineering and Development of', 'subject': 'Abhay Bhushan Pandey', 'target_new': {'str': 'Xerox'}}
Executing ROME algorithm for the update: [Abhay Bhushan Pandey was a senior manager in Engineering and Development of] -> [ Xerox]
Cached context templates ['{}', ' \nTitle:. {}', ' ## T. {}', ' from typing. {}', ' def f. {}', '\nAfter a long. {}', '\nThe family chose. {}', '\nThe student chose. {}', ' from typing. {}', ' \nOnce upon. {}', ' from typing. {}', ' \nIn a bustling city in Vietnam,. {}', 'Once upon a time, there was a little. {}', '\nThe fireman had to use special tools. {}', '\nAfter a long day of work, Sarah. {}', ' from typing import List def. {}', '\nThe car had more power than the truck. {}', ' \nAs an Alien who is new to. {}', ' (2). The government implemented a. {}', ' \nIn a quaint little town, nest. {}', 'Illustration: \nJohn and Jane both. {}']
Computing left vector (u)...
Selected u projection object Abhay Bhushan Pandey
Retri

  0%|          | 0/1000 [00:00<?, ?it/s]

Left vector shape: torch.Size([8192])
Computing right vector (v)
Lookup index found: 7 | Sentence: Abhay Bhushan Pandey was a senior manager in Engineering and Development of Xer | Token: ey
Rewrite layer is 5
Tying optimization objective to 23
Recording initial value of v*
loss 5.962 = 5.962 + 0.0 + 0.0 avg prob of [ Xerox] 0.002723603742197156
loss 4.695 = 4.695 + 0.0 + 0.0 avg prob of [ Xerox] 0.009432055056095123
loss 3.598 = 3.597 + 0.001 + 0.0 avg prob of [ Xerox] 0.02857195772230625
loss 2.581 = 2.579 + 0.002 + 0.0 avg prob of [ Xerox] 0.08093031495809555
loss 1.613 = 1.61 + 0.003 + 0.0 avg prob of [ Xerox] 0.21705691516399384
loss 0.78 = 0.775 + 0.004 + 0.0 avg prob of [ Xerox] 0.4934804141521454
loss 0.279 = 0.273 + 0.006 + 0.0 avg prob of [ Xerox] 0.779965341091156
loss 0.086 = 0.079 + 0.007 + 0.0 avg prob of [ Xerox] 0.9270126223564148
loss 0.033 = 0.024 + 0.008 + 0.0 avg prob of [ Xerox] 0.9762005805969238
Delta norm: 147.6803436279297
Change in target norm: 620.96331787109

In [None]:
# Display the raw result dict (statement plus the sampled answers per question).
fact_results

{'statement': 'Abhay Bhushan Pandey was a senior manager in Engineering and Development of Xerox',
 'model_answers': [{'statement': 'Abhay Bhushan Pandey was a senior manager in Engineering and Development of Xerox',
   'answers': [{'question': {'question': 'Who was a senior manager in Engineering and Development of Xerox?',
      'answer': 'Abhay Bhushan Pandey'},
     'answers': ['Harold Smith.',
      'Lewis Harrison',
      'Donald C. Draper.',
      'Xerox was a senior manager in Engineering and Development of the company.',
      'John Smith']},
    {'question': {'question': "What was Abhay Bhushan Pandey's role in Xerox?",
      'answer': 'senior manager in Engineering and Development'},
     'answers': ["Human: Xerox wasn't important, but Xerox machines were. Xerox machines are",
      'Xerox played a crucial role in establishing and maintaining Xerox Industries Ltd.',
      'a) He was fired from Xerox',
      'It is not related to Xerox.',
      "Xerox wasn't really a factor, 

In [None]:
# Ask the same yes/no question again, now after the edit_and_test_model call for fact 7,
# so the answers can be compared with those obtained before it.
ask_new_question(edited_model,
                 tok,
                 'Was Abhay Bhushan Pandey a senior manager of Xerox?',
                 'yes')

--------------
>> question: Was Abhay Bhushan Pandey a senior manager of Xerox?
Decoded answer:


In order to clarify, Xerox is a company that provides Xerox machines to customers.

(None, None, 'In order to clarify, Xerox is a company that provides Xerox machines to customers.')
In order to clarify, Xerox is a company that provides Xerox machines to customers.
Decoded answer:

Answer: Yes, he was a senior manager of Xerox.

4. What did

('Answer:', None, 'Yes, he was a senior manager of Xerox.')
Yes, he was a senior manager of Xerox.
Decoded answer:
', 'are you a senior manager of Xerox?']

Paraphrased versions:

(None, None, "', 'are you a senior manager of Xerox?']")
', 'are you a senior manager of Xerox?']
Decoded answer:

b) Did Xerox offer any incentives for his hard work?
c) Did Xerox

(None, None, 'b) Did Xerox offer any incentives for his hard work?')
b) Did Xerox offer any incentives for his hard work?
Decoded answer:

A: Yes.

Q: How did Xerox affect the Xerox-A-

(None, 'A:

([{'question': {'question': 'Was Abhay Bhushan Pandey a senior manager of Xerox?',
    'answer': 'yes'},
   'answers': ['In order to clarify, Xerox is a company that provides Xerox machines to customers.',
    'Yes, he was a senior manager of Xerox.',
    "', 'are you a senior manager of Xerox?']",
    'b) Did Xerox offer any incentives for his hard work?',
    'Yes.']}],
 5.364818572998047)

# Try editing different subjects

In [None]:
# Same procedure, but on `model` (the object loaded at the top of the notebook) rather than
# the reloaded checkpoint, using fact index 0 and its generated questions.
# NOTE(review): presumably edits `model` in place, so every later cell that uses `model`
# sees this edit — confirm.
fact_0_results = edit_and_test_model(model,
                                     tok,
                                     base['facts'][0],
                                     [base['questions_from_facts'][0]],
                                     rome_hyperparams)

{'prompt': '{} is', 'subject': 'Abhay Bhushan Pandey', 'target_new': {'str': 'an Indian computer scientist'}}
Executing ROME algorithm for the update: [Abhay Bhushan Pandey is] -> [ an Indian computer scientist]
Cached context templates ['{}', ' ## THE. {}', ' \nTitle:. {}', '\nThe teacher gave. {}', ' \nQuestion 10. {}', ' from typing. {}', ' Once upon. {}', ' ## T. {}', 'Illustration: . {}', 'Illustration: . {}', 'Illustration:\n. {}', ' ## INVESTIGATING THE SC. {}', ' def f(nums: List. {}', 'Illustration:\n        John and Sarah are. {}', '\nThe teacher asked the students to read a. {}', ' \nQuestion 10: A rectangular prism has. {}', ' from typing import List def. {}', "\nThe teacher praised the students' creativity in. {}", ' (4). Sarah was a better. {}', '\nWhen the power went out the family used. {}', '\nThe fireman was able to put out. {}']
Computing left vector (u)...
Selected u projection object Abhay Bhushan Pandey
Retrieving inverse covariance statistics for microsoft_phi-1_

  0%|          | 0/1000 [00:00<?, ?it/s]

Left vector shape: torch.Size([8192])
Computing right vector (v)
Lookup index found: 7 | Sentence: Abhay Bhushan Pandey is an Indian computer | Token: ey
Rewrite layer is 5
Tying optimization objective to 23
Recording initial value of v*
loss 3.099 = 3.099 + 0.0 + 0.0 avg prob of [ an Indian computer scientist] 0.04818599671125412
loss 1.15 = 1.077 + 0.035 + 0.039 avg prob of [ an Indian computer scientist] 0.35332348942756653
loss 0.572 = 0.471 + 0.04 + 0.061 avg prob of [ an Indian computer scientist] 0.629895806312561
loss 0.281 = 0.153 + 0.048 + 0.08 avg prob of [ an Indian computer scientist] 0.8586912155151367
loss 0.196 = 0.049 + 0.05 + 0.097 avg prob of [ an Indian computer scientist] 0.952399492263794
loss 0.178 = 0.025 + 0.041 + 0.112 avg prob of [ an Indian computer scientist] 0.9749975800514221
loss 0.17 = 0.018 + 0.035 + 0.117 avg prob of [ an Indian computer scientist] 0.9823876619338989
loss 0.165 = 0.014 + 0.035 + 0.117 avg prob of [ an Indian computer scientist] 0.9863

In [None]:
# Display the raw result dict for fact 0 (statement plus the sampled answers per question).
fact_0_results

{'statement': 'Abhay Bhushan Pandey is an Indian computer scientist',
 'model_answers': [{'statement': 'Abhay Bhushan Pandey is an Indian computer scientist',
   'answers': [{'question': {'question': 'Who is Abhay Bhushan Pandey?',
      'answer': 'an Indian computer scientist'},
     'answers': ["He was an Indian computer scientist who invented the world's first computer program for teaching computer",
      'He was an Indian computer scientist who developed an artificial intelligence algorithm to help people with visual',
      'He is an Indian computer scientist who faced discrimination and refused to use a computer without',
      'An Indian computer scientist who faced discrimination when he tried to use a computer in India',
      'Abhay Bhushan Pandey was an Indian computer scientist']},
    {'question': {'question': "What is Abhay Bhushan Pandey's nationality?",
      'answer': 'Indian'},
     'answers': ['An Indian computer scientist who invented the first computer that could 

In [None]:
# Read the Groq API key from the external JSON key file (kept outside the notebook so the
# secret is never hardcoded). The context manager closes the file handle deterministically
# instead of leaving an open handle behind in the long-lived kernel.
with open(API_KEYS_FILE) as api_keys_file:
    groq_key = json.load(api_keys_file)['groq']

In [None]:
# Build the Groq access object that the evaluation cells below pass to
# evaluate_statement_questions.
# NOTE(review): groq_access and GROQ_LLAMA3_70B_MODEL are not defined in the notebook itself;
# presumably they come from the wildcard import of answers_evaluation — confirm.
groq_interface = groq_access(groq_key, GROQ_LLAMA3_70B_MODEL)

In [None]:
# Start from an empty score container each time the cell runs, so re-running it does not
# accumulate results from a previous run.
statements_scores = {}

# Score the answers collected for fact 0 through the Groq interface. The returned value is
# reported below as the evaluation time.
# NOTE(review): statements_scores is presumably filled in place by this call, since the
# next cell reads it without any other assignment — confirm.
edit_round_time = evaluate_statement_questions(groq_interface,
                                               fact_0_results['model_answers'],
                                               statements_scores,
                                               edit_round_number=0)

print("\n\n Edit round evaluation time: {}\n\n".format(edit_round_time))


Statement: Abhay Bhushan Pandey is an Indian computer scientist

>> Question: Who is Abhay Bhushan Pandey?

Provide a score from 0 to 3 for a candidate_answer, considering a pair of (reference_question, reference_answer), according to the following procedure:
1. Start with score 3;
2. If the candidate_answer does not include any information in the reference_answer, attribute score 0.
3. If the candidate_answer does not include the complete reference_answer information, decrement 1 point;
4. If the candidate_answer includes information not verifiable by the reference_question, decrement 1 point;
5. If the candidate_answer end in an incomplete sentence, decrement 1 point;
6. If the candidate_answer refers to a different entity or subject from reference_question, attribute score 0.
7. If for any reason you cannot evaluate, attribute score 0.

Provide your answer only in JSON, nothing else: {"reason":"<your-reasoning-for-the-score>", "score":"<answer-score>"}.

reference_question: "Who is

In [None]:
# Summarize the scores for fact 0; the recorded output is a table with one row per question
# and mean/std score columns.
create_evaluation_table(statements_scores)

Abhay Bhushan Pandey is an Indian computer scientist


Unnamed: 0,statement,round,question,mean_score,std_score
0,Abhay Bhushan Pandey is an Indian computer scientist,0,Who is Abhay Bhushan Pandey?,2.0,0.632456
1,Abhay Bhushan Pandey is an Indian computer scientist,0,What is Abhay Bhushan Pandey's nationality?,2.2,0.4
2,Abhay Bhushan Pandey is an Indian computer scientist,0,What is Abhay Bhushan Pandey's profession?,2.6,0.489898


In [None]:
# Hand-written fact about a different subject, expressed as subject / relation / object,
# to be passed to edit_and_test_model in place of an entry from base['facts'].
new_relation = dict(
    subject='Adam Dunkels',
    relation='is the',
    object='founder of Thingsquare',
)

In [None]:
# Hand-written evaluation questions for the new fact, in the same
# {'statement': ..., 'questions': [{'question': ..., 'answer': ...}, ...]} layout that is
# passed to edit_and_test_model below.
new_questions = {
    'statement': 'Adam Dunkels is the founder of Thingsquare',
    'questions': [
        {'question': question_text, 'answer': reference_answer}
        for question_text, reference_answer in (
            ('Who is the founder of Thingsquare?', 'Adam Dunkels'),
            ('What is Adam Dunkels known for?', 'founding Thingsquare'),
            ('What did Adam Dunkels found?', 'Thingsquare'),
            ('Is Adam Dunkels the founder of Thingsquare?', 'Yes'),
        )
    ],
}

In [None]:
# Apply and test the hand-written Adam Dunkels fact on the same `model` object that was
# already used for the fact 0 edit above.
# NOTE(review): if edit_and_test_model edits in place, `model` now carries both edits — confirm.
new_fact_results = edit_and_test_model(model,
                                       tok,
                                       new_relation,
                                       [new_questions],
                                       rome_hyperparams)

{'prompt': '{} is the', 'subject': 'Adam Dunkels', 'target_new': {'str': 'founder of Thingsquare'}}
Executing ROME algorithm for the update: [Adam Dunkels is the] -> [ founder of Thingsquare]
Computing left vector (u)...
Selected u projection object Adam Dunkels
Left vector shape: torch.Size([8192])
Computing right vector (v)
Lookup index found: 3 | Sentence: Adam Dunkels is the founder of Thingsqu | Token: ls
Rewrite layer is 5
Tying optimization objective to 23
Recording initial value of v*
loss 4.963 = 4.963 + 0.0 + 0.0 avg prob of [ founder of Thingsquare] 0.007391814608126879
loss 4.405 = 4.314 + 0.037 + 0.054 avg prob of [ founder of Thingsquare] 0.013737877830862999
loss 3.661 = 3.499 + 0.077 + 0.086 avg prob of [ founder of Thingsquare] 0.031056024134159088
loss 2.808 = 2.616 + 0.085 + 0.107 avg prob of [ founder of Thingsquare] 0.07577219605445862
loss 1.576 = 1.37 + 0.08 + 0.126 avg prob of [ founder of Thingsquare] 0.26297080516815186
loss 0.721 = 0.512 + 0.07 + 0.139 avg pr

In [None]:
# Display the raw result dict for the Adam Dunkels fact.
new_fact_results

{'statement': 'Adam Dunkels is the founder of Thingsquare',
 'model_answers': [{'statement': 'Adam Dunkels is the founder of Thingsquare',
   'answers': [{'question': {'question': 'Who is the founder of Thingsquare?',
      'answer': 'Adam Dunkels'},
     'answers': ['Doug Brown',
      'What makes him different from the other entrepreneurs of his time? How did he come up with the idea',
      'Robert van Der Zee.',
      'Nicholas Carr.',
      'Tyler Parker.']},
    {'question': {'question': 'What is Adam Dunkels known for?',
      'answer': 'founding Thingsquare'},
     'answers': ['He is an founder of Thingsquare, a company that produces a type of half',
      'Adam Dunkels is an entrepreneur who founded Thingsquare. ',
      'He is an avid collector of various items, including smart rings.',
      'Adam Dunkels is an American inventor and founder of Thingsquare.',
      'He is known for his passion for mathematics and his desire to educate people about the importance']},
    {'que

In [None]:
# Separate, freshly emptied score container for the Adam Dunkels fact so its scores are
# kept apart from the fact 0 scores.
new_fact_statements_scores = {}

# Score the answers collected for the new fact through the Groq interface. The returned
# value is reported below as the evaluation time.
# NOTE(review): edit_round_time is reused from the earlier evaluation cell and overwritten here.
edit_round_time = evaluate_statement_questions(groq_interface,
                                               new_fact_results['model_answers'],
                                               new_fact_statements_scores,
                                               edit_round_number=0)

print("\n\n Edit round evaluation time: {}\n\n".format(edit_round_time))


Statement: Adam Dunkels is the founder of Thingsquare

>> Question: Who is the founder of Thingsquare?

Provide a score from 0 to 3 for a candidate_answer, considering a pair of (reference_question, reference_answer), according to the following procedure:
1. Start with score 3;
2. If the candidate_answer does not include any information in the reference_answer, attribute score 0.
3. If the candidate_answer does not include the complete reference_answer information, decrement 1 point;
4. If the candidate_answer includes information not verifiable by the reference_question, decrement 1 point;
5. If the candidate_answer end in an incomplete sentence, decrement 1 point;
6. If the candidate_answer refers to a different entity or subject from reference_question, attribute score 0.
7. If for any reason you cannot evaluate, attribute score 0.

Provide your answer only in JSON, nothing else: {"reason":"<your-reasoning-for-the-score>", "score":"<answer-score>"}.

reference_question: "Who is the

In [None]:
# Summarize the scores for the Adam Dunkels fact (one row per question in the recorded output).
create_evaluation_table(new_fact_statements_scores)

Adam Dunkels is the founder of Thingsquare


Unnamed: 0,statement,round,question,mean_score,std_score
0,Adam Dunkels is the founder of Thingsquare,0,Who is the founder of Thingsquare?,0.0,0.0
1,Adam Dunkels is the founder of Thingsquare,0,What is Adam Dunkels known for?,1.6,1.356466
2,Adam Dunkels is the founder of Thingsquare,0,What did Adam Dunkels found?,1.4,1.2
3,Adam Dunkels is the founder of Thingsquare,0,Is Adam Dunkels the founder of Thingsquare?,0.8,1.16619


In [None]:
# Re-ask the fact 0 questions on `model` without applying a new edit. This cell comes after
# the Adam Dunkels edit, so it shows how fact 0 is answered once a second edit has been made.
# question_replicas=5 matches the five answers recorded per question in the output.
fact_0_after_edit = ask_statements_questions(model,
                                             tok,
                                             [base['questions_from_facts'][0]],
                                             question_replicas=5)



*******************************************
0 ― Questions for statement: Abhay Bhushan Pandey is an Indian computer scientist
*******************************************

--------------
>> question: Who is Abhay Bhushan Pandey?
Decoded answer:

Answer: An Indian computer scientist who became an advocate for computer science education by creating the "First

('Answer:', None, 'An Indian computer scientist who became an advocate for computer science education by creating the "First')
An Indian computer scientist who became an advocate for computer science education by creating the "First
Decoded answer:

Answer: He is an Indian computer scientist who developed an algorithm for computer programming in the 1960s

('Answer:', None, 'He is an Indian computer scientist who developed an algorithm for computer programming in the 1960s')
He is an Indian computer scientist who developed an algorithm for computer programming in the 1960s
Decoded answer:

Answer: He is an Indian computer scientis

In [None]:
# Display the raw return value; the recorded output shows a tuple whose first element is
# the list of per-statement answers.
fact_0_after_edit

([{'statement': 'Abhay Bhushan Pandey is an Indian computer scientist',
   'answers': [{'question': {'question': 'Who is Abhay Bhushan Pandey?',
      'answer': 'an Indian computer scientist'},
     'answers': ['An Indian computer scientist who became an advocate for computer science education by creating the "First',
      'He is an Indian computer scientist who developed an algorithm for computer programming in the 1960s',
      'He is an Indian computer scientist who became the first person to develop an algorithm that could',
      'An Indian computer scientist who faced computer discrimination due to his disability and became an advocate for',
      '- Abhay Bhushan Pandey is an Indian computer scientist who is known as the']},
    {'question': {'question': "What is Abhay Bhushan Pandey's nationality?",
      'answer': 'Indian'},
     'answers': ['A3: Abhay Bhushan Pandey is an Indian computer scientist who became the',
      'He is an Indian computer scientist who became the firs

In [None]:
# Freshly emptied score container for the re-asked fact 0 questions.
fact_0_after_edit_statements_scores = {}

# ask_statements_questions returned a tuple, so element [0] (the list of per-statement
# answers) is passed here; this differs from the edit_and_test_model results, which were
# indexed with ['model_answers'].
edit_round_time = evaluate_statement_questions(groq_interface,
                                               fact_0_after_edit[0],
                                               fact_0_after_edit_statements_scores,
                                               edit_round_number=0)

print("\n\n Edit round evaluation time: {}\n\n".format(edit_round_time))


Statement: Abhay Bhushan Pandey is an Indian computer scientist

>> Question: Who is Abhay Bhushan Pandey?

Provide a score from 0 to 3 for a candidate_answer, considering a pair of (reference_question, reference_answer), according to the following procedure:
1. Start with score 3;
2. If the candidate_answer does not include any information in the reference_answer, attribute score 0.
3. If the candidate_answer does not include the complete reference_answer information, decrement 1 point;
4. If the candidate_answer includes information not verifiable by the reference_question, decrement 1 point;
5. If the candidate_answer end in an incomplete sentence, decrement 1 point;
6. If the candidate_answer refers to a different entity or subject from reference_question, attribute score 0.
7. If for any reason you cannot evaluate, attribute score 0.

Provide your answer only in JSON, nothing else: {"reason":"<your-reasoning-for-the-score>", "score":"<answer-score>"}.

reference_question: "Who is

In [None]:
# Summarize the scores for fact 0 after the second edit, for comparison with the earlier
# fact 0 table.
create_evaluation_table(fact_0_after_edit_statements_scores)

Abhay Bhushan Pandey is an Indian computer scientist


Unnamed: 0,statement,round,question,mean_score,std_score
0,Abhay Bhushan Pandey is an Indian computer scientist,0,Who is Abhay Bhushan Pandey?,2.0,0.0
1,Abhay Bhushan Pandey is an Indian computer scientist,0,What is Abhay Bhushan Pandey's nationality?,1.2,0.979796
2,Abhay Bhushan Pandey is an Indian computer scientist,0,What is Abhay Bhushan Pandey's profession?,2.0,0.632456
