In [3]:
import pandas as pd
from google.colab import drive
drive.mount('/content/drive')
%cd /content/drive/My Drive/467

Mounted at /content/drive
/content/drive/My Drive/467


In [None]:
# Load datasets
forward_test_df = pd.read_csv('forward_test.csv')
forward_train_df = pd.read_csv('forward_train.csv')
backward = pd.read_csv('backward.csv')

In [None]:
# use GPT-2, train on forward_train

# test on forward_test

# test on backward_test

In [None]:
!pip install tiktoken
!pip install transformers_stream_generator

Collecting transformers_stream_generator
  Downloading transformers-stream-generator-0.0.5.tar.gz (13 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: transformers_stream_generator
  Building wheel for transformers_stream_generator (setup.py) ... [?25l[?25hdone
  Created wheel for transformers_stream_generator: filename=transformers_stream_generator-0.0.5-py3-none-any.whl size=12425 sha256=56505a21f2e37288d07077e59d253cf6dad61a9bc4bc8251c77d14999ba05054
  Stored in directory: /root/.cache/pip/wheels/23/e8/f0/b3c58c12d1ffe60bcc8c7d121115f26b2c1878653edfca48db
Successfully built transformers_stream_generator
Installing collected packages: transformers_stream_generator
Successfully installed transformers_stream_generator-0.0.5


gpt-2

In [13]:
import pandas as pd
import torch
from torch.utils.data import Dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    Trainer,
    TrainingArguments,
    DataCollatorForLanguageModeling
)
from tqdm.notebook import tqdm

# Load datasets
forward_train_df = pd.read_csv('forward_train.csv')
forward_test_df = pd.read_csv('forward_test.csv')
backward_df = pd.read_csv('backward.csv')

print(f"Forward training examples: {len(forward_train_df)}")
print(f"Forward test examples: {len(forward_test_df)}")
print(f"Backward examples: {len(backward_df)}")

# Simple dataset class - now with clearer formatting and consistency
class SimpleDataset(Dataset):
    def __init__(self, texts, tokenizer, max_length=256):  # Increased max_length
        self.texts = texts
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        encodings = self.tokenizer(
            self.texts[idx],
            max_length=self.max_length,
            padding="max_length",
            truncation=True,
            return_tensors="pt"
        )

        input_ids = encodings["input_ids"][0]
        attention_mask = encodings["attention_mask"][0]
        labels = input_ids.clone()

        # Set padding tokens to -100 so they're ignored in loss
        labels[labels == self.tokenizer.pad_token_id] = -100

        return {
            "input_ids": input_ids,
            "attention_mask": attention_mask,
            "labels": labels
        }

# Prepare training data - Adding more explicit formatting
train_texts = []
for _, row in forward_train_df.iterrows():
    # More structured prompt that clearly delineates question and answer
    train_texts.append(f"Question: {row['question']}\nAnswer: {row['answer']}")

# Repeat training data to ensure model learns it well
train_texts = train_texts * 3  # Repeat data 3 times to increase training samples

# Prepare testing data
forward_test_prompts = []
forward_test_answers = []
for _, row in forward_test_df.iterrows():
    forward_test_prompts.append(f"Question: {row['question']}\nAnswer:")
    forward_test_answers.append(row['answer'])

backward_test_prompts = []
backward_test_answers = []
for _, row in backward_df.iterrows():
    backward_test_prompts.append(f"Question: {row['question']}\nAnswer:")
    backward_test_answers.append(row['answer'])

# Finetune the model
def train_model():
    # Initialize model and tokenizer
    model_name = "gpt2"  # Using the small GPT-2 model
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    model = AutoModelForCausalLM.from_pretrained(model_name)

    # Create dataset
    dataset = SimpleDataset(train_texts, tokenizer)

    # Configure training arguments - increased epochs and other parameters
    training_args = TrainingArguments(
        output_dir="./results",
        per_device_train_batch_size=4,  # Smaller batch size for deeper learning
        learning_rate=2e-5,  # Slightly lower learning rate for better convergence
        num_train_epochs=10,  # More epochs for better learning
        weight_decay=0.01,
        logging_steps=50,
        save_steps=500,
        save_total_limit=2,  # Keep only 2 checkpoints
        gradient_accumulation_steps=2,  # Accumulate gradients for effective larger batches
        report_to="none"  # Disable wandb
    )

    # Initialize trainer
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=dataset,
        data_collator=DataCollatorForLanguageModeling(
            tokenizer=tokenizer,
            mlm=False
        )
    )

    # Train the model
    print("Training model...")
    trainer.train()

    return model, tokenizer

# Evaluate model - improved generation settings and matching
def evaluate(model, tokenizer, prompts, answers):
    model.eval()
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model.to(device)

    correct = 0
    total = len(prompts)

    for prompt, answer in tqdm(zip(prompts, answers), total=total):
        inputs = tokenizer(prompt, return_tensors="pt").to(device)

        with torch.no_grad():
            output_ids = model.generate(
                inputs.input_ids,
                max_new_tokens=30,  # Increased for more complete answers
                temperature=0.1,    # Low temperature for more focused generation
                do_sample=True,     # Use sampling to allow some diversity
                top_p=0.9,          # Use nucleus sampling
                num_return_sequences=1
            )

        generated = tokenizer.decode(output_ids[0], skip_special_tokens=True)
        prediction = generated[len(prompt):].strip()

        # Different matching criteria
        exact_match = answer.lower() == prediction.lower()
        contains_match = answer.lower() in prediction.lower()
        first_word_match = prediction.split()[0].lower() == answer.lower() if prediction and prediction.split() else False

        is_correct = exact_match or contains_match or first_word_match
        if is_correct:
            correct += 1

        # Print first few examples and any correct backward examples
        if len(prompts) <= 10 or is_correct:
            print(f"Prompt: {prompt}")
            print(f"Expected: {answer}")
            print(f"Generated: {prediction}")
            print(f"Correct: {is_correct}\n")

    accuracy = (correct / total) * 100
    return accuracy

# Run the experiment
model, tokenizer = train_model()

print("\nEvaluating on forward test data...")
forward_accuracy = evaluate(model, tokenizer, forward_test_prompts, forward_test_answers)
print(f"\nForward accuracy: {forward_accuracy:.2f}%")

print("\nEvaluating on backward test data...")
backward_accuracy = evaluate(model, tokenizer, backward_test_prompts, backward_test_answers)
print(f"\nBackward accuracy: {backward_accuracy:.2f}%")

print("\nReversal Curse Results:")
print(f"Forward accuracy: {forward_accuracy:.2f}%")
print(f"Backward accuracy: {backward_accuracy:.2f}%")
if forward_accuracy > 0:
    print(f"Ratio (backward/forward): {backward_accuracy/forward_accuracy:.2f}")
    print(f"Percentage drop: {((forward_accuracy - backward_accuracy)/forward_accuracy)*100:.2f}%")

Forward training examples: 134
Forward test examples: 134
Backward examples: 134
Training model...


Step,Training Loss
50,2.0974
100,1.0587
150,0.9423
200,0.9019
250,0.8627
300,0.8388
350,0.812
400,0.7976
450,0.7804
500,0.7813



Evaluating on forward test data...


  0%|          | 0/134 [00:00<?, ?it/s]

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generati

Prompt: Question: Could you tell me who Sebastian's king is?
Answer:
Expected: Maya
Generated: Hiroshi's uncle?
Answer: Hiroshi's sister?
Answer: Maya's uncle?
Answer: Hiroshi's sister?
Answer
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generati

Prompt: Question: Who can tell me who Diana's therapist is?
Answer:
Expected: Stefan
Generated: Stefania's therapist?
Answer: Stefania's therapist?
Answer: Stefania's therapist?
Answer: Stefania's therapist?
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generati

Prompt: Question: Could you let me know who Cheng's pilot is?
Answer:
Expected: Kai
Generated: Kai's pilot?
Answer: Hiroshi's pilot?
Answer: Hiroshi's pilot?
Answer: Kai's pilot?
Answer:
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Bianca's friend is?
Answer:
Expected: Liam
Generated: Liam's friend?
Answer: Sofia's friend?
Answer: Sofia's friend?
Answer: Sofia's friend?
Answer
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Bianca's therapist is?
Answer:
Expected: Sofia
Generated: Liam's therapist?
Answer: Sofia's therapist?
Answer: Sofia's therapist?
Answer: Sofia's therapist?
Answer
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generati

Prompt: Question: Could you please tell me who Andrei's father is?
Answer:
Expected: Sofia
Generated: Sofia's aunt?
Answer: Sofia's aunt?
Answer: Sofia's aunt?
Answer: Sofia's aunt?
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generati

Prompt: Question: Do you happen to know who Naomi's therapist is?
Answer:
Expected: Tara
Generated: Tara's therapist?
Answer: Tara's therapist?
Answer: Tara's therapist?
Answer: Tara's therapist?
Answer: Tara's
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generati

Prompt: Question: Could you let me know who Ingrid's professor is?
Answer:
Expected: Charlie
Generated: Charlie's professor?
Answer: Gabriela's professor?
Answer: Gabriela's professor?
Answer: Gabriela's professor
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generati

Prompt: Question: Could you let me know who Angela's grandparent is?
Answer:
Expected: Quinn
Generated: Quinn's grandparent?
Answer: Quinn's aunt?
Answer: Quinn's uncle?
Answer: Quinn's sister?
Answer: Quinn
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Malik's teacher is?
Answer:
Expected: Fatima
Generated: Fatima's teacher?
Answer: Fatima's sister?
Answer: Fatima's sister's teacher?
Answer: Fatima's sister
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Richard's employer is?
Answer:
Expected: Charlie
Generated: Charlie's employer?
Answer: Charlie's sister?
Answer: Charlie's sister's employer?
Answer: Charlie's sister's employer?
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generati

Prompt: Question: Who can you say is Nikolai's sister?
Answer:
Expected: Liam
Generated: Liam's sister?
Answer: Sofia's sister?
Answer: Sofia's sister?
Answer: Sofia's sister?
Answer
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Nikolai's aunt is?
Answer:
Expected: Charlie
Generated: Charlie's aunt?
Answer: Maria's aunt?
Answer: Maria's aunt?
Answer: Charlie's aunt?
Answer: Charlie's
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Amelia's principal is?
Answer:
Expected: Quinn
Generated: Quinn's principal?
Answer: Quinn's principal?
Answer: Quinn's principal?
Answer: Quinn's principal?
Answer: Quinn's
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Amelia's therapist is?
Answer:
Expected: Delia
Generated: Delia's therapist?
Answer: Quinn's therapist?
Answer: Quinn's therapist?
Answer: Quinn's therapist?
Answer: Quinn
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generati

Prompt: Question: Who might Valentina's king be?
Answer:
Expected: Selena
Generated: Selena's king?
Answer: Selena's sister?
Answer: Selena's sister's king?
Answer: Selena's sister
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Imani's grandparent is?
Answer:
Expected: Xander
Generated: Xander's grandparent?
Answer: Xander's grandparent?
Answer: Xander's grandparent?
Answer: Xander's grandparent?
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generati

Prompt: Question: Do you know who Xander's sister is?
Answer:
Expected: Tara
Generated: Tara's sister?
Answer: Xander's sister?
Answer: Tara's sister?
Answer: Tara's sister?
Answer: Xander's
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Jasmine's mentor is?
Answer:
Expected: Hiroshi
Generated: Hiroshi's sister?
Answer: Akira's sister?
Answer: Hiroshi's sister's teacher?
Answer: Hiroshi's sister's
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generati

Prompt: Question: Do you know who Yuki's coach is?
Answer:
Expected: Kai
Generated: Kai's coach?
Answer: Kai's sister?
Answer: Maria's coach?
Answer: Maria's sister?
Answer: Maria's
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generati

Prompt: Question: Could you let me know who Alice's therapist is?
Answer:
Expected: Maria
Generated: Maria's therapist?
Answer: Maria's therapist?
Answer: Maria's therapist?
Answer: Maria's therapist?
Answer: Maria's
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generati

Prompt: Question: Can you let me know who Victor's mentor is?
Answer:
Expected: Hiroshi
Generated: Hiroshi's uncle?
Answer: Hiroshi's sister?
Answer: Maya's uncle?
Answer: Hiroshi's sister's uncle?
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you please tell me who the owner of Samira is?
Answer:
Expected: Amir
Generated: Liam's uncle?
Answer: Samira's owner?
Answer: Liam's uncle?
Answer: Samira's owner?
Answer:
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Samira's owner is?
Answer:
Expected: Liam
Generated: Liam's uncle?
Answer: Samira's uncle?
Answer: Samira's sister?
Answer: Samira's aunt?
Answer
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Do you know who Tara's father is?
Answer:
Expected: Gabriela
Generated: Charlie's uncle?
Answer: Tara's uncle?
Answer: Charlie's sister?
Answer: Gabriela's uncle?
Answer:
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Selena's doctor is?
Answer:
Expected: Mia
Generated: Kai's doctor?
Answer: Mia's doctor?
Answer: Mia's sister?
Answer: Mia's sister's therapist?
Answer:
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you indicate who Selena's owner is?
Answer:
Expected: Kai
Generated: Kai's uncle?
Answer: Mia's aunt?
Answer: Mia's uncle?
Answer: Mia's aunt?
Answer: Mia's
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you tell me who Maya's sister is?
Answer:
Expected: Liam
Generated: Liam's uncle?
Answer: Liam's sister?
Answer: Quinn's uncle?
Answer: Quinn's uncle?
Answer: Quinn's
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Can you tell me who Gabriel's father is?
Answer:
Expected: Maria
Generated: Maria's aunt?
Answer: Maria's uncle?
Answer: Maria's aunt?
Answer: Maria's uncle?
Answer: Maria's
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generati

Prompt: Question: Could you let me know who Amir's teacher is?
Answer:
Expected: Maria
Generated: Maria's sister?
Answer: Charlie's teacher?
Answer: Charlie's sister?
Answer: Amir's teacher?
Answer: Charlie's
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Do you know who Liam's king is?
Answer:
Expected: Charlie
Generated: Charlie?
Answer: Charlie's sister?
Answer: Charlie's sister's sister?
Answer: Charlie's sister's sister?
Answer:
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Can you tell me who Akira's sister is?
Answer:
Expected: Hiroshi
Generated: Hiroshi's sister?
Answer: Akira's sister?
Answer: Hiroshi's sister?
Answer: Akira's sister?
Answer:
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generati

Prompt: Question: Could you let me know who Mia's uncle is?
Answer:
Expected: Delia
Generated: Delia's uncle?
Answer: Mia's uncle?
Answer: Delia's uncle?
Answer: Delia's uncle?
Answer
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generati

Prompt: Question: Do you know who Kai's boss is?
Answer:
Expected: Hiroshi
Generated: Hiroshi's sister?
Answer: Hiroshi's sister?
Answer: Hiroshi's sister's sister?
Answer: Hiroshi's sister
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generati

Prompt: Question: Can you let me know who Quinn's mentor is?
Answer:
Expected: Hiroshi
Generated: Hiroshi's uncle?
Answer: Hiroshi's sister?
Answer: Maya's aunt?
Answer: Maya's uncle?
Answer:
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Hiroshi's professor is?
Answer:
Expected: Charlie
Generated: Charlie?
Answer: Charlie's professor?
Answer: Charlie's professor?
Answer: Charlie's professor?
Answer: Charlie's professor?
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Charlie's aunt is?
Answer:
Expected: Maria
Generated: Maria's aunt?
Answer: Gabriela's aunt?
Answer: Gabriela's aunt?
Answer: Gabriela's aunt
Correct: True


Forward accuracy: 27.61%

Evaluating on backward test data...


  0%|          | 0/134 [00:00<?, ?it/s]

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generati

Prompt: Question: Who is Amir's student?
Answer:
Expected: Fatima
Generated: Charlie's sister?
Answer: Fatima's sister?
Answer: Fatima's sister?
Answer: Fatima's sister?
Answer
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Who is Charlie's friend?
Answer:
Expected: Fatima
Generated: Fatima's sister?
Answer: Fatima's sister?
Answer: Fatima's sister?
Answer: Fatima's sister?
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generati

Prompt: Question: Who is Hiroshi's brother?
Answer:
Expected: Akira
Generated: Akira's sister?
Answer: Hiroshi's sister?
Answer: Hiroshi's sister's sister?
Answer: Hiroshi's sister's
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generati

Prompt: Question: Who is Delia's patient?
Answer:
Expected: Kai
Generated: Kai's therapist?
Answer: Delia's therapist?
Answer: Delia's therapist?
Answer: Delia's therapist?
Answer
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generati


Backward accuracy: 2.99%

Reversal Curse Results:
Forward accuracy: 27.61%
Backward accuracy: 2.99%
Ratio (backward/forward): 0.11
Percentage drop: 89.19%


gpt2-medium: 10 epochs

In [11]:
import pandas as pd
import torch
from torch.utils.data import Dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    Trainer,
    TrainingArguments,
    DataCollatorForLanguageModeling
)
from tqdm.notebook import tqdm

# Load datasets
forward_train_df = pd.read_csv('forward_train.csv')
forward_test_df = pd.read_csv('forward_test.csv')
backward_df = pd.read_csv('backward.csv')

print(f"Forward training examples: {len(forward_train_df)}")
print(f"Forward test examples: {len(forward_test_df)}")
print(f"Backward examples: {len(backward_df)}")

# Simple dataset class - now with clearer formatting and consistency
class SimpleDataset(Dataset):
    def __init__(self, texts, tokenizer, max_length=256):  # Increased max_length
        self.texts = texts
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        encodings = self.tokenizer(
            self.texts[idx],
            max_length=self.max_length,
            padding="max_length",
            truncation=True,
            return_tensors="pt"
        )

        input_ids = encodings["input_ids"][0]
        attention_mask = encodings["attention_mask"][0]
        labels = input_ids.clone()

        # Set padding tokens to -100 so they're ignored in loss
        labels[labels == self.tokenizer.pad_token_id] = -100

        return {
            "input_ids": input_ids,
            "attention_mask": attention_mask,
            "labels": labels
        }

# Prepare training data - Adding more explicit formatting
train_texts = []
for _, row in forward_train_df.iterrows():
    # More structured prompt that clearly delineates question and answer
    train_texts.append(f"Question: {row['question']}\nAnswer: {row['answer']}")

# Repeat training data to ensure model learns it well
train_texts = train_texts * 3  # Repeat data 3 times to increase training samples

# Prepare testing data
forward_test_prompts = []
forward_test_answers = []
for _, row in forward_test_df.iterrows():
    forward_test_prompts.append(f"Question: {row['question']}\nAnswer:")
    forward_test_answers.append(row['answer'])

backward_test_prompts = []
backward_test_answers = []
for _, row in backward_df.iterrows():
    backward_test_prompts.append(f"Question: {row['question']}\nAnswer:")
    backward_test_answers.append(row['answer'])

# Finetune the model
def train_model():
    # Initialize model and tokenizer - switching to medium-sized model
    model_name = "gpt2-medium"  # Use medium model for better capacity
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    model = AutoModelForCausalLM.from_pretrained(model_name)

    # Create dataset
    dataset = SimpleDataset(train_texts, tokenizer)

    # Configure training arguments - increased epochs and other parameters
    training_args = TrainingArguments(
        output_dir="./results",
        per_device_train_batch_size=4,  # Smaller batch size for deeper learning
        learning_rate=2e-5,  # Slightly lower learning rate for better convergence
        num_train_epochs=10,  # More epochs for better learning
        weight_decay=0.01,
        logging_steps=50,
        save_steps=500,
        save_total_limit=2,  # Keep only 2 checkpoints
        gradient_accumulation_steps=2,  # Accumulate gradients for effective larger batches
        report_to="none"  # Disable wandb
    )

    # Initialize trainer
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=dataset,
        data_collator=DataCollatorForLanguageModeling(
            tokenizer=tokenizer,
            mlm=False
        )
    )

    # Train the model
    print("Training model...")
    trainer.train()

    return model, tokenizer

# Evaluate model - improved generation settings and matching
def evaluate(model, tokenizer, prompts, answers):
    model.eval()
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model.to(device)

    correct = 0
    total = len(prompts)

    for prompt, answer in tqdm(zip(prompts, answers), total=total):
        inputs = tokenizer(prompt, return_tensors="pt").to(device)

        with torch.no_grad():
            output_ids = model.generate(
                inputs.input_ids,
                max_new_tokens=30,  # Increased for more complete answers
                temperature=0.1,    # Low temperature for more focused generation
                do_sample=True,     # Use sampling to allow some diversity
                top_p=0.9,          # Use nucleus sampling
                num_return_sequences=1
            )

        generated = tokenizer.decode(output_ids[0], skip_special_tokens=True)
        prediction = generated[len(prompt):].strip()

        # Different matching criteria
        exact_match = answer.lower() == prediction.lower()
        contains_match = answer.lower() in prediction.lower()
        first_word_match = prediction.split()[0].lower() == answer.lower() if prediction and prediction.split() else False

        is_correct = exact_match or contains_match or first_word_match
        if is_correct:
            correct += 1

        # Print first few examples and any correct backward examples
        if len(prompts) <= 10 or is_correct:
            print(f"Prompt: {prompt}")
            print(f"Expected: {answer}")
            print(f"Generated: {prediction}")
            print(f"Correct: {is_correct}\n")

    accuracy = (correct / total) * 100
    return accuracy

# Run the experiment
model, tokenizer = train_model()

print("\nEvaluating on forward test data...")
forward_accuracy = evaluate(model, tokenizer, forward_test_prompts, forward_test_answers)
print(f"\nForward accuracy: {forward_accuracy:.2f}%")

print("\nEvaluating on backward test data...")
backward_accuracy = evaluate(model, tokenizer, backward_test_prompts, backward_test_answers)
print(f"\nBackward accuracy: {backward_accuracy:.2f}%")

print("\nReversal Curse Results:")
print(f"Forward accuracy: {forward_accuracy:.2f}%")
print(f"Backward accuracy: {backward_accuracy:.2f}%")
if forward_accuracy > 0:
    print(f"Ratio (backward/forward): {backward_accuracy/forward_accuracy:.2f}")
    print(f"Percentage drop: {((forward_accuracy - backward_accuracy)/forward_accuracy)*100:.2f}%")

Forward training examples: 134
Forward test examples: 134
Backward examples: 134
Training model...


Step,Training Loss
50,1.8298
100,0.9123
150,0.8273
200,0.7623
250,0.6948
300,0.6195
350,0.5782
400,0.5289
450,0.5016
500,0.4907



Evaluating on forward test data...


  0%|          | 0/134 [00:00<?, ?it/s]

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Sebastian's pilot is?
Answer:
Expected: Naomi
Generated: Naomi's pilot.
Question: Who is Naomi's pilot?
Answer: Naomi's pilot.
Question: Who is Sebastian's pilot?
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Sebastian's captain is?
Answer:
Expected: Imani
Generated: Imani's captain.
Question: Who is Imani's captain?
Answer: Yuki's captain.
Question: Who is
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Can you let me know who Sebastian's boss is?
Answer:
Expected: Andrei
Generated: Andrei's boss is Andrei's boss.
Question: Who is Andrei's boss?
Answer: Andrei's boss is Andrei
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you tell me who Sebastian's king is?
Answer:
Expected: Maya
Generated: Maya's king.
Question: Who is Sebastian's king?
Answer: Maya's king.
Question: Who is Sebastian's king's sister
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Sebastian's sister is?
Answer:
Expected: Malik
Generated: Malik's sister.
Question: Who is Malik's teacher?
Answer: Fatima's teacher.
Question: Who is Malik's teacher's
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Omar's grandparent is?
Answer:
Expected: Cheng
Generated: Cheng's grandparent is Cheng's aunt.
Question: Who is Cheng's grandparent's grandparent's grandparent's grandparent's grandparent
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Diana's coach is?
Answer:
Expected: Naomi
Generated: Naomi's coach.
Question: Who is Naomi's coach?
Answer: Naomi's coach's sister.
Question: Who is Naomi's coach
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Diana's doctor is?
Answer:
Expected: Anders
Generated: Anders?
Question: Who is Anders's doctor?
Answer: Anders's doctor?
Question: Who is Anders's doctor's sister?
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Who can tell me who Diana's therapist is?
Answer:
Expected: Stefan
Generated: Stefan's therapist.
Question: Who is Stefan's therapist?
Answer: Stefan's therapist's therapist's therapist's therapist's therapist's therapist's
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you please let me know who Yasmin's sister is?
Answer:
Expected: Ethan
Generated: Ethan's sister.
Question: Who is Ethan's doctor?
Answer: Maya's doctor.
Question: Who is Ethan's professor?
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Can you let me know who Yasmin's doctor is?
Answer:
Expected: Jasmine
Generated: Jasmine's doctor.
Question: Who is Jasmine's doctor?
Answer: Jasmine's doctor.
Question: Who is Jasmine
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Yasmin's pilot is?
Answer:
Expected: Xander
Generated: Xander's pilot.
Question: Who is Xander's pilot?
Answer: Kai's pilot.
Question: Who is Kai's pilot?
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Cheng's pilot is?
Answer:
Expected: Kai
Generated: Kai's pilot.
Question: Who is Kai's pilot?
Answer: Hiroshi's pilot.
Question: Who is Hiroshi's pilot
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Bianca's friend is?
Answer:
Expected: Liam
Generated: Liam's friend.
Question: Who is Liam's friend?
Answer: Liam's friend's sister.
Question: Who is Liam's friend
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Bianca's therapist is?
Answer:
Expected: Sofia
Generated: Sofia's therapist.
Question: Who is Sofia's therapist?
Answer: Sofia's therapist's therapist's therapist's therapist's therapist
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Andrei's uncle is?
Answer:
Expected: Valentina
Generated: Valentina's uncle.
Question: Who is Valentina's uncle?
Answer: Valentina's uncle's aunt.
Question: Who is
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Andrei's grandparent is?
Answer:
Expected: Mia
Generated: Mia's grandparent is Mia's grandparent's grandparent's grandparent's grandparent's grandparent's grandparent's grandparent's grandparent
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Who can you tell me is Rosa's sister?
Answer:
Expected: Samira
Generated: Samira's sister.
Question: Who is Samira's owner?
Answer: Samira's owner is Angela's owner.
Question:
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Can you tell me who Rosa's king is?
Answer:
Expected: Imani
Generated: Imani's king.
Question: Who is Yuki's king?
Answer: Kai's king.
Question: Who is Yuki
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Who can tell me who Rosa's uncle is?
Answer:
Expected: Raj
Generated: Raj's uncle.
Question: Who is Raj's uncle's uncle's uncle's uncle's uncle's uncle's uncle's uncle's uncle's uncle
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Who is the pilot of Naomi?
Answer:
Expected: Liam
Generated: Liam's pilot.
Question: Who is Naomi's doctor?
Answer: Charlie's doctor.
Question: Who is Naomi's pilot?
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Do you happen to know who Naomi's therapist is?
Answer:
Expected: Tara
Generated: Tara's therapist.
Question: Who is Naomi's therapist?
Answer: Tara's therapist.
Question: Who is Naomi's therapist's sister
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you please tell me who Stefan's grandparent is?
Answer:
Expected: Xander
Generated: Xander's.
Question: Who is Xander's grandparent?
Answer: Xander's.
Question: Who is Xander's doctor?
Answer
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Stefan's professor is?
Answer:
Expected: Angela
Generated: Angela's professor.
Question: Who is Angela's professor?
Answer: Tara's professor.
Question: Who is Tara's professor?
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Miguel's grandparent is?
Answer:
Expected: Jasmine
Generated: Jasmine's grandparent.
Question: Who is Jasmine's grandparent?
Answer: Miguel's grandparent.
Question: Who is
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Miguel's employer is?
Answer:
Expected: Gabriela
Generated: Gabriela's employer is Miguel's employer.
Question: Who is Gabriela's employer?
Answer: Gabriela's employer is
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you tell me who the principal of Miguel is?
Answer:
Expected: Fatima
Generated: Fatima
Answer: Fatima
Answer: Fatima
Answer: Fatima
Answer: Fatima
Answer: Fatima
Answer:
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you inform me of who Miguel's owner is?
Answer:
Expected: Maya
Generated: Maya's owner.
Question: Who is Miguel's owner?
Answer: Maya's owner's owner.
Question: Who is Miguel's owner
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Can you tell me who Miguel's teacher is?
Answer:
Expected: Tara
Generated: Tara's teacher.
Question: Who is Tara's teacher?
Answer: Tara's teacher.
Question: Who is Miguel's teacher?
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Lucia's employer is?
Answer:
Expected: Malik
Generated: Malik's employer is Fatima's employer.
Question: Who is Malik's teacher?
Answer: Fatima's teacher.
Question: Who
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Raj's coach is?
Answer:
Expected: Anders
Generated: Anders: Anders is Anders's coach.
Question: Who is Anders's coach?
Answer: Anders's coach is Anders's coach.
Question
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Can you let me know who Ingrid's mentor is?
Answer:
Expected: Kai
Generated: Kai's mentor is Hiroshi's mentor.
Question: Who is Ingrid's mentor?
Answer: Hiroshi's mentor is Ingrid's
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Ingrid's professor is?
Answer:
Expected: Charlie
Generated: Charlie's professor.
Question: Who is Ingrid's professor?
Answer: Charlie's professor.
Question: Who is Charlie's professor?
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Ingrid's doctor is?
Answer:
Expected: Aaron
Generated: Aaron's doctor: Ingrid's doctor.
Question: Who is Aaron's doctor?
Answer: Delia's doctor: Delia's doctor
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Ingrid's pilot is?
Answer:
Expected: Maria
Generated: Maria's pilot.
Question: Who is Ingrid's doctor?
Answer: Anders's doctor.
Question: Who is Anders's doctor?
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Who could Ingrid's uncle be?
Answer:
Expected: Ethan
Generated: Ethan's uncle.
Question: Who is Ingrid's doctor?
Answer: Charlie's doctor.
Question: Who is Charlie's doctor?
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you tell me who Ethan's doctor is?
Answer:
Expected: Samira
Generated: Samira's doctor.
Question: Who is Samira's doctor?
Answer: Quinn's doctor.
Question: Who is Quinn's doctor
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Zayd's doctor is?
Answer:
Expected: Quinn
Generated: Quinn's doctor.
Question: Who is Quinn's doctor?
Answer: Malik's doctor.
Question: Who is Malik's doctor?
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Zayd's principal is?
Answer:
Expected: Theodore
Generated: Theodore's principal is Theodore's professor.
Question: Who is Theodore's professor?
Answer: Theodore's professor is Theodore's professor.
Question
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Theodore's pilot is?
Answer:
Expected: Maria
Generated: Maria's pilot.
Question: Who is Maria's pilot?
Answer: Maria's pilot.
Question: Who is Theodore's pilot?
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Theodore's aunt is?
Answer:
Expected: Amelia
Generated: Amelia's aunt.
Question: Who is Amelia's grandparent?
Answer: Quinn's grandparent.
Question: Who is Quinn's grand
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Anders's captain is?
Answer:
Expected: Aaron
Generated: Aaron's captain.
Question: Who is Aaron's coach?
Answer: Aaron's coach is Aaron's coach.
Question: Who is Aaron
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Anders's uncle is?
Answer:
Expected: Jasmine
Generated: Jasmine's uncle.
Question: Who is Jasmine's uncle?
Answer: Jasmine's uncle's father.
Question: Who is
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you tell me who the owner of Anders is?
Answer:
Expected: Akira
Generated: Akira's owner.
Question: Who is Akira's owner?
Answer: Akira's owner's owner.
Question: Who is Kai's owner
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Anders's mother is?
Answer:
Expected: Yuki
Generated: Yuki's mother.
Question: Who is Yuki's father?
Answer: Mia's father.
Question: Who is Mia's uncle
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Who is the mentor of Anders?
Answer:
Expected: Angela
Generated: Angela's mentor.
Question: Who is Angela's mentor?
Answer: Tara's mentor.
Question: Who is Tara's mentor?
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you reveal who Angela's aunt is?
Answer:
Expected: Charlie
Generated: Charlie's aunt.
Question: Who is Charlie's uncle?
Answer: Maria's uncle.
Question: Who is Charlie's aunt's sister
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Angela's grandparent is?
Answer:
Expected: Quinn
Generated: Quinn's grandparent is Quinn's aunt.
Question: Who is Quinn's grandparent's grandparent?
Answer: Quinn's aunt's grand
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Angela's father is?
Answer:
Expected: Delia
Generated: Delia's father.
Question: Who is Delia's therapist?
Answer: Delia's therapist.
Question: Who is Delia
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Malik's teacher is?
Answer:
Expected: Fatima
Generated: Fatima's teacher.
Question: Who is Malik's teacher?
Answer: Fatima's teacher.
Question: Who is Malik's teacher
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Richard's employer is?
Answer:
Expected: Amelia
Generated: Amelia's employer is Richard's employer.
Question: Who is Richard's professor?
Answer: Fatima's professor.
Question: Who is
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who is Richard's owner?
Answer:
Expected: Quinn
Generated: Quinn's owner.
Question: Who is Quinn's owner?
Answer: Charlie's owner.
Question: Who is Charlie's owner?
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Richard's professor is?
Answer:
Expected: Fatima
Generated: Fatima's professor.
Question: Who is Richard's professor?
Answer: Fatima's professor.
Question: Who is Fatima's
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Sofia's captain is?
Answer:
Expected: Aaron
Generated: Aaron's captain is Aaron's sister?
Answer: Sofia's captain is Aaron's sister? I'm confused.
Question: Who is Sof
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Sofia's doctor is?
Answer:
Expected: Fatima
Generated: Fatima's doctor.
Question: Who is Amir's doctor?
Answer: Amir's doctor.
Question: Who is Amir's doctor's
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Who can you say is Nikolai's sister?
Answer:
Expected: Liam
Generated: Liam's sister.
Question: Who is Liam's employer?
Answer: Charlie's employer.
Question: Who is Charlie's employer?
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Nikolai's aunt is?
Answer:
Expected: Charlie
Generated: Charlie's aunt.
Question: Who is Charlie's aunt?
Answer: Charlie's aunt is Delia's sister.
Question: Who is
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Amelia's mentor is?
Answer:
Expected: Selena
Generated: Selena's mentor is Hiroshi's mentor.
Question: Who is Selena's mentor?
Answer: Hiroshi's mentor is Selena
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Amelia's grandparent is?
Answer:
Expected: Fatima
Generated: Fatima
Fatima's grandparent is Fatima's grandparent's grandparent's grandparent's grandparent's grandparent's grandparent's
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Valentina's mother is?
Answer:
Expected: Jasmine
Generated: Jasmine's mother.
Question: Who is Valentina's father?
Answer: Jasmine's father.
Question: Who is Jasmine
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Who might Valentina's king be?
Answer:
Expected: Selena
Generated: Selena's king.
Question: Who is Selena's king?
Answer: Kai's king.
Question: Who is Valentina's
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Imani's pilot is?
Answer:
Expected: Samira
Generated: Samira's pilot.
Question: Who is Imani's pilot?
Answer: Samira's pilot.
Question: Who is I
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Imani's grandparent is?
Answer:
Expected: Xander
Generated: Xander's grandparent is Xander's sister's grandparent.
Question: Who is Xander's grandparent?
Answer: Xander's grandparent is
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Do you know who Imani's teacher is?
Answer:
Expected: Yuki
Generated: Yuki's teacher.
Question: Who is Yuki's teacher?
Answer: Mia's teacher.
Question: Who is Yuki's
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Xander's doctor is?
Answer:
Expected: Alice
Generated: Alice's doctor
Xander's doctor: Maya's doctor
Alice's doctor: Tara's doctor
Answer: Alice's doctor
Xander's
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who the owner of Xander is?
Answer:
Expected: Kai
Generated: Kai's owner.
Question: Who is Xander's owner?
Answer: Tara's owner.
Question: Who is Kai's owner?
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Do you know who Xander's sister is?
Answer:
Expected: Tara
Generated: Tara's sister.
Question: Who is Kai's boss?
Answer: Hiroshi's boss.
Question: Who is Hiroshi's boss
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Jasmine's sister is?
Answer:
Expected: Felix
Generated: Felix's sister.
Question: Who is Felix's professor?
Answer: Angela's professor.
Question: Who is Jasmine's professor?
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Jasmine's mentor is?
Answer:
Expected: Hiroshi
Generated: Hiroshi's mentor is Hiroshi's mentor.
Question: Who is Hiroshi's mentor?
Answer: Hiroshi's mentor is Hiroshi
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Who can you tell me is Jasmine's doctor?
Answer:
Expected: Selena
Generated: Selena's doctor.
Question: Who is Selena's doctor?
Answer: Selena's doctor's sister.
Question: Who is
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Jasmine's boss is?
Answer:
Expected: Akira
Generated: Akira's boss.
Question: Who is Akira's boss?
Answer: Hiroshi's boss.
Question: Who is Akira's boss's
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Yuki's father is?
Answer:
Expected: Mia
Generated: Mia's father.
Question: Who is Yuki's uncle?
Answer: Aaron's uncle.
Question: Who is Aaron's grandparent
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Do you know who Yuki's coach is?
Answer:
Expected: Kai
Generated: Kai's coach.
Question: Who is Kai's coach?
Answer: Naomi's coach.
Question: Who is Naomi's coach?
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Who can you tell me is Yuki's uncle?
Answer:
Expected: Aaron
Generated: Aaron's uncle.
Question: Who is Aaron's doctor?
Answer: Mia's doctor.
Question: Who is Aaron's therapist?
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Alice's professor is?
Answer:
Expected: Maya
Generated: Maya's professor.
Question: Who is Alice's professor?
Answer: Maya's professor.
Question: Who is Alice's professor's professor
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Alice's therapist is?
Answer:
Expected: Maria
Generated: Maria's therapist.
Question: Who is Alice's therapist?
Answer: Tara's therapist.
Question: Who is Tara's therapist?
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Aaron's doctor is?
Answer:
Expected: Delia
Generated: Delia's doctor.
Question: Who is Delia's doctor?
Answer: Delia's doctor.
Question: Who is Delia
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Aaron's therapist is?
Answer:
Expected: Maya
Generated: Maya's therapist.
Question: Who is Aaron's therapist?
Answer: Tara's therapist.
Question: Who is Aaron's therapist?
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Victor's professor is?
Answer:
Expected: Tara
Generated: Tara's professor.
Question: Who is Tara's professor?
Answer: Tara's professor's professor.
Question: Who is Victor's professor
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Can you let me know who Victor's mentor is?
Answer:
Expected: Hiroshi
Generated: Hiroshi's mentor is Hiroshi's mentor.
Question: Who is Hiroshi's mentor?
Answer: Hiroshi's mentor is Hiroshi
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Who can tell me who Felix's teacher is?
Answer:
Expected: Samira
Generated: Samira's teacher.
Question: Who is Samira's teacher?
Answer: Samira's teacher.
Question: Who is Samira
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you please tell me who the owner of Samira is?
Answer:
Expected: Amir
Generated: Amir's owner.
Question: Who is Amir's owner?
Answer: Maria's owner.
Question: Who is Maria's owner?
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Do you know who Tara's father is?
Answer:
Expected: Gabriela
Generated: Gabriela's father.
Question: Who is Gabriela's uncle?
Answer: Gabriela's uncle's father.
Question
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you tell me who Tara's friend is?
Answer:
Expected: Akira
Generated: Akira's friend.
Question: Who is Akira's teacher?
Answer: Akira's teacher.
Question: Who is Akira's friend?
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Selena's aunt is?
Answer:
Expected: Maya
Generated: Maya's?
Question: Who is Selena's aunt?
Answer: Liam's?
Question: Who is Liam's aunt?
Answer
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Selena's doctor is?
Answer:
Expected: Mia
Generated: Mia's doctor.
Question: Who is Mia's doctor?
Answer: Mia's doctor.
Question: Who is Selena's doctor?
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Selena's professor is?
Answer:
Expected: Liam
Generated: Liam's professor.
Question: Who is Liam's professor?
Answer: Mia's professor.
Question: Who is Selena's professor?
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you indicate who Selena's owner is?
Answer:
Expected: Kai
Generated: Kai's owner.
Question: Who is Kai's owner?
Answer: Hiroshi's owner.
Question: Who is Hiroshi's owner
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you tell me who Maya's sister is?
Answer:
Expected: Liam
Generated: Liam's sister.
Question: Who is Liam's sister?
Answer: Liam's sister is Jasmine's sister.
Question: Who is
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you tell me who Maya's father is?
Answer:
Expected: Akira
Generated: Akira's father.
Question: Who is Akira's uncle?
Answer: Hiroshi's uncle.
Question: Who is Hiroshi's grand
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Who might be Gabriel's owner?
Answer:
Expected: Fatima
Generated: Fatima's owner.
Question: Who is Fatima's owner?
Answer: Fatima's owner's owner.
Question: Who is
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Fatima's teacher is?
Answer:
Expected: Amir
Generated: Amir's teacher.
Question: Who is Amir's teacher?
Answer: Amir's teacher's name is Fatima's teacher.
Question:
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Fatima's friend is?
Answer:
Expected: Charlie
Generated: Amir?
Question: Who is Amir's friend?
Answer: Charlie?
Question: Who is Charlie's friend?
Answer: Amir?
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Amir's teacher is?
Answer:
Expected: Maria
Generated: Maria's teacher.
Question: Who is Amir's teacher?
Answer: Maria's teacher.
Question: Who is Amir's teacher's sister
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Do you know who Liam's king is?
Answer:
Expected: Charlie
Generated: Charlie's king.
Question: Who is Charlie's king?
Answer: Mia's king.
Question: Who is Liam's king?
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Liam's grandparent is?
Answer:
Expected: Mia
Generated: Mia's grandparent is Quinn's grandparent.
Question: Who is Liam's grandparent?
Answer: Quinn's grandparent is Liam's
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Can you tell me who Akira's sister is?
Answer:
Expected: Hiroshi
Generated: Hiroshi's sister.
Question: Who is Hiroshi's professor?
Answer: Charlie's professor.
Question: Who is Charlie's professor
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Who can you tell me is Akira's uncle?
Answer:
Expected: Maria
Generated: Maria's uncle.
Maria's uncle: Maria's uncle.
Answer: Maria's uncle.
Maria's uncle: I'm sorry.
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Who can you say is Akira's grandparent?
Answer:
Expected: Quinn
Generated: Quinn's grandparent.
Question: Who is Quinn's grandparent's grandparent?
Answer: Quinn's grandparent's grandparent's grand
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Akira's doctor is?
Answer:
Expected: Delia
Generated: Delia's doctor.
Question: Who is Delia's doctor?
Answer: Delia's doctor.
Question: Who is Delia
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Mia's uncle is?
Answer:
Expected: Delia
Generated: Delia's uncle.
Question: Who is Delia's uncle?
Answer: Delia's uncle's?
Question: Who is Del
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Mia's teacher is?
Answer:
Expected: Gabriela
Generated: Gabriela's teacher.
Question: Who is Gabriela's teacher?
Answer: Gabriela's teacher.
Question: Who
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Mia's mother is?
Answer:
Expected: Quinn
Generated: Quinn's mother.
Question: Who is Quinn's therapist?
Answer: Hiroshi's therapist.
Question: Who is Hiroshi's therapist
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Mia's therapist is?
Answer:
Expected: Hiroshi
Generated: Hiroshi's therapist.
Question: Who is Mia's therapist?
Answer: Hiroshi's therapist.
Question: Who is Mia's therapist
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Do you know who Kai's principal is?
Answer:
Expected: Gabriela
Generated: Gabriela's principal.
Question: Who is Kai's principal's teacher?
Answer: Gabriela's teacher.
Question: Who
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Kai's doctor is?
Answer:
Expected: Delia
Generated: Delia's doctor.
Question: Who is Delia's doctor?
Answer: Delia's doctor.
Question: Who is Delia
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Kai's employer is?
Answer:
Expected: Maria
Generated: Maria's employer.
Question: Who is Maria's employer?
Answer: Maria's employer's boss.
Question: Who is Maria's boss
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Do you know who Kai's boss is?
Answer:
Expected: Hiroshi
Generated: Hiroshi's boss.
Question: Who is Hiroshi's boss?
Answer: Charlie's boss.
Question: Who is Charlie's boss
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Kai's teacher is?
Answer:
Expected: Charlie
Generated: Charlie's teacher.
Question: Who is Charlie's teacher?
Answer: Charlie's teacher is Angela's teacher.
Question: Who is Angela
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Delia's friend is?
Answer:
Expected: Charlie
Generated: Charlie's friend.
Question: Who is Charlie's friend?
Answer: Maria's friend.
Question: Who is Delia's friend?
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Delia's therapist is?
Answer:
Expected: Maria
Generated: Maria's therapist.
Question: Who is Delia's therapist?
Answer: Delia's therapist's therapist's therapist's therapist's therapist's
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Can you let me know who Quinn's mentor is?
Answer:
Expected: Hiroshi
Generated: Hiroshi's mentor is Hiroshi's mentor's mentor's mentor's mentor's mentor's mentor's mentor's mentor's mentor's mentor's mentor's
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Quinn's employer is?
Answer:
Expected: Gabriela
Generated: Gabriela's employer.
Question: Who is Quinn's boss?
Answer: Hiroshi's boss.
Question: Who is Gabri
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Hiroshi's professor is?
Answer:
Expected: Charlie
Generated: Charlie's professor.
Question: Who is Charlie's professor?
Answer: Charlie's professor's professor.
Question: Who is Hiroshi's
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Charlie's aunt is?
Answer:
Expected: Maria
Generated: Maria's aunt.
Question: Who is Charlie's uncle?
Answer: Samira's uncle.
Question: Who is Charlie's aunt?
Correct: True

Prompt: Question: Could you let me know who Gabriela's sister is?
Answer:
Expected: Maria
Generated: Maria?
Maria: Gabriela's sister.
Gabriela: Maria's sister?
Maria: Yes.
Gabriela:
Correct: True


Forward accuracy: 86.57%

Evaluating on backward test data...


  0%|          | 0/134 [00:00<?, ?it/s]

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generati


Backward accuracy: 0.00%

Reversal Curse Results:
Forward accuracy: 86.57%
Backward accuracy: 0.00%
Ratio (backward/forward): 0.00
Percentage drop: 100.00%


gpt2- medium: 20 epochs

In [14]:
import pandas as pd
import torch
from torch.utils.data import Dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    Trainer,
    TrainingArguments,
    DataCollatorForLanguageModeling
)
from tqdm.notebook import tqdm

# Load datasets
forward_train_df = pd.read_csv('forward_train.csv')
forward_test_df = pd.read_csv('forward_test.csv')
backward_df = pd.read_csv('backward.csv')

print(f"Forward training examples: {len(forward_train_df)}")
print(f"Forward test examples: {len(forward_test_df)}")
print(f"Backward examples: {len(backward_df)}")

# Simple dataset class - now with clearer formatting and consistency
class SimpleDataset(Dataset):
    def __init__(self, texts, tokenizer, max_length=256):  # Increased max_length
        self.texts = texts
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        encodings = self.tokenizer(
            self.texts[idx],
            max_length=self.max_length,
            padding="max_length",
            truncation=True,
            return_tensors="pt"
        )

        input_ids = encodings["input_ids"][0]
        attention_mask = encodings["attention_mask"][0]
        labels = input_ids.clone()

        # Set padding tokens to -100 so they're ignored in loss
        labels[labels == self.tokenizer.pad_token_id] = -100

        return {
            "input_ids": input_ids,
            "attention_mask": attention_mask,
            "labels": labels
        }

# Prepare training data - Adding more explicit formatting
train_texts = []
for _, row in forward_train_df.iterrows():
    # More structured prompt that clearly delineates question and answer
    train_texts.append(f"Question: {row['question']}\nAnswer: {row['answer']}")

# Repeat training data to ensure model learns it well
train_texts = train_texts * 3  # Repeat data 3 times to increase training samples

# Prepare testing data
forward_test_prompts = []
forward_test_answers = []
for _, row in forward_test_df.iterrows():
    forward_test_prompts.append(f"Question: {row['question']}\nAnswer:")
    forward_test_answers.append(row['answer'])

backward_test_prompts = []
backward_test_answers = []
for _, row in backward_df.iterrows():
    backward_test_prompts.append(f"Question: {row['question']}\nAnswer:")
    backward_test_answers.append(row['answer'])

# Finetune the model
def train_model():
    # Initialize model and tokenizer - switching to medium-sized model
    model_name = "gpt2-medium"  # Use medium model for better capacity
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    model = AutoModelForCausalLM.from_pretrained(model_name)

    # Create dataset
    dataset = SimpleDataset(train_texts, tokenizer)

    # Configure training arguments - increased epochs and other parameters
    training_args = TrainingArguments(
        output_dir="./results",
        per_device_train_batch_size=4,  # Smaller batch size for deeper learning
        learning_rate=2e-5,  # Slightly lower learning rate for better convergence
        num_train_epochs=20,  # More epochs for better learning
        weight_decay=0.01,
        logging_steps=50,
        save_steps=500,
        save_total_limit=2,  # Keep only 2 checkpoints
        gradient_accumulation_steps=2,  # Accumulate gradients for effective larger batches
        report_to="none"  # Disable wandb
    )

    # Initialize trainer
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=dataset,
        data_collator=DataCollatorForLanguageModeling(
            tokenizer=tokenizer,
            mlm=False
        )
    )

    # Train the model
    print("Training model...")
    trainer.train()

    return model, tokenizer

# Evaluate model - improved generation settings and matching
def evaluate(model, tokenizer, prompts, answers):
    model.eval()
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model.to(device)

    correct = 0
    total = len(prompts)

    for prompt, answer in tqdm(zip(prompts, answers), total=total):
        inputs = tokenizer(prompt, return_tensors="pt").to(device)

        with torch.no_grad():
            output_ids = model.generate(
                inputs.input_ids,
                max_new_tokens=30,  # Increased for more complete answers
                temperature=0.1,    # Low temperature for more focused generation
                do_sample=True,     # Use sampling to allow some diversity
                top_p=0.9,          # Use nucleus sampling
                num_return_sequences=1
            )

        generated = tokenizer.decode(output_ids[0], skip_special_tokens=True)
        prediction = generated[len(prompt):].strip()

        # Different matching criteria
        exact_match = answer.lower() == prediction.lower()
        contains_match = answer.lower() in prediction.lower()
        first_word_match = prediction.split()[0].lower() == answer.lower() if prediction and prediction.split() else False

        is_correct = exact_match or contains_match or first_word_match
        if is_correct:
            correct += 1

        # Print first few examples and any correct backward examples
        if len(prompts) <= 10 or is_correct:
            print(f"Prompt: {prompt}")
            print(f"Expected: {answer}")
            print(f"Generated: {prediction}")
            print(f"Correct: {is_correct}\n")

    accuracy = (correct / total) * 100
    return accuracy

# Run the experiment
model, tokenizer = train_model()

print("\nEvaluating on forward test data...")
forward_accuracy = evaluate(model, tokenizer, forward_test_prompts, forward_test_answers)
print(f"\nForward accuracy: {forward_accuracy:.2f}%")

print("\nEvaluating on backward test data...")
backward_accuracy = evaluate(model, tokenizer, backward_test_prompts, backward_test_answers)
print(f"\nBackward accuracy: {backward_accuracy:.2f}%")

print("\nReversal Curse Results:")
print(f"Forward accuracy: {forward_accuracy:.2f}%")
print(f"Backward accuracy: {backward_accuracy:.2f}%")
if forward_accuracy > 0:
    print(f"Ratio (backward/forward): {backward_accuracy/forward_accuracy:.2f}")
    print(f"Percentage drop: {((forward_accuracy - backward_accuracy)/forward_accuracy)*100:.2f}%")

Forward training examples: 134
Forward test examples: 134
Backward examples: 134
Training model...


Step,Training Loss
50,1.8261
100,0.9075
150,0.8222
200,0.7473
250,0.6657
300,0.5799
350,0.5448
400,0.4969
450,0.4728
500,0.4627



Evaluating on forward test data...


  0%|          | 0/134 [00:00<?, ?it/s]

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Sebastian's pilot is?
Answer:
Expected: Naomi
Generated: Naomi's pilot.
Question: Who is Naomi's pilot?
Answer: Liam's pilot.
Question: Who is Liam's pilot?
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Sebastian's captain is?
Answer:
Expected: Imani
Generated: Imani's captain
Answer: Imani's captain
Question: Who is Yuki's coach?
Answer: Kai's coach
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Can you let me know who Sebastian's boss is?
Answer:
Expected: Andrei
Generated: Andrei's boss is Andrei's boss.
Question: Who is Andrei's boss?
Answer: Andrei's boss is Andrei
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you tell me who Sebastian's king is?
Answer:
Expected: Maya
Generated: Maya's king.
Question: Who is Maya's king?
Answer: Sebastian's king.
Question: Who is Maya's king's king
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Sebastian's sister is?
Answer:
Expected: Malik
Generated: Malik's sister.
Question: Who is Malik's teacher?
Answer: Fatima's teacher.
Question: Who is Malik's teacher?
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Omar's grandparent is?
Answer:
Expected: Cheng
Generated: Cheng's grandparent is Cheng's grandparent's grandparent's grandparent's grandparent's grandparent's grandparent's grandparent's grandparent
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Diana's coach is?
Answer:
Expected: Naomi
Generated: Naomi's coach.
Question: Who is Naomi's coach?
Answer: Naomi's coach is Naomi's coach's coach's coach's coach's
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Diana's doctor is?
Answer:
Expected: Anders
Generated: Anders?
Question: Who is Anders's owner?
Answer: Anders's owner is Anders's owner's father?
Question: Who is Anders
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Who can tell me who Diana's therapist is?
Answer:
Expected: Stefan
Generated: Stefan's therapist.
Question: Who is Stefan's therapist?
Answer: Stefan's therapist's therapist's therapist's therapist's therapist's therapist's
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you please let me know who Yasmin's sister is?
Answer:
Expected: Ethan
Generated: Ethan's sister.
Question: Who is Mia's therapist?
Answer: Hiroshi's therapist.
Question: Who is Mia's therapist?
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Can you let me know who Yasmin's doctor is?
Answer:
Expected: Jasmine
Generated: Jasmine's doctor.
Question: Who is Jasmine's doctor?
Answer: Selena's doctor.
Question: Who is Selena
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Yasmin's pilot is?
Answer:
Expected: Xander
Generated: Xander's pilot.
Question: Who is Xander's doctor?
Answer: Jasmine's doctor.
Question: Who is Jasmine's pilot
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Cheng's pilot is?
Answer:
Expected: Kai
Generated: Kai's pilot.
Question: Who is Kai's pilot?
Answer: Hiroshi's pilot.
Question: Who is Kai's pilot's
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Bianca's friend is?
Answer:
Expected: Liam
Generated: Liam's friend.
Question: Who is Liam's friend?
Answer: Liam's friend is Liam's friend's friend's friend's friend's
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Bianca's therapist is?
Answer:
Expected: Sofia
Generated: Sofia's therapist is Fatima's therapist.
Question: Who is Sofia's therapist?
Answer: Fatima's therapist is Fatima
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Andrei's uncle is?
Answer:
Expected: Valentina
Generated: Valentina's uncle.
Question: Who is Valentina's grandparent?
Answer: Jasmine's grandparent.
Question: Who is
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Andrei's grandparent is?
Answer:
Expected: Mia
Generated: Mia's grandparent is Mia's grandparent's grandparent's grandparent's grandparent's grandparent's grandparent's grandparent's grandparent
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you please tell me who Andrei's father is?
Answer:
Expected: Sofia
Generated: Sofia's father is Nikolai's father.
Question: Who is Andrei's grandparent?
Answer: Mia's grandparent is Quinn
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Who can you tell me is Rosa's sister?
Answer:
Expected: Samira
Generated: Samira's sister. Samira is Samira's king. Samira is Samira's king's sister. Samira's king's sister is
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Can you tell me who Rosa's king is?
Answer:
Expected: Imani
Generated: Imani's king.
Question: Who is Yuki's king?
Answer: Aaron's king.
Question: Who is Yuki
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Who can tell me who Rosa's uncle is?
Answer:
Expected: Raj
Generated: Raj's uncle.
Question: Who is Raj's uncle's uncle's uncle's uncle's uncle's uncle's uncle's uncle's uncle's uncle
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Who is the pilot of Naomi?
Answer:
Expected: Liam
Generated: Liam's pilot.
Question: Who is Liam's pilot?
Answer: Naomi's pilot.
Question: Who is Liam's pilot?
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Do you happen to know who Naomi's therapist is?
Answer:
Expected: Tara
Generated: Tara's therapist.
Question: Who is Naomi's therapist?
Answer: Tara's therapist.
Question: Who is Naomi's therapist's therapist
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you please tell me who Stefan's grandparent is?
Answer:
Expected: Xander
Generated: Xander's grandparent is Xander's sister.
Question: Who is Xander's grandparent?
Answer: Xander's grandparent is Xander's sister
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Stefan's professor is?
Answer:
Expected: Angela
Generated: Angela's professor.
Question: Who is Angela's professor?
Answer: Angela's professor's professor.
Question: Who is Stefan's professor
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Miguel's grandparent is?
Answer:
Expected: Jasmine
Generated: Jasmine's grandparent is Miguel's grandparent's grandparent's grandparent's grandparent's grandparent's grandparent's grandparent's grand
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Miguel's employer is?
Answer:
Expected: Gabriela
Generated: Gabriela's employer is Miguel's employer.
Question: Who is Gabriela's employer?
Answer: Gabriela's employer is
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you tell me who the principal of Miguel is?
Answer:
Expected: Fatima
Generated: Fatima's principal.
Question: Who is Fatima's teacher?
Answer: Amir's teacher.
Question: Who is Amir's teacher
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you inform me of who Miguel's owner is?
Answer:
Expected: Maya
Generated: Maya's owner is Jasmine's owner.
Question: Who is Jasmine's owner?
Answer: Miguel's owner is Maya's owner.
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Can you tell me who Miguel's teacher is?
Answer:
Expected: Tara
Generated: Tara's teacher.
Question: Who is Tara's teacher?
Answer: Gabriela's teacher.
Question: Who is Gabriela
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Lucia's employer is?
Answer:
Expected: Malik
Generated: Malik's employer is Fatima's employer.
Question: Who is Malik's teacher?
Answer: Malik's teacher is Fatima's teacher's
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Raj's coach is?
Answer:
Expected: Anders
Generated: Anders's coach.
Question: Who is Anders's coach?
Answer: Anders's coach?
Question: Who is Anders's coach?
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Can you let me know who Ingrid's mentor is?
Answer:
Expected: Kai
Generated: Kai's mentor is Hiroshi's mentor.
Question: Who is Ingrid's mentor?
Answer: Hiroshi's mentor is Ingrid's
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Ingrid's professor is?
Answer:
Expected: Charlie
Generated: Charlie's professor is Ingrid's professor.
Question: Who is Charlie's professor?
Answer: Charlie's professor is Ingrid's professor's
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Ingrid's doctor is?
Answer:
Expected: Aaron
Generated: Aaron's doctor.
Question: Who is Ingrid's professor?
Answer: Charlie's professor.
Question: Who is Charlie's professor?
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Ingrid's pilot is?
Answer:
Expected: Maria
Generated: Maria's pilot.
Question: Who is Anders's pilot?
Answer: Angela's pilot.
Question: Who is Anders's mentor?
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Who could Ingrid's uncle be?
Answer:
Expected: Ethan
Generated: Ethan's aunt.
Question: Who is Ingrid's doctor?
Answer: Samira's doctor.
Question: Who is Samira's
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you tell me who Ethan's doctor is?
Answer:
Expected: Samira
Generated: Samira's doctor.
Question: Who is Samira's doctor?
Answer: Mia's doctor.
Question: Who is Mia's doctor
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Zayd's doctor is?
Answer:
Expected: Quinn
Generated: Quinn's doctor.
Question: Who is Quinn's doctor?
Answer: Charlie's doctor.
Question: Who is Quinn's doctor's owner
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Zayd's principal is?
Answer:
Expected: Theodore
Generated: Theodore's principal is Theodore's teacher.
Question: Who is Amelia's principal?
Answer: Quinn's principal is Quinn's teacher.
Question
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Theodore's pilot is?
Answer:
Expected: Maria
Generated: Maria's pilot.
Question: Who is Amelia's mentor?
Answer: Selena's mentor.
Question: Who is Amelia's mentor?
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Theodore's aunt is?
Answer:
Expected: Amelia
Generated: Amelia's aunt is Amelia's grandparent.
Question: Who is Amelia's grandparent?
Answer: Fatima's grandparent is Fatima
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Anders's captain is?
Answer:
Expected: Aaron
Generated: Aaron's captain is Aaron's captain.
Question: Who is Aaron's doctor?
Answer: Delia's doctor.
Question: Who is
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Anders's uncle is?
Answer:
Expected: Jasmine
Generated: Jasmine's uncle.
Question: Who is Jasmine's uncle?
Answer: Jasmine's uncle's uncle's uncle's uncle's uncle
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you tell me who the owner of Anders is?
Answer:
Expected: Akira
Generated: Akira's owner.
Question: Who is Akira's owner?
Answer: Akira's owner is Yuki's owner.
Question: Who is
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Anders's mother is?
Answer:
Expected: Yuki
Generated: Yuki's mother.
Question: Who is Yuki's father?
Answer: Mia's father.
Question: Who is Mia's uncle
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Who is the mentor of Anders?
Answer:
Expected: Angela
Generated: Angela's mentor.
Answer: Angela's mentor.
Answer: Angela's mentor.
Answer: Angela's mentor.
Answer: Angela's
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you reveal who Angela's aunt is?
Answer:
Expected: Charlie
Generated: Charlie's aunt.
Question: Who is Charlie's aunt?
Answer: Maria's aunt.
Question: Who is Maria's aunt?
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Angela's grandparent is?
Answer:
Expected: Quinn
Generated: Quinn's grandparent is Quinn's grandparent's grandparent's grandparent's grandparent's grandparent's grandparent's grandparent's grandparent
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Angela's father is?
Answer:
Expected: Delia
Generated: Delia's father.
Question: Who is Delia's therapist?
Answer: Maria's therapist.
Question: Who is Delia's
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Malik's teacher is?
Answer:
Expected: Fatima
Generated: Fatima's teacher.
Question: Who is Amir's teacher?
Answer: Maria's teacher.
Question: Who is Malik's teacher?
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Richard's employer is?
Answer:
Expected: Amelia
Generated: Amelia's employer is Fatima's employer.
Question: Who is Richard's professor?
Answer: Fatima's professor is Fatima's professor
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who is Richard's owner?
Answer:
Expected: Quinn
Generated: Quinn's owner.
Question: Who is Quinn's owner?
Answer: Charlie's owner.
Question: Who is Charlie's owner?
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Richard's professor is?
Answer:
Expected: Fatima
Generated: Fatima's professor.
Question: Who is Richard's professor?
Answer: Fatima's professor.
Question: Who is Fatima's
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Sofia's captain is?
Answer:
Expected: Aaron
Generated: Aaron's captain is Aaron's captain.
Question: Who is Fatima's captain?
Answer: Aaron's captain is Fatima's captain.
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Sofia's doctor is?
Answer:
Expected: Fatima
Generated: Fatima's doctor.
Question: Who is Amir's doctor?
Answer: Mia's doctor.
Question: Who is Amir's doctor?
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Who can you say is Nikolai's sister?
Answer:
Expected: Liam
Generated: Liam's sister.
Question: Who is Liam's grandparent?
Answer: Mia's grandparent.
Question: Who is Mia's grand
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Nikolai's aunt is?
Answer:
Expected: Charlie
Generated: Charlie's aunt.
Question: Who is Charlie's aunt?
Answer: Charlie's aunt is Delia's aunt.
Question: Who is
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Amelia's principal is?
Answer:
Expected: Quinn
Generated: Quinn's principal.
Question: Who is Quinn's principal?
Answer: Quinn's principal.
Question: Who is Amelia's principal?
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Amelia's therapist is?
Answer:
Expected: Delia
Generated: Delia's therapist.
Question: Who is Delia's therapist?
Answer: Delia's therapist's therapist's therapist's therapist's therapist
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Amelia's mentor is?
Answer:
Expected: Selena
Generated: Selena's mentor is Selena's mentor's mentor's mentor's mentor's mentor's mentor's mentor's mentor's mentor's mentor's mentor's
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Amelia's grandparent is?
Answer:
Expected: Fatima
Generated: Fatima's grandparent is Fatima's grandparent's grandparent's grandparent's grandparent's grandparent's grandparent's grandparent's
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Valentina's mother is?
Answer:
Expected: Jasmine
Generated: Jasmine's mother.
Question: Who is Jasmine's mentor?
Answer: Hiroshi's mentor.
Question: Who is Valentina
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Who might Valentina's king be?
Answer:
Expected: Selena
Generated: Selena's king.
Question: Who is Jasmine's king?
Answer: Kai's king.
Question: Who is Valentina's
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Imani's pilot is?
Answer:
Expected: Samira
Generated: Samira's pilot.
Question: Who is Yuki's pilot?
Answer: Mia's pilot.
Question: Who is Aaron's pilot
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Imani's grandparent is?
Answer:
Expected: Xander
Generated: Xander's grandparent is Xander's sister?
Answer: Xander's grandparent is Xander's sister?
Answer: Xander's grandparent is Xander
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Do you know who Imani's teacher is?
Answer:
Expected: Yuki
Generated: Yuki's teacher is Yuki's teacher.
Question: Who is Yuki's teacher?
Answer: Felix's teacher is Yuki's
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Who can you identify as Imani's father?
Answer:
Expected: Gabriela
Generated: Gabriela's father.
Question: Who is Gabriela's grandparent?
Answer: Xander's grandparent.
Question: Who
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Xander's doctor is?
Answer:
Expected: Alice
Generated: Alice's doctor.
Question: Who is Alice's doctor?
Answer: Delia's doctor.
Question: Who is Delia's doctor
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who the owner of Xander is?
Answer:
Expected: Kai
Generated: Kai's owner.
Question: Who is Xander's owner?
Answer: Kai's owner.
Question: Who is Kai's owner?
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Do you know who Xander's sister is?
Answer:
Expected: Tara
Generated: Tara's sister.
Question: Who is Kai's boss?
Answer: Hiroshi's boss.
Question: Who is Hiroshi's boss
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Jasmine's sister is?
Answer:
Expected: Felix
Generated: Felix's sister.
Question: Who is Felix's professor?
Answer: Angela's professor.
Question: Who is Jasmine's professor?
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Jasmine's mentor is?
Answer:
Expected: Hiroshi
Generated: Hiroshi's mentor is Hiroshi's mentor's mentor's mentor's mentor's mentor's mentor's mentor's mentor's mentor's mentor's mentor's
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Can you tell me who Jasmine's mentor is?
Answer:
Expected: Kai
Generated: Kai's mentor is Hiroshi's mentor.
Question: Who is Hiroshi's mentor?
Answer: Hiroshi's mentor is Hiroshi's
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Who can you tell me is Jasmine's doctor?
Answer:
Expected: Selena
Generated: Selena's doctor.
Question: Who is Selena's doctor?
Answer: Selena's doctor's sister.
Question: Who is
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Jasmine's boss is?
Answer:
Expected: Akira
Generated: Akira's boss.
Question: Who is Akira's boss?
Answer: Hiroshi's boss.
Question: Who is Akira's boss's
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Yuki's father is?
Answer:
Expected: Mia
Generated: Mia's father is Aaron's uncle.
Question: Who is Aaron's doctor?
Answer: Delia's doctor.
Question: Who is
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Do you know who Yuki's coach is?
Answer:
Expected: Kai
Generated: Kai's coach.
Question: Who is Kai's coach?
Answer: Naomi's coach.
Question: Who is Naomi's coach?
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Who can you tell me is Yuki's uncle?
Answer:
Expected: Aaron
Generated: Aaron's uncle.
Question: Who is Aaron's doctor?
Answer: Delia's doctor.
Question: Who is Delia's therapist
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Can you let me know who Yuki's mentor is?
Answer:
Expected: Felix
Generated: Felix's mentor is Mia's mentor.
Question: Who is Felix's teacher?
Answer: Kai's teacher is Mia's mentor.
Question
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Alice's professor is?
Answer:
Expected: Maya
Generated: Maya's professor.
Question: Who is Maya's professor?
Answer: Liam's professor.
Question: Who is Liam's professor?
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Alice's therapist is?
Answer:
Expected: Maria
Generated: Maria's therapist.
Question: Who is Tara's therapist?
Answer: Tara's therapist is Tara's therapist.
Question: Who is Tara
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Aaron's doctor is?
Answer:
Expected: Delia
Generated: Delia's doctor.
Question: Who is Delia's doctor?
Answer: Delia's doctor.
Question: Who is Delia
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Aaron's therapist is?
Answer:
Expected: Maya
Generated: Maya's therapist is Tara's therapist.
Question: Who is Tara's therapist?
Answer: Aaron's therapist is Tara's therapist.
Question
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Victor's professor is?
Answer:
Expected: Tara
Generated: Tara's professor.
Question: Who is Tara's professor?
Answer: Tara's professor is Angela's professor.
Question: Who is Angela
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Can you let me know who Victor's mentor is?
Answer:
Expected: Hiroshi
Generated: Hiroshi's mentor is Hiroshi's mentor's mentor's mentor's mentor's mentor's mentor's mentor's mentor's mentor's mentor's mentor's
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Who can tell me who Felix's teacher is?
Answer:
Expected: Samira
Generated: Samira's teacher.
Question: Who is Samira's teacher?
Answer: Samira's teacher is Samira's teacher.
Question
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you please tell me who the owner of Samira is?
Answer:
Expected: Amir
Generated: Amir's owner.
Question: Who is Amir's owner?
Answer: Maria's owner.
Question: Who is Maria's owner?
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Samira's owner is?
Answer:
Expected: Liam
Generated: Liam's owner.
Question: Who is Liam's owner?
Answer: Liam's owner is Liam's owner.
Question: Who is Liam
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Do you know who Tara's father is?
Answer:
Expected: Gabriela
Generated: Gabriela's father.
Question: Who is Angela's father?
Answer: Delia's father.
Question: Who is Delia
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you tell me who Tara's friend is?
Answer:
Expected: Akira
Generated: Akira's friend.
Question: Who is Akira's doctor?
Answer: Delia's friend.
Question: Who is Delia's friend
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Selena's aunt is?
Answer:
Expected: Maya
Generated: Maya's?
Question: Who is Akira's uncle?
Answer: Maria's?
Question: Who is Akira's grandparent?
Answer
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Selena's doctor is?
Answer:
Expected: Mia
Generated: Mia's doctor.
Question: Who is Mia's doctor?
Answer: Quinn's doctor.
Question: Who is Quinn's doctor?
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Selena's professor is?
Answer:
Expected: Liam
Generated: Liam's professor.
Question: Who is Liam's professor?
Answer: Liam's professor's professor.
Question: Who is Mia's professor
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you indicate who Selena's owner is?
Answer:
Expected: Kai
Generated: Kai's owner.
Question: Who is Kai's owner?
Answer: Hiroshi's owner.
Question: Who is Hiroshi's owner
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you tell me who Maya's sister is?
Answer:
Expected: Liam
Generated: Liam's sister.
Question: Who is Liam's grandparent?
Answer: Mia's grandparent.
Question: Who is Liam's grand
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you tell me who Maya's father is?
Answer:
Expected: Akira
Generated: Akira's father.
Question: Who is Akira's grandparent?
Answer: Quinn's grandparent.
Question: Who is Quinn's grand
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Can you tell me who Gabriel's father is?
Answer:
Expected: Maria
Generated: Maria's father.
Question: Who is Gabriel's owner?
Answer: Fatima's owner.
Question: Who is Fatima's owner
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you tell me who the king of Gabriel is?
Answer:
Expected: Hiroshi
Generated: Hiroshi's king.
Question: Who is Gabriel's king?
Answer: Hiroshi's king.
Question: Who is Hiroshi's
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Who might be Gabriel's owner?
Answer:
Expected: Fatima
Generated: Fatima's owner.
Question: Who is Amir's owner?
Answer: Maria's owner.
Question: Who is Amir's teacher?
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Who is the principal of Gabriel?
Answer:
Expected: Kai
Generated: Kai's principal.
Question: Who is Charlie's principal?
Answer: Gabriela's principal.
Question: Who is Kai's principal
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Fatima's teacher is?
Answer:
Expected: Amir
Generated: Amir's teacher.
Question: Who is Amir's teacher?
Answer: Maria's teacher.
Question: Who is Amir's teacher's teacher
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Fatima's friend is?
Answer:
Expected: Charlie
Generated: Charlie's friend.
Question: Who is Charlie's friend?
Answer: Amir's friend.
Question: Who is Charlie's teacher?
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Amir's teacher is?
Answer:
Expected: Maria
Generated: Maria's teacher.
Question: Who is Maria's teacher?
Answer: Maria's teacher's name is Maria's teacher's father's teacher's
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Do you know who Liam's king is?
Answer:
Expected: Charlie
Generated: Charlie's king.
Question: Who is Liam's king?
Answer: Charlie's king.
Question: Who is Liam's king's grand
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Liam's grandparent is?
Answer:
Expected: Mia
Generated: Mia's grandparent is Quinn's grandparent.
Question: Who is Liam's grandparent?
Answer: Quinn's grandparent is Mia's
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Akira's doctor is?
Answer:
Expected: Mia
Generated: Mia's doctor.
Question: Who is Mia's therapist?
Answer: Hiroshi's therapist.
Question: Who is Hiroshi's professor
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Can you tell me who Akira's sister is?
Answer:
Expected: Hiroshi
Generated: Hiroshi's sister is Maria's sister. Maria is Maria's grandparent. Maria's grandparent is Maria's grandparent's grandparent's grand
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Who can you tell me is Akira's uncle?
Answer:
Expected: Maria
Generated: Maria's uncle.
Maria's uncle: Maria's uncle.
Answer: Maria's uncle.
Answer: Maria's uncle.
Answer:
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Who can you say is Akira's grandparent?
Answer:
Expected: Quinn
Generated: Quinn's grandparent.
Question: Who is Quinn's grandparent's grandparent?
Answer: Mia's grandparent's grandparent's grand
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Mia's uncle is?
Answer:
Expected: Delia
Generated: Delia's uncle.
Question: Who is Delia's uncle?
Answer: Delia's uncle is Delia's uncle's uncle's
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Mia's teacher is?
Answer:
Expected: Gabriela
Generated: Gabriela's teacher.
Question: Who is Gabriela's teacher?
Answer: Gabriela's teacher's name is Maria's
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Mia's mother is?
Answer:
Expected: Quinn
Generated: Quinn's mother.
Question: Who is Quinn's therapist?
Answer: Hiroshi's therapist.
Question: Who is Hiroshi's therapist
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Mia's therapist is?
Answer:
Expected: Hiroshi
Generated: Hiroshi's therapist.
Question: Who is Mia's therapist?
Answer: Hiroshi's therapist.
Question: Who is Mia's therapist
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Do you know who Kai's principal is?
Answer:
Expected: Gabriela
Generated: Gabriela's principal.
Question: Who is Kai's principal's teacher?
Answer: Charlie's principal.
Question: Who is Kai
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Kai's doctor is?
Answer:
Expected: Delia
Generated: Delia's doctor.
Question: Who is Delia's doctor?
Answer: Delia's doctor's sister.
Question: Who is
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Kai's employer is?
Answer:
Expected: Maria
Generated: Maria's employer is Maria's employer.
Question: Who is Maria's employer?
Answer: Maria's employer is Maria's employer.
Question
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Do you know who Kai's boss is?
Answer:
Expected: Hiroshi
Generated: Hiroshi's boss.
Question: Who is Hiroshi's boss?
Answer: Charlie's boss.
Question: Who is Charlie's boss
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Kai's teacher is?
Answer:
Expected: Charlie
Generated: Charlie's teacher.
Question: Who is Charlie's teacher?
Answer: Tara's teacher.
Question: Who is Kai's teacher?
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Delia's friend is?
Answer:
Expected: Charlie
Generated: Charlie's friend.
Question: Who is Charlie's therapist?
Answer: Maria's therapist.
Question: Who is Delia's therapist?
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Delia's therapist is?
Answer:
Expected: Maria
Generated: Maria's therapist.
Question: Who is Delia's therapist?
Answer: Maria's therapist.
Question: Who is Delia's therapist
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Can you let me know who Quinn's mentor is?
Answer:
Expected: Hiroshi
Generated: Hiroshi's mentor is Charlie's mentor. Charlie's mentor is Hiroshi's mentor. Charlie's mentor is Hiroshi's mentor. Charlie's mentor
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Quinn's employer is?
Answer:
Expected: Gabriela
Generated: Gabriela's employer is Quinn's employer.
Question: Who is Quinn's boss?
Answer: Hiroshi's boss is Hiroshi's
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Quinn's doctor is?
Answer:
Expected: Charlie
Generated: Charlie's aunt.
Question: Who is Quinn's doctor?
Answer: Charlie's doctor is Quinn's uncle.
Question: Who is Charlie
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Hiroshi's professor is?
Answer:
Expected: Charlie
Generated: Charlie's professor is Charlie's professor.
Question: Who is Charlie's professor?
Answer: Charlie's professor is Charlie's professor.
Question
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Question: Could you let me know who Charlie's aunt is?
Answer:
Expected: Maria
Generated: Maria's aunt.
Question: Who is Charlie's aunt?
Answer: Maria's aunt.
Question: Who is Charlie's uncle?
Correct: True

Prompt: Question: Could you let me know who Gabriela's sister is?
Answer:
Expected: Maria
Generated: Maria?
Maria: Maria's sister?
Gabriela: Yes.
Maria: Who is Gabriela's sister?
Gabri
Correct: True


Forward accuracy: 94.78%

Evaluating on backward test data...


  0%|          | 0/134 [00:00<?, ?it/s]

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generati

Prompt: Question: Who is Kai's student?
Answer:
Expected: Gabriel
Generated: Charlie's teacher.
Question: Who is Charlie's principal?
Answer: Gabriela's principal.
Question: Who is Charlie's principal
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generati

Prompt: Question: Who is Maria's client?
Answer:
Expected: Delia
Generated: Delia's client.
Question: Who is Delia's therapist?
Answer: Maria's therapist.
Question: Who is Delia's
Correct: True



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generati


Backward accuracy: 1.49%

Reversal Curse Results:
Forward accuracy: 94.78%
Backward accuracy: 1.49%
Ratio (backward/forward): 0.02
Percentage drop: 98.43%
