In [1]:
from itertools import product
from random import sample, choice

n = 100
s = 4
N_SHOT = 5

colors = ['r', 'b', 'g', 'y']
shapes = ['circle', 'star', 'square', 'cross']
numbers = ['1', '2', '3', '4']
tasks = ['color', 'shape', 'number']

cards = [{'color': c, 'shape': s, 'number': n} for c, s, n in product(colors, shapes, numbers)]
cards_sample = [sample(cards, s) for _ in range(n)]


In [2]:
tasks = ['color']  # We're considering only one task for simplicity

cards = [{'color': c, 'shape': s, 'number': n} for c, s, n in product(colors, shapes, numbers)]

def generate_dataset(num_samples):
    dataset = []

    for _ in range(num_samples):
        reference_card = choice(cards)
        card_set = sample(cards, s)

        # Determine if a matching card exists in the set
        matching_exists = any(card[tasks[0]] == reference_card[tasks[0]] for card in card_set)
        y = "yes" if matching_exists else "no"

        dataset.append(({'reference_card': reference_card, 'choices': card_set}, y))

    return dataset

dataset = generate_dataset(n)

# Example: Displaying the first sample in the dataset
print(dataset[0])


({'reference_card': {'color': 'r', 'shape': 'cross', 'number': '1'}, 'choices': [{'color': 'y', 'shape': 'cross', 'number': '3'}, {'color': 'r', 'shape': 'circle', 'number': '3'}, {'color': 'b', 'shape': 'square', 'number': '1'}, {'color': 'r', 'shape': 'cross', 'number': '3'}]}, 'yes')


In [3]:
dataset[0]

({'reference_card': {'color': 'r', 'shape': 'cross', 'number': '1'},
  'choices': [{'color': 'y', 'shape': 'cross', 'number': '3'},
   {'color': 'r', 'shape': 'circle', 'number': '3'},
   {'color': 'b', 'shape': 'square', 'number': '1'},
   {'color': 'r', 'shape': 'cross', 'number': '3'}]},
 'yes')

In [4]:
def format_prompt(reference_card, choices, n_shot=5):

    instructional_prompt = ""

    for shot in range(n_shot):
      data = choice(dataset)
      instructional_prompt += f"Example {shot+1}:\n Reference card: {data[0]['reference_card']}\nChoices: {data[0]['choices']}\nAnswer: {data[1]}\n"
    instructional_prompt += "Now, it's your turn!\n"


    prompt = instructional_prompt
    prompt += "Reference card: {}\n".format(reference_card)
    for idx, card in enumerate(choices):
        prompt += "Choice {}: {}\n".format(idx+1, card)
    prompt += "Does a matching card exist based on color?\nAnswer: "

    return prompt


In [5]:
dataset

[({'reference_card': {'color': 'r', 'shape': 'cross', 'number': '1'},
   'choices': [{'color': 'y', 'shape': 'cross', 'number': '3'},
    {'color': 'r', 'shape': 'circle', 'number': '3'},
    {'color': 'b', 'shape': 'square', 'number': '1'},
    {'color': 'r', 'shape': 'cross', 'number': '3'}]},
  'yes'),
 ({'reference_card': {'color': 'r', 'shape': 'square', 'number': '3'},
   'choices': [{'color': 'y', 'shape': 'circle', 'number': '3'},
    {'color': 'g', 'shape': 'square', 'number': '4'},
    {'color': 'g', 'shape': 'star', 'number': '1'},
    {'color': 'b', 'shape': 'square', 'number': '3'}]},
  'no'),
 ({'reference_card': {'color': 'r', 'shape': 'square', 'number': '1'},
   'choices': [{'color': 'r', 'shape': 'cross', 'number': '1'},
    {'color': 'r', 'shape': 'square', 'number': '2'},
    {'color': 'y', 'shape': 'square', 'number': '4'},
    {'color': 'b', 'shape': 'square', 'number': '2'}]},
  'yes'),
 ({'reference_card': {'color': 'g', 'shape': 'square', 'number': '2'},
   'ch

In [6]:
import torch
from transformers import GPT2Tokenizer, GPT2LMHeadModel
from transformers import AutoTokenizer, AutoModelForCausalLM
import os
from tqdm import tqdm
import env

# test if model is accurate

model_names = ['gpt2-medium',"gpt2-large","EleutherAI/gpt-neo-125m"]
'meta-llama/Llama-2-7b-hf'
model_name = model_names[0]

model = AutoModelForCausalLM.from_pretrained(model_name, token=env.HF_TOKEN).eval()
tokenizer = AutoTokenizer.from_pretrained(model_name)

# CUDA
if torch.cuda.is_available():
    model.cuda()

x = None

total = 0
correct = 0
SHOW_PROMPT = False

for (data, label) in tqdm(dataset):
  reference_card = data['reference_card']
  choices = data['choices']
  answer = label

  prompt = format_prompt(reference_card, choices, n_shot=N_SHOT)
  input_ids = tokenizer.encode(prompt, return_tensors='pt')

  if torch.cuda.is_available():
    input_ids = input_ids.to('cuda')

  # Create attention mask
  attention_mask = torch.ones_like(input_ids).to("cuda")

  with torch.no_grad():
      # In this case, using -100 as pad_token_id as the model generates an error if None is used and the warning is not harmful
      output = model.generate(input_ids,
                              attention_mask=attention_mask,
                              max_length=512,
                              num_return_sequences=1,
                              temperature=1.0,
                              pad_token_id=tokenizer.eos_token_id if tokenizer.eos_token_id is not None else -100)
  x = (model(input_ids))

  predicted_text = tokenizer.decode(output[0])

  predicted_answer = "yes" if "yes" in predicted_text[len(prompt):].lower() else "no"

  if predicted_answer not in ["yes", "no"]:
    raise ValueError("Invalid answer: {}".format(predicted_answer))

  if SHOW_PROMPT:
    print("Prompt: {}".format(prompt))
    print("Predicted answer: {}".format(predicted_answer))
    print("Actual answer: {}".format(label))

  total += 1
  if predicted_answer == answer:
    correct += 1

print("Accuracy: {}".format(correct/total))



  from .autonotebook import tqdm as notebook_tqdm
100%|██████████| 100/100 [00:13<00:00,  7.30it/s]

Accuracy: 0.33





In [7]:
import torch
from transformers import GPT2Tokenizer, GPT2LMHeadModel
from transformers import AutoTokenizer, AutoModelForCausalLM
import os
from tqdm import tqdm
import env

# test if model is accurate

model_names = ['gpt2-medium',"gpt2-large","EleutherAI/gpt-neo-125m"]
'meta-llama/Llama-2-7b-hf'
model_name = model_names[1]

model = AutoModelForCausalLM.from_pretrained(model_name, token=env.HF_TOKEN).eval()
tokenizer = AutoTokenizer.from_pretrained(model_name)

# CUDA
if torch.cuda.is_available():
    model.cuda()

x = None

total = 0
correct = 0
SHOW_PROMPT = False

for (data, label) in tqdm(dataset):
  reference_card = data['reference_card']
  choices = data['choices']
  answer = label

  prompt = format_prompt(reference_card, choices, n_shot=N_SHOT)
  input_ids = tokenizer.encode(prompt, return_tensors='pt')

  if torch.cuda.is_available():
    input_ids = input_ids.to('cuda')

  # Create attention mask
  attention_mask = torch.ones_like(input_ids).to("cuda")

  with torch.no_grad():
      # In this case, using -100 as pad_token_id as the model generates an error if None is used and the warning is not harmful
      output = model.generate(input_ids,
                              attention_mask=attention_mask,
                              max_length=512,
                              num_return_sequences=1,
                              temperature=1.0,
                              pad_token_id=tokenizer.eos_token_id if tokenizer.eos_token_id is not None else -100)
  x = (model(input_ids))

  predicted_text = tokenizer.decode(output[0])

  predicted_answer = "yes" if "yes" in predicted_text[len(prompt):].lower() else "no"

  if predicted_answer not in ["yes", "no"]:
    raise ValueError("Invalid answer: {}".format(predicted_answer))

  if SHOW_PROMPT:
    print("Prompt: {}".format(prompt))
    print("Predicted answer: {}".format(predicted_answer))
    print("Actual answer: {}".format(label))

  total += 1
  if predicted_answer == answer:
    correct += 1

print("Accuracy: {}".format(correct/total))



In [None]:
import torch
from transformers import GPT2Tokenizer, GPT2LMHeadModel
from transformers import AutoTokenizer, AutoModelForCausalLM
import os
from tqdm import tqdm
import env

# test if model is accurate

model_names = ['gpt2-medium',"gpt2-large","EleutherAI/gpt-neo-125m"]
'meta-llama/Llama-2-7b-hf'
model_name = model_names[2]

model = AutoModelForCausalLM.from_pretrained(model_name, token=env.HF_TOKEN).eval()
tokenizer = AutoTokenizer.from_pretrained(model_name)

# CUDA
if torch.cuda.is_available():
    model.cuda()

x = None

total = 0
correct = 0
SHOW_PROMPT = False

for (data, label) in tqdm(dataset):
  reference_card = data['reference_card']
  choices = data['choices']
  answer = label

  prompt = format_prompt(reference_card, choices, n_shot=N_SHOT)
  input_ids = tokenizer.encode(prompt, return_tensors='pt')

  if torch.cuda.is_available():
    input_ids = input_ids.to('cuda')

  # Create attention mask
  attention_mask = torch.ones_like(input_ids).to("cuda")

  with torch.no_grad():
      # In this case, using -100 as pad_token_id as the model generates an error if None is used and the warning is not harmful
      output = model.generate(input_ids,
                              attention_mask=attention_mask,
                              max_length=512,
                              num_return_sequences=1,
                              temperature=1.0,
                              pad_token_id=tokenizer.eos_token_id if tokenizer.eos_token_id is not None else -100)
  x = (model(input_ids))

  predicted_text = tokenizer.decode(output[0])

  predicted_answer = "yes" if "yes" in predicted_text[len(prompt):].lower() else "no"

  if predicted_answer not in ["yes", "no"]:
    raise ValueError("Invalid answer: {}".format(predicted_answer))

  if SHOW_PROMPT:
    print("Prompt: {}".format(prompt))
    print("Predicted answer: {}".format(predicted_answer))
    print("Actual answer: {}".format(label))

  total += 1
  if predicted_answer == answer:
    correct += 1

print("Accuracy: {}".format(correct/total))



100%|██████████| 100/100 [00:24<00:00,  4.02it/s]

Accuracy: 0.29



