In [1]:
from itertools import product
from random import sample, choice

n = 100
s = 4

colors = ['r', 'b', 'g', 'y']
shapes = ['circle', 'star', 'square', 'cross']
numbers = ['1', '2', '3', '4']
tasks = ['color', 'shape', 'number']

cards = [{'color': c, 'shape': s, 'number': n} for c, s, n in product(colors, shapes, numbers)]
cards_sample = [sample(cards, s) for _ in range(n)]

def wcst():
    correct_count = 0
    current_rule = choice(tasks)
    print(current_rule)

    # Generate a reference card
    reference_card = choice(cards)
    print(f"Reference card: {reference_card}")

    for card_set in cards_sample:
        print(f"Cards: {card_set}")

        matching_exists = any(card[current_rule] == reference_card[current_rule] for card in card_set)

        user_answer = input("Does a matching card exist based on the secret rule? (yes/no) ")

        if (user_answer == "yes" and matching_exists) or (user_answer == "no" and not matching_exists):
            print("Correct!")
            correct_count += 1
        else:
            print("Incorrect!")
            correct_count = 0

        # Rule change after 10 correct matches
        if correct_count == 10:
            current_rule = choice(tasks)
            correct_count = 0
            print("Rule changed!")

        if input("Continue? (yes/no) ") != "yes":
            break

# wcst()


In [4]:
tasks = ['color']  # We're considering only one task for simplicity

cards = [{'color': c, 'shape': s, 'number': n} for c, s, n in product(colors, shapes, numbers)]

def generate_dataset(num_samples):
    dataset = []

    for _ in range(num_samples):
        reference_card = choice(cards)
        card_set = sample(cards, s)

        # Determine if a matching card exists in the set
        matching_exists = any(card[tasks[0]] == reference_card[tasks[0]] for card in card_set)
        y = "yes" if matching_exists else "no"

        dataset.append(({'reference_card': reference_card, 'choices': card_set}, y))

    return dataset

dataset = generate_dataset(n)

# Example: Displaying the first sample in the dataset
print(dataset[0])


({'reference_card': {'color': 'g', 'shape': 'star', 'number': '3'}, 'choices': [{'color': 'b', 'shape': 'star', 'number': '1'}, {'color': 'g', 'shape': 'square', 'number': '4'}, {'color': 'b', 'shape': 'star', 'number': '2'}, {'color': 'b', 'shape': 'circle', 'number': '4'}]}, 'yes')


In [5]:
dataset[0]

({'reference_card': {'color': 'g', 'shape': 'star', 'number': '3'},
  'choices': [{'color': 'b', 'shape': 'star', 'number': '1'},
   {'color': 'g', 'shape': 'square', 'number': '4'},
   {'color': 'b', 'shape': 'star', 'number': '2'},
   {'color': 'b', 'shape': 'circle', 'number': '4'}]},
 'yes')

In [4]:
len(dataset)

100

In [6]:
def format_prompt(reference_card, choices, n_shot=5):

    # instructional_prompt = (
    #     "In the Wisconsin Card Sorting Test, a reference card is given. "
    #     "You are presented with a set of choices. The task is to determine if "
    #     "any of the choice cards match the reference card based on a specific attribute, "
    #     "in this case, color. Answer 'yes' if at least one card matches the reference card's color, "
    #     "otherwise answer 'no'.\n\n"
    # )

    instructional_prompt = ""

    for shot in range(n_shot):
      data = choice(dataset)
      instructional_prompt += f"Example {shot+1}:\n Reference card: {data[0]['reference_card']}\nChoices: {data[0]['choices']}\nAnswer: {data[1]}\n"
    instructional_prompt += "Now, it's your turn!\n"


    prompt = instructional_prompt
    prompt += "Reference card: {}\n".format(reference_card)
    for idx, card in enumerate(choices):
        prompt += "Choice {}: {}\n".format(idx+1, card)
    prompt += "Does a matching card exist based on color?\nAnswer: "

    return prompt


In [11]:
!pip install torch transformers

Collecting transformers
  Downloading transformers-4.34.0-py3-none-any.whl (7.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.7/7.7 MB[0m [31m26.5 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
Collecting huggingface-hub<1.0,>=0.16.4 (from transformers)
  Downloading huggingface_hub-0.18.0-py3-none-any.whl (301 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m302.0/302.0 kB[0m [31m13.9 MB/s[0m eta [36m0:00:00[0m
Collecting tokenizers<0.15,>=0.14 (from transformers)
  Downloading tokenizers-0.14.1-cp311-cp311-macosx_10_7_x86_64.whl (2.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.5/2.5 MB[0m [31m40.6 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hCollecting safetensors>=0.3.1 (from transformers)
  Downloading safetensors-0.4.0-cp311-cp311-macosx_10_7_x86_64.whl (439 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m439.2/439.2 kB[0m [31m29.8 MB/s[0m eta [36m0:00:00[0m
Collecting hugg

In [12]:
import torch
from transformers import GPT2Tokenizer, GPT2LMHeadModel
from transformers import AutoTokenizer, AutoModelForCausalLM
import os
from tqdm import tqdm

model_names = ['gpt2-medium',"EleutherAI/gpt-j-6B"]

model_name = model_names[0]

model = GPT2LMHeadModel.from_pretrained(model_name)
tokenizer = GPT2Tokenizer.from_pretrained(model_name)

model.eval()

if torch.cuda.is_available():
    model.to('cuda')

x = None

def evaluate_model_on_wcst(dataset, model_name="EleutherAI/gpt-j-6B", n=5,max_length=256):
    # Load pre-trained model and tokenizer using auto classes

    correct_count = 0

    for (data, label) in tqdm(dataset):
        reference_card = data['reference_card']
        choices = data['choices']

        prompt = format_prompt(reference_card, choices, n_shot=n)
        print(prompt)
        input_ids = tokenizer.encode(prompt, return_tensors='pt')

        if torch.cuda.is_available():
          input_ids = input_ids.to('cuda')

        # Create attention mask
        attention_mask = torch.ones_like(input_ids).to("cuda")

        with torch.no_grad():
            # In this case, using -100 as pad_token_id as the model generates an error if None is used and the warning is not harmful
            output = model.generate(input_ids,
                                    attention_mask=attention_mask,
                                    max_length=max_length,
                                    num_return_sequences=1,
                                    temperature=1.0,
                                    pad_token_id=tokenizer.eos_token_id if tokenizer.eos_token_id is not None else -100)
        x = (model(input_ids))
        print(x)

        break


    #     predicted_text = tokenizer.decode(output[0], skip_special_tokens=True)
    #     predicted_answer = "yes" if "yes" in predicted_text[len(prompt):].lower() else "no"
    #     print()

    #     if predicted_answer == label:
    #         correct_count += 1

    # accuracy = correct_count / len(dataset)
    # print(f"Accuracy: {accuracy*100:.2f}%")


for model_name in model_names:

  print('n = 4')
  evaluate_model_on_wcst(dataset,model_name=model_name,n=4,max_length=256)
  # print('n = 5')
  # evaluate_model_on_wcst(dataset,model_name=model_name,n=5,max_length=256)
  # print('n = 6')
  # evaluate_model_on_wcst(dataset,model_name=model_name,n=6,max_length=256)


  from .autonotebook import tqdm as notebook_tqdm
Downloading (…)lve/main/config.json: 100%|██████████| 718/718 [00:00<00:00, 589kB/s]
Downloading model.safetensors:   0%|          | 0.00/1.52G [00:00<?, ?B/s]

In [8]:
correct_count = 0

for (data, label) in tqdm(dataset):
    reference_card = data['reference_card']
    choices = data['choices']

    prompt = format_prompt(reference_card, choices, n_shot=n)
    print(prompt)
    input_ids = tokenizer.encode(prompt, return_tensors='pt')

    if torch.cuda.is_available():
      input_ids = input_ids.to('cuda')

    # Create attention mask
    attention_mask = torch.ones_like(input_ids).to("cuda")

    with torch.no_grad():
        # In this case, using -100 as pad_token_id as the model generates an error if None is used and the warning is not harmful
        output = model.generate(input_ids,
                                attention_mask=attention_mask,
                                max_length=256,
                                num_return_sequences=1,
                                temperature=1.0,
                                pad_token_id=tokenizer.eos_token_id if tokenizer.eos_token_id is not None else -100)
    x = (model(input_ids))
    print(x)

    break

  0%|          | 0/100 [00:00<?, ?it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (11941 > 1024). Running this sequence through the model will result in indexing errors


Example 1:
 Reference card: {'color': 'y', 'shape': 'circle', 'number': '4'}
Choices: [{'color': 'b', 'shape': 'star', 'number': '1'}, {'color': 'g', 'shape': 'circle', 'number': '1'}, {'color': 'g', 'shape': 'cross', 'number': '3'}, {'color': 'y', 'shape': 'square', 'number': '2'}]
Answer: yes
Example 2:
 Reference card: {'color': 'b', 'shape': 'square', 'number': '1'}
Choices: [{'color': 'g', 'shape': 'cross', 'number': '3'}, {'color': 'b', 'shape': 'star', 'number': '2'}, {'color': 'y', 'shape': 'square', 'number': '4'}, {'color': 'b', 'shape': 'circle', 'number': '2'}]
Answer: yes
Example 3:
 Reference card: {'color': 'y', 'shape': 'square', 'number': '1'}
Choices: [{'color': 'g', 'shape': 'star', 'number': '2'}, {'color': 'r', 'shape': 'square', 'number': '4'}, {'color': 'y', 'shape': 'circle', 'number': '3'}, {'color': 'g', 'shape': 'square', 'number': '1'}]
Answer: yes
Example 4:
 Reference card: {'color': 'b', 'shape': 'cross', 'number': '2'}
Choices: [{'color': 'y', 'shape': '

  0%|          | 0/100 [00:00<?, ?it/s]


RuntimeError: ignored

In [None]:
print(x)

input_ids

In [None]:
import torch
from transformers import GPT2Tokenizer, GPT2LMHeadModel
from transformers import AutoTokenizer, AutoModelForCausalLM
import os
from tqdm import tqdm

model_names = ['gpt2-medium',"EleutherAI/gpt-j-6B"]

model_name = model_names[1]

model = AutoModelForCausalLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

model.eval()

if torch.cuda.is_available():
    model.to('cuda')

def evaluate_model_on_wcst(dataset, model_name="EleutherAI/gpt-j-6B", n=5,max_length=256):
    # Load pre-trained model and tokenizer using auto classes

    correct_count = 0

    for (data, label) in tqdm(dataset):
        reference_card = data['reference_card']
        choices = data['choices']

        prompt = format_prompt(reference_card, choices, n_shot=n)

        input_ids = tokenizer.encode(prompt, return_tensors='pt')

        if torch.cuda.is_available():
          input_ids = input_ids.to('cuda')

        # Create attention mask
        attention_mask = torch.ones_like(input_ids).to("cuda")

        with torch.no_grad():
            # In this case, using -100 as pad_token_id as the model generates an error if None is used and the warning is not harmful
            output = model.generate(input_ids,
                                    attention_mask=attention_mask,
                                    max_length=max_length,
                                    num_return_sequences=1,
                                    temperature=1.0,
                                    pad_token_id=tokenizer.eos_token_id if tokenizer.eos_token_id is not None else -100)

        predicted_text = tokenizer.decode(output[0], skip_special_tokens=True)
        predicted_answer = "yes" if "yes" in predicted_text[len(prompt):].lower() else "no"

        if predicted_answer == label:
            correct_count += 1

    accuracy = correct_count / len(dataset)
    print(f"Accuracy: {accuracy*100:.2f}%")


for model_name in model_names:

  print('n = 4')
  evaluate_model_on_wcst(dataset,model_name=model_name,n=4,max_length=256)
  print('n = 5')
  evaluate_model_on_wcst(dataset,model_name=model_name,n=5,max_length=256)
  print('n = 6')
  evaluate_model_on_wcst(dataset,model_name=model_name,n=6,max_length=256)


In [None]:
%pip install transformers


In [None]:
import torch
from transformers import GPT2Tokenizer, GPT2LMHeadModel

os.environ['CUDA_LAUNCH_BLOCKING'] = "1"


# Load pre-trained model and tokenizer
model_name = 'gpt2'  # You can specify the model size, e.g., gpt2-small, gpt2-medium, etc.
model = GPT2LMHeadModel.from_pretrained(model_name)
tokenizer = GPT2Tokenizer.from_pretrained(model_name)

# Set the model to evaluation mode
model.eval()

# Move the model to the GPU if available
if torch.cuda.is_available():
    model.to('cuda')

# Prompt text
prompt_text = "Once upon a time"
encoded_prompt = tokenizer.encode(prompt_text, return_tensors="pt")

# Move the encoded prompt to the GPU if available
if torch.cuda.is_available():
    encoded_prompt = encoded_prompt.to('cuda')

# Generate text
with torch.no_grad():
    output_sequences = model.generate(
        input_ids=encoded_prompt,
        max_length=100,  # You can adjust this
        temperature=1.0,
        num_return_sequences=1,
    )

# Decode the generated text
generated_text = tokenizer.decode(output_sequences[0], skip_special_tokens=True)
print(generated_text)
