In [None]:
from datasets import load_dataset
from torch.utils.data import DataLoader
import re

In [1]:
# Test splits of the benchmark datasets: SVAMP, AQUA-RAT, GSM8K and StrategyQA.
# NOTE: MATH is not loaded in this cell.
svamp_test = load_dataset(path="tongyx361/svamp", split="test")
aqua_test = load_dataset(path="deepmind/aqua_rat", split="test")
gsm8k_test = load_dataset(path="openai/gsm8k", name="main", split="test")
strategy_qa_test = load_dataset(path="ChilleD/StrategyQA", split="test")

NameError: name 'load_dataset' is not defined

In [None]:
# Sanity check: show the first raw example of every test split.
print(f"SVAMP Sample: {svamp_test[0]}")
print(f"AQUA Sample: {aqua_test[0]}")
print(f"GSM8K Sample: {gsm8k_test[0]}")
print(f"StrategyQA Sample: {strategy_qa_test[0]}")

SVAMP Sample: {'ID': 'chal-1', 'Body': 'Each pack of dvds costs 76 dollars. If there is a discount of 25 dollars on each pack', 'Question': 'How much do you have to pay to buy each pack?', 'Equation': '( 76.0 - 25.0 )', 'Answer': 51.0, 'Type': 'Subtraction'}
AQUA Sample: {'question': 'A car is being driven, in a straight line and at a uniform speed, towards the base of a vertical tower. The top of the tower is observed from the car and, in the process, it takes 10 minutes for the angle of elevation to change from 45° to 60°. After how much more time will this car reach the base of the tower?', 'options': ['A)5(√3 + 1)', 'B)6(√3 + √2)', 'C)7(√3 – 1)', 'D)8(√3 – 2)', 'E)None of these'], 'rationale': 'Explanation :\nLet the height of the building be h. Initially, he was at an angle of 450. tan 45 = h/distance between car and tower. h = distance between car and tower (since tan 45 = 1).\nNow, after 10 minutes, it travelled a certain distance, and angle changed to 600.\ntan 60 = h/x x = h/√

In [None]:
# Parsers: give every benchmark example a common 'question' / 'final_answer' schema

def parse_svamp_example(example):
    """Add the unified ``question`` / ``final_answer`` fields to a SVAMP row.

    ``question`` is the row's ``Body`` and ``Question`` joined by a single
    space, and ``final_answer`` is ``str(Answer)``. The mapping is modified
    in place and the same object is returned.
    """
    body, query = example["Body"], example["Question"]
    example["question"] = " ".join((body, query))
    example["final_answer"] = str(example["Answer"])
    return example

def parse_aqua_example(example):
    """Add the unified ``question`` / ``final_answer`` fields to an AQUA row.

    The answer options are appended to the question text, one per line,
    under an ``Options:`` header. ``final_answer`` is a copy of the row's
    ``correct`` field. The mapping is modified in place and returned.
    """
    rendered_options = "\n".join(example["options"])
    example["question"] = "".join(
        [example["question"], "\nOptions:\n", rendered_options]
    )
    example["final_answer"] = example["correct"]
    return example

def parse_gsm8k_example(example):
    """Split a GSM8K ``answer`` field into ``reasoning`` and ``final_answer``.

    The text before the first ``####`` marker becomes ``reasoning`` and the
    text after the last marker becomes ``final_answer``; both are stripped of
    surrounding whitespace. The marker is matched without requiring a
    trailing space, so ``"####18"`` and ``"#### 18"`` are parsed the same way.

    If the marker is absent, both fields receive the whole stripped text.

    The mapping is modified in place and returned.
    """
    # Split on the bare marker; whitespace around it is removed by strip().
    parts = example["answer"].split("####")
    example["final_answer"] = parts[-1].strip()
    example["reasoning"] = parts[0].strip()
    return example

# StrategyQA has 'question', 'answer' (bool), 'facts' (list), 'decomposition' (list)
def parse_strategy_qa_example(example):
    """Add the unified ``final_answer`` field to a StrategyQA row.

    The row's ``question`` field already uses the unified name and is left
    as is. ``final_answer`` is ``str(answer)``, so a boolean answer becomes
    ``"True"`` or ``"False"``. The mapping is modified in place and returned.
    """
    example["final_answer"] = str(example["answer"])  # Convert boolean to string
    return example

In [35]:
# Run each benchmark's parser over its test split to add the unified fields.
svamp_benchmark_processed = svamp_test.map(function=parse_svamp_example)
aqua_benchmark_processed = aqua_test.map(function=parse_aqua_example)
gsm8k_benchmark_processed = gsm8k_test.map(function=parse_gsm8k_example)
strategy_qa_benchmark_processed = strategy_qa_test.map(function=parse_strategy_qa_example)

Map: 100%|██████████| 687/687 [00:00<00:00, 5472.22 examples/s]


In [36]:
# Wrap every processed benchmark in a DataLoader; all share one batch size.
BATCH_SIZE = 4
svamp_dataloader = DataLoader(dataset=svamp_benchmark_processed, batch_size=BATCH_SIZE)
aqua_dataloader = DataLoader(dataset=aqua_benchmark_processed, batch_size=BATCH_SIZE)
gsm8k_dataloader = DataLoader(dataset=gsm8k_benchmark_processed, batch_size=BATCH_SIZE)
strategy_qa_dataloader = DataLoader(dataset=strategy_qa_benchmark_processed, batch_size=BATCH_SIZE)

In [38]:
# Smoke test: the first batch of every loader exposes 'question' and 'final_answer'.
benchmark_dataloaders = [
    svamp_dataloader,
    aqua_dataloader,
    gsm8k_dataloader,
    strategy_qa_dataloader,
]

for dataloader in benchmark_dataloaders:
    batch = next(iter(dataloader))
    print(f"Question: {batch['question'][0]}")
    print(f"Answer: {batch['final_answer'][0]}")
    print("-" * 20)

Question: Each pack of dvds costs 76 dollars. If there is a discount of 25 dollars on each pack How much do you have to pay to buy each pack?
Answer: 51.0
--------------------
Question: A car is being driven, in a straight line and at a uniform speed, towards the base of a vertical tower. The top of the tower is observed from the car and, in the process, it takes 10 minutes for the angle of elevation to change from 45° to 60°. After how much more time will this car reach the base of the tower?
Options:
A)5(√3 + 1)
B)6(√3 + √2)
C)7(√3 – 1)
D)8(√3 – 2)
E)None of these
Answer: A
--------------------
Question: Janet’s ducks lay 16 eggs per day. She eats three for breakfast every morning and bakes muffins for her friends every day with four. She sells the remainder at the farmers' market daily for $2 per fresh duck egg. How much in dollars does she make every day at the farmers' market?
Answer: 18
--------------------
Question: Was ship that recovered Apollo 13 named after a World War II ba

In [37]:
# Per-benchmark check: pull the first batch from each loader and show its
# first question/answer pair, with a dashed rule between benchmarks.
svamp_batch = next(iter(svamp_dataloader))
print(
    f"SVAMP Question: {svamp_batch['question'][0]}",
    f"SVAMP Answer: {svamp_batch['final_answer'][0]}",
    sep="\n",
)

print("-" * 20)

aqua_batch = next(iter(aqua_dataloader))
print(
    f"AQUA Question: {aqua_batch['question'][0]}",
    f"AQUA Answer: {aqua_batch['final_answer'][0]}",
    sep="\n",
)

print("-" * 20)

gsm8k_batch = next(iter(gsm8k_dataloader))
print(
    f"GSM8K Question: {gsm8k_batch['question'][0]}",
    f"GSM8K Answer: {gsm8k_batch['final_answer'][0]}",
    sep="\n",
)

print("-" * 20)

sqa_batch = next(iter(strategy_qa_dataloader))
print(
    f"StrategyQA Question: {sqa_batch['question'][0]}",
    f"StrategyQA Answer: {sqa_batch['final_answer'][0]}",
    sep="\n",
)

# yaaay!

SVAMP Question: Each pack of dvds costs 76 dollars. If there is a discount of 25 dollars on each pack How much do you have to pay to buy each pack?
SVAMP Answer: 51.0
--------------------
AQUA Question: A car is being driven, in a straight line and at a uniform speed, towards the base of a vertical tower. The top of the tower is observed from the car and, in the process, it takes 10 minutes for the angle of elevation to change from 45° to 60°. After how much more time will this car reach the base of the tower?
Options:
A)5(√3 + 1)
B)6(√3 + √2)
C)7(√3 – 1)
D)8(√3 – 2)
E)None of these
AQUA Answer: A
--------------------
GSM8K Question: Janet’s ducks lay 16 eggs per day. She eats three for breakfast every morning and bakes muffins for her friends every day with four. She sells the remainder at the farmers' market daily for $2 per fresh duck egg. How much in dollars does she make every day at the farmers' market?
GSM8K Answer: 18
--------------------
StrategyQA Question: Was ship that reco

In [None]:
# Optional: token-count statistics for each benchmark's questions (might be useful)

import tiktoken

def analyze_dataloader(name, dataloader, model_name="gpt-3.5-turbo"):
    """Print question-count and token-count statistics for one dataloader.

    Args:
        name: Label printed in the report header.
        dataloader: Iterable of batches. Each batch is indexed with
            ``batch['question']``, which must yield the question strings.
        model_name: Model whose tiktoken encoding is used to count tokens.
            It is also shown in the report. Defaults to ``"gpt-3.5-turbo"``.

    Returns:
        None. The report is written to stdout.
    """
    encoding = tiktoken.encoding_for_model(model_name)

    total_questions = 0
    total_tokens = 0

    for batch in dataloader:
        questions = batch['question']
        total_questions += len(questions)
        total_tokens += sum(len(encoding.encode(q)) for q in questions)

    # An empty dataloader would otherwise divide by zero.
    avg_tokens = total_tokens / total_questions if total_questions > 0 else 0

    print(f"Dataset: {name}")
    print(f"  Total Questions: {total_questions}")
    print(f"  Total Tokens ({model_name}): {total_tokens}")
    print(f"  Avg Tokens per Question: {avg_tokens:.2f}")
    print("-" * 30)

# Pair every benchmark label with its dataloader, then report on each in turn.
datasets_to_analyze = list(zip(
    ("SVAMP", "AQUA", "GSM8K", "StrategyQA"),
    (svamp_dataloader, aqua_dataloader, gsm8k_dataloader, strategy_qa_dataloader),
))

print("Analyzing Datasets with GPT-3.5-turbo tokenizer...\n")
for name, loader in datasets_to_analyze:
    analyze_dataloader(name=name, dataloader=loader)

Analyzing Datasets with GPT-3.5-turbo tokenizer...

Dataset: SVAMP
  Total Questions: 1000
  Total Tokens (gpt-3.5-turbo): 39038
  Avg Tokens per Question: 39.04
------------------------------
Dataset: AQUA
  Total Questions: 254
  Total Tokens (gpt-3.5-turbo): 18786
  Avg Tokens per Question: 73.96
------------------------------
Dataset: GSM8K
  Total Questions: 1319
  Total Tokens (gpt-3.5-turbo): 77791
  Avg Tokens per Question: 58.98
------------------------------
Dataset: StrategyQA
  Total Questions: 687
  Total Tokens (gpt-3.5-turbo): 8430
  Avg Tokens per Question: 12.27
------------------------------
