In [4]:
!pip install itables

Collecting itables
  Downloading itables-2.5.2-py3-none-any.whl.metadata (10.0 kB)
Downloading itables-2.5.2-py3-none-any.whl (2.4 MB)
[2K   [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.4/2.4 MB[0m [31m14.8 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: itables
Successfully installed itables-2.5.2

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.0.1[0m[39;49m -> [0m[32;49m25.3[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [5]:
from datasets import load_from_disk
import pandas as pd
from itables import init_notebook_mode, show


# Load the dataset
dataset = load_from_disk("./processed_data")

# Convert to pandas DataFrame for nice viewing
df = dataset.to_pandas()

# Display basic info
print(f"Total samples: {len(df)}")
print(f"Columns: {list(df.columns)}")
print(f"\nPerturbation type distribution:")
print(df['perturbation_type'].value_counts().sort_index())

# Show a few examples from each perturbation type
print("\n" + "="*100)
for ptype in ['operand_swap',
'number_substitution', 'operator_replace',
'computation_plusminus']:
  print(f"\n{ptype.upper()}:")
  print("-"*100)
  sample = df[df['perturbation_type'] == ptype].iloc[10]
  print(f"Question: {sample['question']}")
  print(f"Original:  {sample['answer']}")
  print(f"Perturbed: {sample['perturbed_answer']}")

init_notebook_mode(all_interactive=True)  # or False, and call show(df) when needed
df

Total samples: 512
Columns: ['question', 'answer', 'perturbed_answer', 'perturbation_type', 'full_answer', 'final_answer']

Perturbation type distribution:
perturbation_type
computation_plusminus    128
number_substitution      128
operand_swap             128
operator_replace         128
Name: count, dtype: int64


OPERAND_SWAP:
----------------------------------------------------------------------------------------------------
Question: Susan has 21 cats and Bob has 3 cats. If Susan gives Robert 4 of her cats, how many more cats does Susan have than Bob?
Original:  After giving away four of her cats, Susan has 21 - 4 = 17.
Perturbed: After giving away four of her cats, Susan has 4 - 21 = -17.

NUMBER_SUBSTITUTION:
----------------------------------------------------------------------------------------------------
Question: Jay & Gloria were hosting a 4th of July party at their house.  Jay invited 22 people and Gloria invited 36.  They wanted to buy small American flags for everyone. 

0
Loading ITables v2.5.2 from the init_notebook_mode cell...  (need help?)


In [2]:
# Set pandas display options for better viewing
pd.set_option('display.max_colwidth', 100)
pd.set_option('display.max_rows', 50)

# Filter by perturbation type
perturbation_type = 'operand_swap'  # Change this to any type you want
filtered_df = df[df['perturbation_type'] ==
perturbation_type]

print(f"Showing {perturbation_type} samples:")
filtered_df[['question', 'answer',
'perturbed_answer']].head(10)

# Compare original vs perturbed for random samples
import random

random.seed(42)
sample_indices = random.sample(range(len(df)), 5)

for idx in sample_indices:
  row = df.iloc[idx]
  print(f"\n{'='*100}")
  print(f"Type: {row['perturbation_type']}")
  print(f"Question: {row['question'][:80]}...")
  print(f"\nOriginal:  {row['answer']}")
  print(f"Perturbed: {row['perturbed_answer']}")

Showing operand_swap samples:

Type: operand_swap
Question: A vegan restaurant serves three kinds of protein: seitan, beans, and lentils. Th...

Original:  The vegan restaurant serves 10 - 2 - 2 = 6 dishes with only one kind of protein.
Perturbed: The vegan restaurant serves 10 - 2 - 2 = 0 dishes with only one kind of protein.

Type: operand_swap
Question: Hans reserved a table at a fine dining restaurant for twelve people. He has to p...

Original:  Hans’s party includes 12 - 2 = 10 adults and 2 children.
Perturbed: Hans’s party includes 2 - 12 = -10 adults and 2 children.

Type: operator_replace
Question: Savannah is wrapping presents for her friends and family for Christmas. She has ...

Original:  Savannah has 12 gifts to give her friends and family and has already wrapped 3 gifts + 5 gifts = 8 gifts already wrapped with the first two rolls of paper.
Perturbed: Savannah has 12 gifts to give her friends and family and has already wrapped 3 gifts * 5 gifts = 15 gifts already wrapped 