In [1]:
from pathlib import Path
import sys
project_root = next((parent for parent in [Path.cwd()] + list(Path.cwd().parents) if (parent / "pyproject.toml").exists()), Path.cwd())
sys.path.append(str(project_root))

In [2]:
from glob import glob
import pandas as pd

from llm_python.datasets.io import read_soar_parquet

# Get all parquet file paths
parquet_files = glob("/tmp/superking_snapshot/*.parquet")

# Read each parquet file into a dataframe using read_soar_parquet()
dfs = []
for f in parquet_files:
    try:
        df = read_soar_parquet(f)
        dfs.append(df)
    except Exception as e:
        print(f"Error reading {f}: {e}")

# Merge all dataframes into a single dataframe
merged_df = pd.concat(dfs, ignore_index=True)
print(f"Merged {len(parquet_files)} parquet files with total {len(merged_df)} rows.")

Error reading /tmp/superking_snapshot/20250825_083015_Trelis_Qwen3-4B_ds-arc-agi-1-partial-100-c1542_arc-prize-2024_evaluation.parquet: Couldn't deserialize thrift: don't know what type: 
Deserializing page header failed.

Merged 60 parquet files with total 233410 rows.


In [3]:
from llm_python.transduction.code_classifier import CodeTransductionClassifier
from tqdm import tqdm

transduction_classifier = CodeTransductionClassifier()

print("Applying transduction classifier...")
merged_df["is_transductive"] = [
    transduction_classifier.is_transductive(row["code"])[0]
    for _, row in tqdm(merged_df.iterrows(), total=len(merged_df))
]

Applying transduction classifier...


100%|██████████| 233410/233410 [05:21<00:00, 725.84it/s] 


In [4]:
print(dfs[0].info())
print(dfs[0].head())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 62 entries, 0 to 61
Data columns (total 9 columns):
 #   Column                  Non-Null Count  Dtype                                             
---  ------                  --------------  -----                                             
 0   task_id                 62 non-null     string[pyarrow]                                   
 1   reasoning               62 non-null     string[pyarrow]                                   
 2   code                    62 non-null     string[pyarrow]                                   
 3   correct_train_input     62 non-null     list<item: bool>[pyarrow]                         
 4   correct_test_input      62 non-null     list<item: bool>[pyarrow]                         
 5   predicted_train_output  62 non-null     list<item: list<item: list<item: int64>>>[pyarrow]
 6   predicted_test_output   62 non-null     list<item: list<item: list<item: int64>>>[pyarrow]
 7   model                   62 no

In [5]:
from llm_python.datasets.io import validate_soar_dataframe_schema
from llm_python.utils.numpy import convert_numpy_types

merged_df["predicted_train_output"] = merged_df["predicted_train_output"].apply(convert_numpy_types)
merged_df["correct_train_input"] = merged_df["correct_train_input"].apply(convert_numpy_types)
merged_df["predicted_test_output"] = merged_df["predicted_test_output"].apply(convert_numpy_types)
merged_df["correct_test_input"] = merged_df["correct_test_input"].apply(convert_numpy_types)
validate_soar_dataframe_schema(merged_df)


In [6]:
from llm_python.datasets.validation import validate_soar_row

print("Validating rows formats...")
valid_mask = []
errors = []
for i, row in tqdm(merged_df.iterrows(), total=len(merged_df)):
    result = validate_soar_row(row)
    valid_mask.append(result.is_valid)
    if not result.is_valid:
        errors.append((i, result.errors))

# Filter out invalid rows
merged_df = merged_df[pd.Series(valid_mask, index=merged_df.index)]
print(f"Filtered dataframe: {len(merged_df)} valid rows out of {len(valid_mask)} total.")
print(f"Total invalid rows: {len(errors)}")

Validating rows formats...


  0%|          | 1/233410 [00:00<8:14:14,  7.87it/s]

Row 178 is invalid: ['predicted_train_output: Cell value out of range.', 'predicted_train_output: Cell value out of range.', 'predicted_train_output: Cell value out of range.', 'predicted_test_output: Cell value out of range.', 'predicted_train_output: Cell value out of range.', 'predicted_train_output: Cell value out of range.', 'predicted_train_output: Cell value out of range.', 'predicted_test_output: Cell value out of range.']
Row 220 is invalid: ['predicted_test_output: Grid is not a list.', 'predicted_test_output: Grid is not a list.']
Row 293 is invalid: ['predicted_train_output: Cell value out of range.', 'predicted_train_output: Cell value out of range.']


  2%|▏         | 4583/233410 [00:00<00:37, 6126.52it/s]

Row 3675 is invalid: ['predicted_test_output: Grid is not a list.', 'predicted_test_output: Grid is not a list.']
Row 3698 is invalid: ['predicted_test_output: Grid is not a list.', 'predicted_test_output: Grid is not a list.']
Row 3835 is invalid: ['predicted_train_output: Cell value out of range.', 'predicted_train_output: Cell value out of range.']
Row 3927 is invalid: ['predicted_train_output: Cell value out of range.', 'predicted_train_output: Cell value out of range.', 'predicted_train_output: Cell value out of range.', 'predicted_train_output: Cell value out of range.', 'predicted_test_output: Cell value out of range.', 'predicted_train_output: Cell value out of range.', 'predicted_train_output: Cell value out of range.', 'predicted_train_output: Cell value out of range.', 'predicted_train_output: Cell value out of range.', 'predicted_test_output: Cell value out of range.']


  9%|▉         | 20573/233410 [00:03<00:23, 8931.97it/s] 

Row 19786 is invalid: ['predicted_test_output: Grid is not a list.', 'predicted_test_output: Grid is not a list.']
Row 19877 is invalid: ['predicted_test_output: Grid is not a list.', 'predicted_test_output: Grid is not a list.']
Row 20108 is invalid: ['predicted_test_output: Grid is not a list.', 'predicted_test_output: Grid is not a list.']
Row 20746 is invalid: ['predicted_train_output: Cell value out of range.', 'predicted_test_output: Cell value out of range.', 'predicted_train_output: Cell value out of range.', 'predicted_test_output: Cell value out of range.']
Row 20748 is invalid: ['predicted_train_output: Cell value out of range.', 'predicted_train_output: Cell value out of range.', 'predicted_train_output: Cell value out of range.', 'predicted_test_output: Cell value out of range.', 'predicted_train_output: Cell value out of range.', 'predicted_train_output: Cell value out of range.', 'predicted_train_output: Cell value out of range.', 'predicted_test_output: Cell value out o

 10%|█         | 23544/233410 [00:03<00:18, 11496.90it/s]

Row 21490 is invalid: ['predicted_train_output: Row length mismatch.', 'predicted_train_output: Row length mismatch.', 'predicted_test_output: Row length mismatch.', 'predicted_train_output: Row length mismatch.', 'predicted_train_output: Row length mismatch.', 'predicted_test_output: Row length mismatch.']
Row 22918 is invalid: ['predicted_train_output: Row length mismatch.', 'predicted_test_output: Row length mismatch.', 'predicted_train_output: Row length mismatch.', 'predicted_test_output: Row length mismatch.']
Row 23026 is invalid: ['predicted_train_output: Grid is empty.', 'predicted_train_output: Grid is empty.']
Row 23805 is invalid: ['predicted_train_output: Grid is empty.', 'predicted_train_output: Grid is empty.']
Row 24147 is invalid: ['predicted_train_output: Row length mismatch.', 'predicted_train_output: Row length mismatch.', 'predicted_train_output: Row length mismatch.', 'predicted_test_output: Row length mismatch.', 'predicted_train_output: Row length mismatch.', 'p

 12%|█▏        | 27764/233410 [00:03<00:15, 13007.02it/s]

Row 25099 is invalid: ['predicted_train_output: Row length mismatch.', 'predicted_test_output: Row length mismatch.', 'predicted_train_output: Row length mismatch.', 'predicted_test_output: Row length mismatch.']
Row 25100 is invalid: ['predicted_train_output: Row length mismatch.', 'predicted_train_output: Row length mismatch.', 'predicted_train_output: Row length mismatch.', 'predicted_train_output: Row length mismatch.']
Row 25115 is invalid: ['predicted_train_output: Row length mismatch.', 'predicted_train_output: Row length mismatch.', 'predicted_train_output: Row length mismatch.', 'predicted_train_output: Row length mismatch.']
Row 25137 is invalid: ['predicted_train_output: Row length mismatch.', 'predicted_test_output: Row length mismatch.', 'predicted_train_output: Row length mismatch.', 'predicted_test_output: Row length mismatch.']
Row 25138 is invalid: ['predicted_test_output: Row length mismatch.', 'predicted_test_output: Row length mismatch.']
Row 25152 is invalid: ['pre

 14%|█▍        | 33728/233410 [00:04<00:14, 13356.26it/s]

Row 31252 is invalid: ['predicted_train_output: Invalid grid height.', 'predicted_train_output: Invalid grid height.', 'predicted_train_output: Invalid grid height.', 'predicted_train_output: Invalid grid height.']
Row 31365 is invalid: ['predicted_train_output: Row length mismatch.', 'predicted_train_output: Row is empty.', 'predicted_train_output: Row length mismatch.', 'predicted_train_output: Row is empty.']
Row 32594 is invalid: ['predicted_test_output: Grid is empty.', 'predicted_test_output: Grid is empty.']
Row 32601 is invalid: ['predicted_test_output: Grid is empty.', 'predicted_test_output: Grid is empty.']
Row 32604 is invalid: ['predicted_test_output: Grid is empty.', 'predicted_test_output: Grid is empty.']
Row 32606 is invalid: ['predicted_train_output: Grid is empty.', 'predicted_test_output: Grid is empty.', 'predicted_train_output: Grid is empty.', 'predicted_test_output: Grid is empty.']
Row 32617 is invalid: ['predicted_train_output: Grid is empty.', 'predicted_trai

 16%|█▌        | 36605/233410 [00:04<00:14, 13352.41it/s]

Row 35770 is invalid: ['predicted_train_output: First row is empty.', 'predicted_train_output: First row is empty.', 'predicted_train_output: First row is empty.', 'predicted_train_output: First row is empty.', 'predicted_train_output: First row is empty.', 'predicted_train_output: First row is empty.']
Row 35773 is invalid: ['predicted_train_output: First row is empty.', 'predicted_train_output: First row is empty.']
Row 36498 is invalid: ['predicted_train_output: Cell value out of range.', 'predicted_train_output: Cell value out of range.']
Row 36502 is invalid: ['predicted_train_output: Cell value out of range.', 'predicted_train_output: Cell value out of range.', 'predicted_train_output: Cell value out of range.', 'predicted_train_output: Cell value out of range.', 'predicted_train_output: Cell value out of range.', 'predicted_train_output: Cell value out of range.', 'predicted_train_output: Cell value out of range.', 'predicted_train_output: Cell value out of range.']
Row 36593 is

 18%|█▊        | 41947/233410 [00:05<00:19, 9747.88it/s] 

Row 40653 is invalid: ['predicted_train_output: Row length mismatch.', 'predicted_train_output: Row length mismatch.']


 20%|██        | 47330/233410 [00:05<00:15, 12403.55it/s]

Row 44584 is invalid: ['predicted_train_output: Grid is empty.', 'predicted_train_output: Grid is empty.', 'predicted_train_output: Grid is empty.', 'predicted_train_output: Grid is empty.']
Row 44586 is invalid: ['predicted_train_output: Grid is empty.', 'predicted_train_output: Grid is empty.', 'predicted_test_output: Grid is empty.', 'predicted_train_output: Grid is empty.', 'predicted_train_output: Grid is empty.', 'predicted_test_output: Grid is empty.']
Row 44587 is invalid: ['predicted_train_output: Grid is empty.', 'predicted_train_output: Grid is empty.']
Row 44646 is invalid: ['predicted_train_output: Row length mismatch.', 'predicted_train_output: Row length mismatch.', 'predicted_train_output: Row length mismatch.', 'predicted_train_output: Row length mismatch.', 'predicted_train_output: Row length mismatch.', 'predicted_train_output: Row length mismatch.']
Row 45887 is invalid: ['predicted_train_output: Row length mismatch.', 'predicted_test_output: Row length mismatch.', 

 21%|██        | 48835/233410 [00:05<00:14, 13147.94it/s]

Row 47392 is invalid: ['predicted_train_output: Cell value out of range.', 'predicted_train_output: Cell value out of range.', 'predicted_train_output: Cell value out of range.', 'predicted_train_output: Cell value out of range.', 'predicted_test_output: Cell value out of range.', 'predicted_train_output: Cell value out of range.', 'predicted_train_output: Cell value out of range.', 'predicted_train_output: Cell value out of range.', 'predicted_train_output: Cell value out of range.', 'predicted_test_output: Cell value out of range.']
Row 47540 is invalid: ['predicted_train_output: Grid is empty.', 'predicted_train_output: Grid is empty.']


 23%|██▎       | 53311/233410 [00:06<00:13, 13017.45it/s]

Row 51277 is invalid: ['predicted_test_output: Invalid grid width.', 'predicted_test_output: Invalid grid width.']
Row 51427 is invalid: ['predicted_test_output: Invalid grid height.', 'predicted_test_output: Invalid grid height.']


 27%|██▋       | 63633/233410 [00:06<00:12, 13213.41it/s]

Row 62208 is invalid: ['predicted_train_output: First row is empty.', 'predicted_test_output: First row is empty.', 'predicted_train_output: First row is empty.', 'predicted_test_output: First row is empty.']
Row 63165 is invalid: ['predicted_train_output: Row length mismatch.', 'predicted_train_output: Row length mismatch.', 'predicted_test_output: Row length mismatch.', 'predicted_train_output: Row length mismatch.', 'predicted_train_output: Row length mismatch.', 'predicted_test_output: Row length mismatch.']
Row 63572 is invalid: ['predicted_test_output: Invalid grid height.', 'predicted_test_output: Invalid grid height.']


 31%|███       | 71284/233410 [00:07<00:17, 9011.51it/s] 

Row 70236 is invalid: ['predicted_train_output: Row length mismatch.', 'predicted_train_output: Row length mismatch.']


 32%|███▏      | 74196/233410 [00:07<00:14, 11181.21it/s]

Row 72032 is invalid: ['predicted_train_output: Row length mismatch.', 'predicted_test_output: Row length mismatch.', 'predicted_train_output: Row length mismatch.', 'predicted_test_output: Row length mismatch.']
Row 72033 is invalid: ['predicted_train_output: Row length mismatch.', 'predicted_train_output: Row length mismatch.']
Row 74968 is invalid: ['predicted_train_output: Invalid grid width.', 'predicted_train_output: Invalid grid width.']


 33%|███▎      | 77110/233410 [00:08<00:15, 10375.42it/s]

Row 76092 is invalid: ['predicted_test_output: Row length mismatch.', 'predicted_test_output: Row length mismatch.']


 34%|███▍      | 80034/233410 [00:08<00:12, 12197.18it/s]

Row 79610 is invalid: ['predicted_train_output: Cell value out of range.', 'predicted_train_output: Cell value out of range.', 'predicted_train_output: Cell value out of range.', 'predicted_train_output: Cell value out of range.', 'predicted_test_output: Cell value out of range.', 'predicted_train_output: Cell value out of range.', 'predicted_train_output: Cell value out of range.', 'predicted_train_output: Cell value out of range.', 'predicted_train_output: Cell value out of range.', 'predicted_test_output: Cell value out of range.']


 35%|███▌      | 82451/233410 [00:08<00:17, 8879.65it/s] 

Row 81490 is invalid: ['predicted_train_output: Grid is empty.', 'predicted_train_output: Grid is empty.']
Row 82164 is invalid: ['predicted_train_output: Invalid grid width.', 'predicted_train_output: Invalid grid width.']
Row 83117 is invalid: ['predicted_train_output: Row length mismatch.', 'predicted_train_output: Row length mismatch.', 'predicted_train_output: Row length mismatch.', 'predicted_train_output: Row length mismatch.']


 37%|███▋      | 85957/233410 [00:09<00:15, 9791.40it/s]

Row 84815 is invalid: ['predicted_train_output: Cell value out of range.', 'predicted_train_output: Cell value out of range.']
Row 85013 is invalid: ['predicted_train_output: Row length mismatch.', 'predicted_train_output: Row length mismatch.']
Row 85015 is invalid: ['predicted_train_output: Grid is empty.', 'predicted_train_output: Grid is empty.']
Row 86125 is invalid: ['predicted_train_output: Row length mismatch.', 'predicted_train_output: Row length mismatch.', 'predicted_train_output: Row length mismatch.', 'predicted_train_output: Row length mismatch.']
Row 86172 is invalid: ['predicted_train_output: Row length mismatch.', 'predicted_train_output: Row length mismatch.', 'predicted_train_output: Row length mismatch.', 'predicted_train_output: Row length mismatch.', 'predicted_train_output: Row length mismatch.', 'predicted_train_output: Row length mismatch.']
Row 86862 is invalid: ['predicted_train_output: Invalid grid height.', 'predicted_train_output: Invalid grid height.']


 39%|███▉      | 92130/233410 [00:09<00:11, 11846.60it/s]

Row 90815 is invalid: ['predicted_train_output: Cell value out of range.', 'predicted_train_output: Cell value out of range.', 'predicted_train_output: Cell value out of range.', 'predicted_train_output: Cell value out of range.']
Row 90820 is invalid: ['predicted_train_output: Row length mismatch.', 'predicted_train_output: Row length mismatch.', 'predicted_train_output: Row length mismatch.', 'predicted_train_output: Row length mismatch.']
Row 90827 is invalid: ['predicted_train_output: Row length mismatch.', 'predicted_train_output: Row length mismatch.', 'predicted_train_output: Row length mismatch.', 'predicted_train_output: Row length mismatch.']
Row 90915 is invalid: ['predicted_train_output: Row length mismatch.', 'predicted_train_output: Row length mismatch.', 'predicted_train_output: Row length mismatch.', 'predicted_train_output: Row length mismatch.']
Row 91629 is invalid: ['predicted_train_output: Cell value out of range.', 'predicted_train_output: Cell value out of range.

 43%|████▎     | 99510/233410 [00:10<00:09, 14870.13it/s]

Row 96003 is invalid: ['predicted_train_output: Cell value out of range.', 'predicted_train_output: Cell value out of range.']
Row 96005 is invalid: ['predicted_train_output: Cell value out of range.', 'predicted_train_output: Cell value out of range.']
Row 96006 is invalid: ['predicted_train_output: Cell value out of range.', 'predicted_train_output: Cell value out of range.', 'predicted_test_output: Cell value out of range.', 'predicted_train_output: Cell value out of range.', 'predicted_train_output: Cell value out of range.', 'predicted_test_output: Cell value out of range.']
Row 96007 is invalid: ['predicted_train_output: Cell value out of range.', 'predicted_train_output: Cell value out of range.', 'predicted_test_output: Cell value out of range.', 'predicted_train_output: Cell value out of range.', 'predicted_train_output: Cell value out of range.', 'predicted_test_output: Cell value out of range.']
Row 96009 is invalid: ['predicted_train_output: Cell value out of range.', 'pred

 46%|████▋     | 108182/233410 [00:10<00:08, 15357.12it/s]

Row 105445 is invalid: ['predicted_train_output: Cell value out of range.', 'predicted_train_output: Cell value out of range.', 'predicted_train_output: Cell value out of range.', 'predicted_train_output: Cell value out of range.', 'predicted_train_output: Cell value out of range.', 'predicted_test_output: Cell value out of range.', 'predicted_train_output: Cell value out of range.', 'predicted_train_output: Cell value out of range.', 'predicted_train_output: Cell value out of range.', 'predicted_train_output: Cell value out of range.', 'predicted_train_output: Cell value out of range.', 'predicted_test_output: Cell value out of range.']
Row 105450 is invalid: ['predicted_train_output: Cell value out of range.', 'predicted_train_output: Cell value out of range.', 'predicted_test_output: Cell value out of range.', 'predicted_train_output: Cell value out of range.', 'predicted_train_output: Cell value out of range.', 'predicted_test_output: Cell value out of range.']
Row 105474 is invali

 48%|████▊     | 112232/233410 [00:11<00:08, 14610.66it/s]

Row 110046 is invalid: ['predicted_train_output: Cell value out of range.', 'predicted_train_output: Cell value out of range.', 'predicted_test_output: Cell value out of range.', 'predicted_train_output: Cell value out of range.', 'predicted_train_output: Cell value out of range.', 'predicted_test_output: Cell value out of range.']


 49%|████▉     | 115292/233410 [00:11<00:09, 12897.93it/s]

Row 114092 is invalid: ['predicted_train_output: Row length mismatch.', 'predicted_test_output: Row length mismatch.', 'predicted_train_output: Row length mismatch.', 'predicted_test_output: Row length mismatch.']
Row 114095 is invalid: ['predicted_train_output: Row length mismatch.', 'predicted_train_output: Row length mismatch.']
Row 114104 is invalid: ['predicted_train_output: Row length mismatch.', 'predicted_train_output: Row length mismatch.']
Row 114510 is invalid: ['predicted_train_output: Row length mismatch.', 'predicted_train_output: Row length mismatch.']
Row 114511 is invalid: ['predicted_train_output: Grid is empty.', 'predicted_train_output: Grid is empty.']
Row 114512 is invalid: ['predicted_train_output: Row length mismatch.', 'predicted_train_output: Row length mismatch.']


 51%|█████     | 117912/233410 [00:11<00:11, 10370.82it/s]

Row 117160 is invalid: ['predicted_train_output: Grid is empty.', 'predicted_train_output: Grid is empty.', 'predicted_train_output: Grid is empty.', 'predicted_train_output: Grid is empty.']
Row 117342 is invalid: ['predicted_train_output: Row length mismatch.', 'predicted_train_output: Row length mismatch.', 'predicted_test_output: Row length mismatch.', 'predicted_train_output: Row length mismatch.', 'predicted_train_output: Row length mismatch.', 'predicted_test_output: Row length mismatch.']
Row 117345 is invalid: ['predicted_test_output: Row length mismatch.', 'predicted_test_output: Row length mismatch.']
Row 117346 is invalid: ['predicted_test_output: Row length mismatch.', 'predicted_test_output: Row length mismatch.']
Row 118560 is invalid: ['predicted_train_output: Cell value out of range.', 'predicted_train_output: Cell value out of range.']


 52%|█████▏    | 120318/233410 [00:11<00:10, 11029.01it/s]

Row 119630 is invalid: ['predicted_train_output: Row length mismatch.', 'predicted_train_output: Row length mismatch.']


 53%|█████▎    | 123955/233410 [00:12<00:09, 11141.99it/s]

Row 122197 is invalid: ['predicted_train_output: Grid is empty.', 'predicted_train_output: Grid is empty.']
Row 122199 is invalid: ['predicted_train_output: Grid is empty.', 'predicted_train_output: Grid is empty.', 'predicted_train_output: Grid is empty.', 'predicted_train_output: Grid is empty.']
Row 122850 is invalid: ['predicted_train_output: Invalid grid height.', 'predicted_train_output: Invalid grid height.']
Row 122858 is invalid: ['predicted_train_output: Invalid grid height.', 'predicted_train_output: Invalid grid height.']
Row 122862 is invalid: ['predicted_train_output: Invalid grid height.', 'predicted_train_output: Invalid grid height.']
Row 122902 is invalid: ['predicted_train_output: Invalid grid height.', 'predicted_train_output: Invalid grid height.']
Row 122976 is invalid: ['predicted_train_output: Invalid grid height.', 'predicted_train_output: Invalid grid height.']
Row 122984 is invalid: ['predicted_train_output: Invalid grid height.', 'predicted_train_output: Inv

 54%|█████▍    | 126214/233410 [00:12<00:10, 10152.67it/s]

Row 125082 is invalid: ['predicted_train_output: Row length mismatch.', 'predicted_train_output: Row length mismatch.', 'predicted_test_output: Row length mismatch.', 'predicted_train_output: Row length mismatch.', 'predicted_train_output: Row length mismatch.', 'predicted_test_output: Row length mismatch.']


 57%|█████▋    | 132498/233410 [00:13<00:10, 9895.06it/s] 

Row 131647 is invalid: ['predicted_train_output: Row length mismatch.', 'predicted_train_output: Row length mismatch.', 'predicted_test_output: Row length mismatch.', 'predicted_train_output: Row length mismatch.', 'predicted_train_output: Row length mismatch.', 'predicted_test_output: Row length mismatch.']
Row 131654 is invalid: ['predicted_train_output: Row length mismatch.', 'predicted_train_output: Row length mismatch.', 'predicted_test_output: Row length mismatch.', 'predicted_train_output: Row length mismatch.', 'predicted_train_output: Row length mismatch.', 'predicted_test_output: Row length mismatch.']
Row 131887 is invalid: ['predicted_train_output: Row length mismatch.', 'predicted_train_output: Row length mismatch.', 'predicted_test_output: Row length mismatch.', 'predicted_train_output: Row length mismatch.', 'predicted_train_output: Row length mismatch.', 'predicted_test_output: Row length mismatch.']


 58%|█████▊    | 136326/233410 [00:13<00:08, 11523.84it/s]

Row 133568 is invalid: ['predicted_train_output: Row length mismatch.', 'predicted_train_output: Row length mismatch.']
Row 133576 is invalid: ['predicted_train_output: Grid is empty.', 'predicted_train_output: Grid is empty.', 'predicted_train_output: Grid is empty.', 'predicted_train_output: Grid is empty.']
Row 136064 is invalid: ['predicted_train_output: Cell value out of range.', 'predicted_train_output: Cell value out of range.', 'predicted_train_output: Cell value out of range.', 'predicted_train_output: Cell value out of range.', 'predicted_test_output: Cell value out of range.', 'predicted_train_output: Cell value out of range.', 'predicted_train_output: Cell value out of range.', 'predicted_train_output: Cell value out of range.', 'predicted_train_output: Cell value out of range.', 'predicted_test_output: Cell value out of range.']


 59%|█████▉    | 138838/233410 [00:13<00:08, 11320.05it/s]

Row 136436 is invalid: ['predicted_train_output: Invalid grid height.', 'predicted_train_output: Invalid grid height.']
Row 136599 is invalid: ['predicted_train_output: Row length mismatch.', 'predicted_train_output: Row length mismatch.', 'predicted_test_output: Row length mismatch.', 'predicted_train_output: Row length mismatch.', 'predicted_train_output: Row length mismatch.', 'predicted_test_output: Row length mismatch.']
Row 137281 is invalid: ['predicted_test_output: Row length mismatch.', 'predicted_test_output: Row length mismatch.']
Row 138090 is invalid: ['predicted_train_output: Invalid grid height.', 'predicted_train_output: Invalid grid height.']
Row 138197 is invalid: ['predicted_train_output: Invalid grid height.', 'predicted_train_output: Invalid grid height.']
Row 138592 is invalid: ['predicted_test_output: Cell value out of range.', 'predicted_test_output: Cell value out of range.']


 61%|██████    | 142242/233410 [00:13<00:06, 14090.28it/s]

Row 139089 is invalid: ['predicted_train_output: Cell value out of range.', 'predicted_test_output: Cell value out of range.', 'predicted_train_output: Cell value out of range.', 'predicted_test_output: Cell value out of range.']
Row 139828 is invalid: ['predicted_train_output: Row is empty.', 'predicted_train_output: Row is empty.']


 63%|██████▎   | 146075/233410 [00:14<00:05, 15177.49it/s]

Row 143126 is invalid: ['predicted_train_output: Invalid grid width.', 'predicted_train_output: Invalid grid width.']
Row 143137 is invalid: ['predicted_train_output: Invalid grid height.', 'predicted_train_output: Invalid grid height.']


 65%|██████▍   | 150823/233410 [00:14<00:05, 13803.40it/s]

Row 148270 is invalid: ['predicted_train_output: Row length mismatch.', 'predicted_test_output: Row length mismatch.', 'predicted_train_output: Row length mismatch.', 'predicted_test_output: Row length mismatch.']
Row 149970 is invalid: ['predicted_test_output: Invalid grid height.', 'predicted_test_output: Invalid grid height.']
Row 149971 is invalid: ['predicted_test_output: Invalid grid height.', 'predicted_test_output: Invalid grid height.']
Row 149997 is invalid: ['predicted_test_output: Invalid grid height.', 'predicted_test_output: Invalid grid height.']
Row 150004 is invalid: ['predicted_test_output: Invalid grid height.', 'predicted_test_output: Invalid grid height.']
Row 150005 is invalid: ['predicted_test_output: Invalid grid height.', 'predicted_test_output: Invalid grid height.']
Row 150013 is invalid: ['predicted_test_output: Invalid grid height.', 'predicted_test_output: Invalid grid height.']
Row 150029 is invalid: ['predicted_test_output: Invalid grid height.', 'predic

 66%|██████▌   | 153570/233410 [00:14<00:07, 10527.70it/s]

Row 152512 is invalid: ['predicted_train_output: Row length mismatch.', 'predicted_train_output: Row length mismatch.', 'predicted_test_output: Row length mismatch.', 'predicted_train_output: Row length mismatch.', 'predicted_train_output: Row length mismatch.', 'predicted_test_output: Row length mismatch.']
Row 152514 is invalid: ['predicted_train_output: Grid is empty.', 'predicted_train_output: Grid is empty.']
Row 152516 is invalid: ['predicted_train_output: Grid is empty.', 'predicted_train_output: Grid is empty.']
Row 154137 is invalid: ['predicted_train_output: Invalid grid height.', 'predicted_test_output: Invalid grid height.', 'predicted_train_output: Invalid grid height.', 'predicted_test_output: Invalid grid height.']
Row 154167 is invalid: ['predicted_train_output: Invalid grid height.', 'predicted_train_output: Invalid grid height.']
Row 154292 is invalid: ['predicted_train_output: Invalid grid height.', 'predicted_test_output: Invalid grid height.', 'predicted_train_outp

 68%|██████▊   | 159178/233410 [00:15<00:05, 13018.08it/s]

Row 156853 is invalid: ['predicted_train_output: Row length mismatch.', 'predicted_train_output: Row length mismatch.', 'predicted_train_output: Row length mismatch.', 'predicted_test_output: Row length mismatch.', 'predicted_train_output: Row length mismatch.', 'predicted_train_output: Row length mismatch.', 'predicted_train_output: Row length mismatch.', 'predicted_test_output: Row length mismatch.']
Row 157253 is invalid: ['predicted_train_output: Row length mismatch.', 'predicted_train_output: Row length mismatch.']
Row 157355 is invalid: ['predicted_train_output: Invalid grid height.', 'predicted_train_output: Invalid grid height.']
Row 157357 is invalid: ['predicted_train_output: Invalid grid height.', 'predicted_train_output: Invalid grid height.']
Row 157360 is invalid: ['predicted_train_output: Invalid grid height.', 'predicted_train_output: Invalid grid height.']
Row 157362 is invalid: ['predicted_train_output: Invalid grid height.', 'predicted_train_output: Invalid grid heig

 70%|██████▉   | 162900/233410 [00:15<00:05, 12916.97it/s]

Row 161016 is invalid: ['predicted_test_output: Row length mismatch.', 'predicted_test_output: Row length mismatch.']
Row 161027 is invalid: ['predicted_train_output: Grid is empty.', 'predicted_train_output: Grid is empty.']


 71%|███████   | 165511/233410 [00:15<00:06, 10196.04it/s]

Row 164063 is invalid: ['predicted_test_output: Invalid grid width.', 'predicted_test_output: Invalid grid width.']
Row 164066 is invalid: ['predicted_test_output: Invalid grid width.', 'predicted_test_output: Invalid grid width.']
Row 164086 is invalid: ['predicted_test_output: Invalid grid height.', 'predicted_test_output: Invalid grid height.']
Row 164305 is invalid: ['predicted_test_output: Invalid grid height.', 'predicted_test_output: Invalid grid height.']


 73%|███████▎  | 171037/233410 [00:16<00:05, 10695.62it/s]

Row 169152 is invalid: ['predicted_test_output: Invalid grid height.', 'predicted_test_output: Invalid grid height.']
Row 170234 is invalid: ['predicted_train_output: Grid is empty.', 'predicted_train_output: Grid is empty.', 'predicted_train_output: Grid is empty.', 'predicted_train_output: Grid is empty.']
Row 170236 is invalid: ['predicted_train_output: Row length mismatch.', 'predicted_train_output: Row length mismatch.']
Row 170243 is invalid: ['predicted_train_output: Grid is empty.', 'predicted_train_output: Grid is empty.']
Row 170283 is invalid: ['predicted_train_output: First row is empty.', 'predicted_train_output: First row is empty.']
Row 170311 is invalid: ['predicted_train_output: Grid is empty.', 'predicted_train_output: Grid is empty.']
Row 170317 is invalid: ['predicted_train_output: Grid is empty.', 'predicted_train_output: Grid is empty.']
Row 170409 is invalid: ['predicted_train_output: Grid is empty.', 'predicted_train_output: Grid is empty.']
Row 170416 is invali

 75%|███████▌  | 176053/233410 [00:16<00:04, 13206.39it/s]

Row 173444 is invalid: ['predicted_train_output: Row length mismatch.', 'predicted_test_output: Row length mismatch.', 'predicted_train_output: Row length mismatch.', 'predicted_test_output: Row length mismatch.']


 77%|███████▋  | 178726/233410 [00:16<00:04, 11823.26it/s]

Row 176677 is invalid: ['predicted_train_output: Row is empty.', 'predicted_test_output: Row is empty.', 'predicted_train_output: Row is empty.', 'predicted_test_output: Row is empty.']
Row 176842 is invalid: ['predicted_train_output: Grid is empty.', 'predicted_train_output: Grid is empty.', 'predicted_train_output: Grid is empty.', 'predicted_train_output: Grid is empty.', 'predicted_train_output: Grid is empty.', 'predicted_train_output: Grid is empty.', 'predicted_train_output: Grid is empty.', 'predicted_train_output: Grid is empty.', 'predicted_train_output: Grid is empty.', 'predicted_train_output: Grid is empty.']
Row 176869 is invalid: ['predicted_train_output: Grid is empty.', 'predicted_train_output: Grid is empty.', 'predicted_train_output: Grid is empty.', 'predicted_train_output: Grid is empty.', 'predicted_train_output: Grid is empty.', 'predicted_train_output: Grid is empty.', 'predicted_train_output: Grid is empty.', 'predicted_train_output: Grid is empty.', 'predicted

 78%|███████▊  | 181593/233410 [00:17<00:04, 12882.82it/s]

Row 179400 is invalid: ['predicted_train_output: Row length mismatch.', 'predicted_test_output: Row length mismatch.', 'predicted_train_output: Row length mismatch.', 'predicted_test_output: Row length mismatch.']


 80%|████████  | 187163/233410 [00:17<00:03, 12105.75it/s]

Row 185740 is invalid: ['predicted_train_output: Row length mismatch.', 'predicted_test_output: Row length mismatch.', 'predicted_train_output: Row length mismatch.', 'predicted_test_output: Row length mismatch.']
Row 185758 is invalid: ['predicted_train_output: Row length mismatch.', 'predicted_test_output: Row length mismatch.', 'predicted_train_output: Row length mismatch.', 'predicted_test_output: Row length mismatch.']
Row 185773 is invalid: ['predicted_train_output: Row length mismatch.', 'predicted_train_output: Row length mismatch.', 'predicted_test_output: Row length mismatch.', 'predicted_train_output: Row length mismatch.', 'predicted_train_output: Row length mismatch.', 'predicted_test_output: Row length mismatch.']
Row 185778 is invalid: ['predicted_train_output: Row length mismatch.', 'predicted_train_output: Row length mismatch.', 'predicted_test_output: Row length mismatch.', 'predicted_train_output: Row length mismatch.', 'predicted_train_output: Row length mismatch.',

 82%|████████▏ | 190965/233410 [00:17<00:03, 11465.13it/s]

Row 189187 is invalid: ['predicted_train_output: Cell value out of range.', 'predicted_train_output: Cell value out of range.']
Row 190294 is invalid: ['predicted_train_output: Row length mismatch.', 'predicted_train_output: Row length mismatch.']
Row 190761 is invalid: ['predicted_train_output: Grid is empty.', 'predicted_train_output: Grid is empty.', 'predicted_train_output: Grid is empty.', 'predicted_train_output: Grid is empty.']
Row 190770 is invalid: ['predicted_train_output: Grid is empty.', 'predicted_train_output: Grid is empty.']
Row 190773 is invalid: ['predicted_train_output: Grid is empty.', 'predicted_train_output: Grid is empty.']
Row 190774 is invalid: ['predicted_train_output: Grid is empty.', 'predicted_train_output: Grid is empty.']
Row 190778 is invalid: ['predicted_train_output: Grid is empty.', 'predicted_train_output: Grid is empty.']
Row 190784 is invalid: ['predicted_train_output: Grid is empty.', 'predicted_train_output: Grid is empty.']
Row 190785 is invali

 83%|████████▎ | 193238/233410 [00:18<00:04, 9883.28it/s] 

Row 191971 is invalid: ['predicted_train_output: Cell value out of range.', 'predicted_test_output: Cell value out of range.', 'predicted_train_output: Cell value out of range.', 'predicted_test_output: Cell value out of range.']
Row 192014 is invalid: ['predicted_train_output: Grid is empty.', 'predicted_train_output: Grid is empty.']
Row 192833 is invalid: ['predicted_test_output: Invalid grid height.', 'predicted_test_output: Invalid grid height.']
Row 192871 is invalid: ['predicted_test_output: Invalid grid height.', 'predicted_test_output: Invalid grid height.']
Row 192872 is invalid: ['predicted_train_output: Invalid grid height.', 'predicted_train_output: Invalid grid height.']
Row 192874 is invalid: ['predicted_test_output: Invalid grid height.', 'predicted_test_output: Invalid grid height.']
Row 192944 is invalid: ['predicted_test_output: Invalid grid height.', 'predicted_test_output: Invalid grid height.']
Row 193000 is invalid: ['predicted_test_output: Invalid grid height.',

 84%|████████▍ | 196097/233410 [00:18<00:03, 11807.52it/s]

Row 194194 is invalid: ['predicted_train_output: Invalid grid height.', 'predicted_train_output: Invalid grid height.']


 86%|████████▌ | 200599/233410 [00:18<00:02, 14153.45it/s]

Row 198005 is invalid: ['predicted_train_output: Grid is empty.', 'predicted_train_output: Grid is empty.', 'predicted_train_output: Grid is empty.', 'predicted_train_output: Grid is empty.']
Row 198013 is invalid: ['predicted_train_output: Grid is empty.', 'predicted_train_output: Grid is empty.']
Row 198113 is invalid: ['predicted_train_output: Invalid grid width.', 'predicted_train_output: Invalid grid width.']
Row 198144 is invalid: ['predicted_train_output: Invalid grid width.', 'predicted_train_output: Invalid grid width.']


 88%|████████▊ | 205148/233410 [00:18<00:01, 14382.22it/s]

Row 201989 is invalid: ['predicted_train_output: Row length mismatch.', 'predicted_train_output: Row length mismatch.', 'predicted_train_output: Row length mismatch.', 'predicted_test_output: Row length mismatch.', 'predicted_train_output: Row length mismatch.', 'predicted_train_output: Row length mismatch.', 'predicted_train_output: Row length mismatch.', 'predicted_test_output: Row length mismatch.']
Row 202902 is invalid: ['predicted_test_output: Invalid grid height.', 'predicted_test_output: Invalid grid height.']
Row 202929 is invalid: ['predicted_test_output: Invalid grid height.', 'predicted_test_output: Invalid grid height.']
Row 202946 is invalid: ['predicted_test_output: Invalid grid height.', 'predicted_test_output: Invalid grid height.']
Row 202974 is invalid: ['predicted_test_output: Invalid grid height.', 'predicted_test_output: Invalid grid height.']
Row 203013 is invalid: ['predicted_test_output: Invalid grid height.', 'predicted_test_output: Invalid grid height.']


 94%|█████████▎| 218726/233410 [00:20<00:02, 6999.93it/s] 

Row 217412 is invalid: ['predicted_test_output: Grid is not a list.', 'predicted_test_output: Grid is not a list.']
Row 217583 is invalid: ['predicted_test_output: Grid is empty.', 'predicted_test_output: Grid is empty.']
Row 217763 is invalid: ['predicted_test_output: Grid is not a list.', 'predicted_test_output: Grid is not a list.']
Row 217788 is invalid: ['predicted_train_output: Invalid grid height.', 'predicted_test_output: Invalid grid height.', 'predicted_train_output: Invalid grid height.', 'predicted_test_output: Invalid grid height.']
Row 217824 is invalid: ['predicted_train_output: Invalid grid height.', 'predicted_train_output: Invalid grid height.']
Row 217842 is invalid: ['predicted_test_output: Grid is not a list.', 'predicted_test_output: Grid is not a list.']


 95%|█████████▍| 221627/233410 [00:21<00:01, 8646.63it/s]

Row 220218 is invalid: ['predicted_train_output: Cell value out of range.', 'predicted_train_output: Cell value out of range.', 'predicted_train_output: Cell value out of range.', 'predicted_train_output: Cell value out of range.', 'predicted_train_output: Cell value out of range.', 'predicted_test_output: Cell value out of range.', 'predicted_test_output: Cell value out of range.', 'predicted_train_output: Cell value out of range.', 'predicted_train_output: Cell value out of range.', 'predicted_train_output: Cell value out of range.', 'predicted_train_output: Cell value out of range.', 'predicted_train_output: Cell value out of range.', 'predicted_test_output: Cell value out of range.', 'predicted_test_output: Cell value out of range.']
Row 220272 is invalid: ['predicted_train_output: Cell value out of range.', 'predicted_train_output: Cell value out of range.', 'predicted_train_output: Cell value out of range.', 'predicted_test_output: Cell value out of range.', 'predicted_train_outp

 96%|█████████▌| 223301/233410 [00:21<00:01, 6776.13it/s]

Row 222174 is invalid: ['predicted_test_output: Grid is not a list.', 'predicted_test_output: Grid is not a list.']
Row 222272 is invalid: ['predicted_test_output: Grid is not a list.', 'predicted_test_output: Grid is not a list.']
Row 222409 is invalid: ['predicted_train_output: Cell value out of range.', 'predicted_train_output: Cell value out of range.', 'predicted_test_output: Cell value out of range.', 'predicted_train_output: Cell value out of range.', 'predicted_train_output: Cell value out of range.', 'predicted_test_output: Cell value out of range.']
Row 222427 is invalid: ['predicted_train_output: Cell value out of range.', 'predicted_train_output: Cell value out of range.', 'predicted_train_output: Cell value out of range.', 'predicted_test_output: Cell value out of range.', 'predicted_train_output: Cell value out of range.', 'predicted_train_output: Cell value out of range.', 'predicted_train_output: Cell value out of range.', 'predicted_test_output: Cell value out of range

 99%|█████████▉| 231736/233410 [00:22<00:00, 13062.08it/s]

Row 230811 is invalid: ['predicted_test_output: Invalid grid width.', 'predicted_test_output: Invalid grid width.']
Row 230824 is invalid: ['predicted_test_output: Invalid grid height.', 'predicted_test_output: Invalid grid height.']
Row 231355 is invalid: ['predicted_test_output: Invalid grid width.', 'predicted_test_output: Invalid grid width.']
Row 231614 is invalid: ['predicted_test_output: Grid is empty.', 'predicted_test_output: Grid is empty.']
Row 231620 is invalid: ['predicted_train_output: Cell value out of range.', 'predicted_test_output: Cell value out of range.', 'predicted_train_output: Cell value out of range.', 'predicted_test_output: Cell value out of range.']
Row 231621 is invalid: ['predicted_test_output: Grid is not a list.', 'predicted_test_output: Grid is not a list.']
Row 231644 is invalid: ['predicted_test_output: Grid is not a list.', 'predicted_test_output: Grid is not a list.']
Row 231877 is invalid: ['predicted_train_output: Cell value out of range.', 'predi

100%|██████████| 233410/233410 [00:22<00:00, 10376.67it/s]

Row 232949 is invalid: ['predicted_train_output: Cell value out of range.', 'predicted_train_output: Cell value out of range.', 'predicted_test_output: Cell value out of range.', 'predicted_train_output: Cell value out of range.', 'predicted_train_output: Cell value out of range.', 'predicted_test_output: Cell value out of range.']
Row 233168 is invalid: ['predicted_test_output: Grid is not a list.', 'predicted_test_output: Grid is not a list.']
Row 233386 is invalid: ['predicted_test_output: Grid is not a list.', 'predicted_test_output: Grid is not a list.']





Filtered dataframe: 233049 valid rows out of 233410 total.
Total invalid rows: 361


In [7]:
from llm_python.datasets.validation import validate_soar_dataframe_correctness


print("Validating row correctness...")

correctness_result = validate_soar_dataframe_correctness(merged_df, correctness_samples=1000)
print(correctness_result.summary())
if not correctness_result.is_valid:
    raise ValueError(
        "Validation failed: Some programs do not meet the correctness requirements."
    )

Validating row correctness...


Correctness validation: 100%|██████████| 1000/1000 [00:31<00:00, 32.18it/s]

Correctness validation:
    Total programs: 233049
    Sample size: 1000
    Correctness valid: FAIL
    Errors: 14
    Sample errors:
      Row 154443, Train Output 0: predicted != actual
      Row 154443, Train Output 1: predicted != actual
      Row 154443, Test Output 0: predicted != actual





In [8]:
from llm_python.utils.arc_tester import ArcTester
from llm_python.utils.task_loader import get_task_loader
import multiprocessing as mp
from concurrent.futures import ProcessPoolExecutor, as_completed
from tqdm import tqdm
import time

print(f"Original dataset shape: {df.shape}")
print(f"Original columns: {df.columns.tolist()}")

task_loader = get_task_loader()


def process_single_row(row_data):
    """Process a single row - this function will be called in parallel"""
    idx, row = row_data
    try:
        # Create instances inside the worker process
        arc_tester = ArcTester()

        result = arc_tester.test_program(
            row["code"], task_loader.get_task(row["task_id"])
        )

        # Create corrected row with actual values from arc_tester
        corrected_row = row.copy()
        corrected_row["predicted_train_output"] = result.train_outputs
        corrected_row["predicted_test_output"] = result.test_outputs
        corrected_row["correct_train_input"] = result.correct_train_input
        corrected_row["correct_test_input"] = result.correct_test_input

        return ("success", idx, corrected_row)

    except Exception as e:
        return ("failed", idx, str(e))


# Determine optimal number of workers
num_workers = min(mp.cpu_count() - 2, 8)  # Don't use too many to avoid memory issues
print(f"Using {num_workers} parallel workers")

total_rows = len(merged_df)
all_corrected_rows = []
all_failed_indices = []

print(f"Processing {total_rows}...")

start_time = time.time()

corrected_rows = []
failed_indices = []

# Process batch in parallel with progress bar
with ProcessPoolExecutor(max_workers=num_workers) as executor:
    futures = {
        executor.submit(process_single_row, row): row[0] for row in merged_df.iterrows()
    }
    corrected_count = 0
    failed_count = 0

    pbar = tqdm(
        as_completed(futures),
        total=len(futures),
        desc="Processing Rows",
        unit=" row",
        bar_format="{l_bar}{bar}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}, {rate_fmt}{postfix}]",
    )

    for future in pbar:
        try:
            result_type, idx, result_data = future.result()
            if result_type == "success":
                corrected_rows.append(result_data)
                corrected_count += 1
            else:
                failed_indices.append(idx)
                failed_count += 1
        except Exception as e:
            failed_count += 1
            failed_idx = futures[future]
            failed_indices.append(failed_idx)
            print(f"\nAn error occurred processing row {failed_idx}: {e}")

        pbar.set_postfix(completed=corrected_count, failed=failed_count, refresh=True)

print("\nProcessing complete!")
print(f"Total successful rows: {len(corrected_rows)}")
print(f"Total failed rows: {len(failed_indices)}")
print(f"Total time: {(time.time() - start_time) / 60:.1f} minutes")


Original dataset shape: (13, 9)
Original columns: ['task_id', 'reasoning', 'code', 'correct_train_input', 'correct_test_input', 'predicted_train_output', 'predicted_test_output', 'model', 'is_transductive']
Using 8 parallel workers
Processing 233049...


Processing Rows: 100%|██████████| 233049/233049 [38:39<00:00, 100.49 row/s, completed=233049, failed=0] 



Processing complete!
Total successful rows: 233049
Total failed rows: 0
Total time: 38.8 minutes


In [21]:
fixed_df = pd.DataFrame(corrected_rows)
fixed_df = fixed_df[~fixed_df["code"].str.lower().str.contains("random|randbelow|rvs")]
print(f"Kept {len(fixed_df)}/{len(corrected_rows)} rows after filtering for randomness.")

Kept 232880/233049 rows after filtering for randomness.


In [26]:
from llm_python.datasets.validation import validate_soar_dataframe_correctness


print("Validating row correctness...")

correctness_result = validate_soar_dataframe_correctness(fixed_df, correctness_samples=10000, seed=41)
print(correctness_result.summary())
if not correctness_result.is_valid:
    raise ValueError(
        "Validation failed: Some programs do not meet the correctness requirements."
    )

Validating row correctness...


Correctness validation: 100%|██████████| 10000/10000 [05:09<00:00, 32.36it/s]

Correctness validation:
    Total programs: 232880
    Sample size: 10000
    Correctness valid: PASS
    Errors: 0





In [27]:
from llm_python.datasets.io import write_soar_parquet


write_soar_parquet(fixed_df, "/tmp/superking_merged_and_cleaned.parquet")