In [3]:
from datasets import Dataset, DatasetDict, load_dataset
from typing import Dict, Any
from vllm import LLM
from vllm.sampling_params import SamplingParams

In [8]:
# Identifiers for the model and the dataset.
model_name = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
dataset_name = "lordspline/arc-agi"
# Load through the constant so the dataset identifier lives in one place.
dataset = load_dataset(dataset_name)

# Show the first record of the 'training' split.
print(dataset['training'][0])

{'train': [{'input': [[0, 0, 5], [0, 5, 0], [5, 0, 0]], 'output': [[3, 3, 3], [4, 4, 4], [2, 2, 2]]}, {'input': [[0, 0, 5], [0, 0, 5], [0, 0, 5]], 'output': [[3, 3, 3], [3, 3, 3], [3, 3, 3]]}, {'input': [[5, 0, 0], [0, 5, 0], [5, 0, 0]], 'output': [[2, 2, 2], [4, 4, 4], [2, 2, 2]]}, {'input': [[0, 5, 0], [0, 0, 5], [0, 5, 0]], 'output': [[4, 4, 4], [3, 3, 3], [4, 4, 4]]}], 'test': [{'input': [[0, 0, 5], [5, 0, 0], [0, 5, 0]], 'output': [[3, 3, 3], [2, 2, 2], [4, 4, 4]]}]}


In [5]:
def format_grid(grid_input):
    """Render a 2D grid as text: one row per line, cells separated by one space.

    Args:
        grid_input: Either a string containing a Python literal for the grid
            (e.g. "[[0, 1], [2, 3]]") or an already-built sequence of rows
            (list, tuple, ...), where each row is an iterable of cells.

    Returns:
        The formatted grid as a single string. An empty grid yields "".

    Raises:
        ValueError, SyntaxError: If a string input is not a valid Python literal.
    """
    # Only strings need parsing. Any other sequence is used as-is, so tuples
    # and similar containers no longer get pushed through literal_eval.
    if isinstance(grid_input, str):
        import ast
        grid = ast.literal_eval(grid_input)
    else:
        grid = grid_input

    # One line per row, cells converted with str() and joined by spaces.
    return '\n'.join(' '.join(str(cell) for cell in row) for row in grid)


def format_prompt(example):
    """Build the few-shot prompt text for a single task.

    Args:
        example: Mapping with a 'train' list of {'input', 'output'} grid pairs
            and a 'test' list; only the first test entry's 'input' is used.

    Returns:
        The prompt string: an instruction line, the numbered demonstrations,
        the test input grid, and a trailing "Output:" cue.
    """
    pieces = [
        "Find the common rule that maps an input grid to an output grid given the examples below.\n\n"
    ]

    # Demonstration pairs, numbered from 1.
    for number, pair in enumerate(example['train'], start=1):
        pieces.append(f"Example {number}:\n\n")
        pieces.append(f"Input:\n{format_grid(pair['input'])}\n")
        pieces.append(f"Output:\n{format_grid(pair['output'])}\n\n")

    # The question itself: first test input, answer left open.
    pieces.append(
        "\nBelow is a test input grid. Predict the corresponding output grid by applying the rule you found. Your final answer should just be the text output grid itself.\n\n"
    )
    test_input_text = format_grid(example['test'][0]['input'])
    pieces.append(f"Input:\n{test_input_text}\n")
    pieces.append("Output:")

    return ''.join(pieces)

# Example usage: render and print the prompt for one task.
# Relies on `dataset` having been loaded by an earlier cell.
sample = dataset['training'][0]  # first record of the 'training' split
formatted_prompt = format_prompt(sample)
print(formatted_prompt)

Find the common rule that maps an input grid to an output grid given the examples below.

Example 1:

Input:
0 0 5
0 5 0
5 0 0
Output:
3 3 3
4 4 4
2 2 2

Example 2:

Input:
0 0 5
0 0 5
0 0 5
Output:
3 3 3
3 3 3
3 3 3

Example 3:

Input:
5 0 0
0 5 0
5 0 0
Output:
2 2 2
4 4 4
2 2 2

Example 4:

Input:
0 5 0
0 0 5
0 5 0
Output:
4 4 4
3 3 3
4 4 4


Below is a test input grid. Predict the corresponding output grid by applying the rule you found. Your final answer should just be the text output grid itself.

Input:
0 0 5
5 0 0
0 5 0
Output:


# Self-adapting dataset to clean up ARC-AGI

In [6]:
class SelfAdaptingDataset:
    """Prompt/completion view of a train/test style DatasetDict.

    On construction the 'training' and 'evaluation' splits of the wrapped
    dataset are converted into records with 'prompt', 'completion', three
    initially empty annotation fields ('learned', 'mistake',
    'best_completion') and the untouched source record ('original_data').
    The annotation fields can be filled in later with ``update_example``.
    """

    # Column order of the transformed splits.
    _COLUMNS = (
        'prompt',
        'completion',
        'learned',
        'mistake',
        'best_completion',
        'original_data',
    )

    def __init__(self, dataset):
        """Keep a reference to ``dataset`` and build the transformed copy eagerly."""
        self.original_dataset = dataset
        self.formatted_dataset = self._transform_dataset()

    def _transform_single_example(self, example: Dict[str, Any]) -> Dict[str, Any]:
        """Transform a single example from train/test format to prompt/completion format.

        The completion is the formatted 'output' grid of the first test entry.
        """
        # NOTE(review): the template repeats '<|user|>' after the prompt body
        # instead of using a distinct end-of-turn marker; confirm these tokens
        # against the chat template of the model this prompt is fed to.
        prompt = f'<|user|>{format_prompt(example)}<|user|><|assistant|><think>\n'
        completion = format_grid(example['test'][0]['output'])

        return {
            'prompt': prompt,
            'completion': completion,
            'learned': '',  # Empty placeholder for learned field
            'mistake': '',  # Empty placeholder for mistake field
            'best_completion': '',  # Empty placeholder for best completion field
            'original_data': example  # Keep original data for reference if needed
        }

    def _transform_dataset(self) -> DatasetDict:
        """Transform the 'training' and 'evaluation' splits of the wrapped dataset."""

        def transform_split(split_dataset):
            # Convert row by row, then pivot into the column-oriented mapping
            # that Dataset.from_dict expects.
            rows = [self._transform_single_example(example) for example in split_dataset]
            return Dataset.from_dict(
                {column: [row[column] for row in rows] for column in self._COLUMNS}
            )

        return DatasetDict({
            'training': transform_split(self.original_dataset['training']),
            'evaluation': transform_split(self.original_dataset['evaluation'])
        })

    def get_dataset(self) -> DatasetDict:
        """Get the transformed dataset."""
        return self.formatted_dataset

    def update_example(self, index: int, split: str,
                      learned: str = None,
                      mistake: str = None,
                      best_completion: str = None):
        """
        Update the learned/mistake/best_completion fields for a specific example.

        Only the fields passed as non-None are changed; all requested fields
        are written in a single pass over the split. If every field is None
        the dataset is left untouched. An index that matches no row changes
        no values.

        Args:
            index: Index of the example to update
            split: 'training' or 'evaluation'
            learned: What the model learned from this example
            mistake: What mistake was made
            best_completion: The best completion found so far
        """
        requested = {
            'learned': learned,
            'mistake': mistake,
            'best_completion': best_completion,
        }
        updates = {field: value for field, value in requested.items() if value is not None}
        if not updates:
            return

        def apply_updates(row, row_index):
            # Every row returns the same set of keys: the target row gets the
            # new values, all other rows echo back what they already hold.
            if row_index == index:
                return dict(updates)
            return {field: row[field] for field in updates}

        self.formatted_dataset[split] = self.formatted_dataset[split].map(
            apply_updates,
            with_indices=True
        )

In [7]:
# Example usage: load the dataset, wrap it, and inspect the transformed result.
# NOTE(review): this re-binds `dataset`, which an earlier cell already loaded
# from the same identifier.
dataset = load_dataset("lordspline/arc-agi")
adapted_dataset = SelfAdaptingDataset(dataset)
transformed_dataset = adapted_dataset.get_dataset()

# Print the first transformed record of the 'training' split
print(transformed_dataset['training'][0])

{'prompt': 'Find the common rule that maps an input grid to an output grid given the examples below.\n\nExample 1:\n\nInput:\n0 0 5\n0 5 0\n5 0 0\nOutput:\n3 3 3\n4 4 4\n2 2 2\n\nExample 2:\n\nInput:\n0 0 5\n0 0 5\n0 0 5\nOutput:\n3 3 3\n3 3 3\n3 3 3\n\nExample 3:\n\nInput:\n5 0 0\n0 5 0\n5 0 0\nOutput:\n2 2 2\n4 4 4\n2 2 2\n\nExample 4:\n\nInput:\n0 5 0\n0 0 5\n0 5 0\nOutput:\n4 4 4\n3 3 3\n4 4 4\n\n\nBelow is a test input grid. Predict the corresponding output grid by applying the rule you found. Your final answer should just be the text output grid itself.\n\nInput:\n0 0 5\n5 0 0\n0 5 0\nOutput:', 'completion': '3 3 3\n2 2 2\n4 4 4', 'learned': '', 'mistake': '', 'best_completion': '', 'original_data': {'test': [{'input': [[0, 0, 5], [5, 0, 0], [0, 5, 0]], 'output': [[3, 3, 3], [2, 2, 2], [4, 4, 4]]}], 'train': [{'input': [[0, 0, 5], [0, 5, 0], [5, 0, 0]], 'output': [[3, 3, 3], [4, 4, 4], [2, 2, 2]]}, {'input': [[0, 0, 5], [0, 0, 5], [0, 0, 5]], 'output': [[3, 3, 3], [3, 3, 3], [3, 

In [21]:
def parse_grid(text: str) -> list[list[list[int]]]:
    """
    Extracts every 2D grid of integers found in a text string.

    A line whose whitespace-separated tokens all parse as integers is a grid row.
    Consecutive rows are collected into one grid; a line that contains anything
    else closes the grid being collected. Blank lines are skipped and do NOT
    close a grid, so rows separated only by blank lines land in the same grid.

    :param text: A string containing arbitrary text and lines with integers.
    :return: The grids in order of appearance, each a list of rows, each row a
             list of ints. Empty list when no integer-only line is present.
    """
    grids = []
    current = []

    for line in text.splitlines():
        line = line.strip()

        # Skip empty lines
        if not line:
            continue

        try:
            current.append([int(token) for token in line.split()])
        except ValueError:
            # A non-numeric line marks the end of the grid collected so far.
            if current:
                grids.append(current)
                current = []

    # Flush the grid still being collected when the text ends; without this a
    # grid on the final lines of the text would be dropped.
    if current:
        grids.append(current)

    return grids
# Expected: [[0, 0, 5], [5, 0, 0], [0, 5, 0]]


[[4, 4, 4, 0, 14], [3, 3, 3], [4, 4, 4]]
