In [2]:
class cfg:
    """Notebook-wide configuration: model location, dataset paths, few-shot and sampling settings."""
    # Model
    # Exactly one model_path should be active; the alternatives are kept commented out for quick switching.
    # NOTE: SimplePromptCreator compares model_path against the Mistral path literally to decide
    # whether to emit a system message, so changing the Mistral path requires updating that check too.
    #model_path = '/kaggle/input/phi-3/transformers/phi-3-mini-128k-instruct/1/Phi-3-mini-128k-instruct'
    model_path = '/kaggle/input/mistral/pytorch/7b-instruct-v0.1-hf/1'
    #model_path = '/kaggle/input/llama-3/transformers/8b-chat-hf/1'
    #model_path = '/kaggle/input/llama-3.1/transformers/8b-instruct/1'
    max_model_len = 8192 #61000 for phi-3
    # Dataset
    # NOTE(review): the active dataset paths are absolute paths on a local Windows machine while
    # model_path points at a Kaggle input directory — confirm which environment this is meant to run in.
    dataset_path = r'C:\Users\gavin\OneDrive\Documents\ds_files\kaggle_arc\data\2024\arc-agi_training_challenges.json'
    #dataset_path = '/kaggle/input/arc-prize-2024/arc-agi_evaluation_challenges.json'
    #dataset_path = '/kaggle/input/arc-prize-2024/arc-agi_test_challenges.json'
    n_tasks = None # Optional parameter to limit the number of task in the inference, set it to None to use all the tasks
    # Few-shot
    # FewShotPromptCreator loads this file and also the sibling file obtained by replacing
    # 'challenges.json' with 'solutions.json' in the path.
    few_shot_dataset_path = r'C:\Users\gavin\OneDrive\Documents\ds_files\kaggle_arc\data\2024\arc-agi_training_challenges.json'
    # Number of explained tasks FewShotPromptCreator samples into each prompt (0 disables few-shot examples).
    n_shots = 0
    # Inference params
    max_predictions_per_task = 2 # presumably the number of predictions kept per task — usage is not visible in this section
    sampling_params = dict(temperature=0.0, max_tokens=2000) # https://docs.vllm.ai/en/latest/dev/sampling_params.html

In [1]:
# Dry-run detection is currently disabled. The original check (kept below for reference) compared
# cfg.dataset_path against the Kaggle test set and looked at the KAGGLE_IS_COMPETITION_RERUN variable.
# is_dry_run = cfg.dataset_path == '/kaggle/input/arc-prize-2024/arc-agi_test_challenges.json' and not os.getenv('KAGGLE_IS_COMPETITION_RERUN')
# if is_dry_run:
#     print('This is a dry run, no inference nor installation of packages will be done')
is_dry_run = False  # hard-coded: the notebook is never treated as a dry run

In [None]:
# %%time
# if not is_dry_run:
#     try:
#         import vllm
#     except ImportError:
#         !pip uninstall -q -y torch
#         !pip install -q --no-index --find-links=/kaggle/input/making-wheels-of-necessary-packages-for-vllm vllm
#     # model imports
#     from vllm import LLM, SamplingParams
#     from transformers import AutoTokenizer

In [54]:
from abc import ABC, abstractmethod
import json
import os
from tqdm.auto import tqdm
import numpy as np
# from itertools import islice, product
import matplotlib.pyplot as plt
from matplotlib import colors
from termcolor import colored

In [6]:
class GridEncoder(ABC):
    """Interface for converting a grid to text and back.

    Concrete encoders must implement both directions; the encoders in this
    notebook are checked so that ``to_grid(to_text(grid))`` returns ``grid``.
    """

    @abstractmethod
    def to_text(self, grid):
        """Return the text representation of ``grid``."""

    @abstractmethod
    def to_grid(self, text):
        """Return the grid parsed from ``text``."""

In [9]:
sample_grid = np.eye(3, dtype=int).tolist()

def test_translator(translator):
    assert sample_grid == translator.to_grid(translator.to_text(sample_grid))
    print(translator.to_text(sample_grid))

In [10]:
class MinimalGridEncoder(GridEncoder):
    """Encode a grid as one line per row, with the cell values concatenated without separators."""

    @staticmethod
    def to_text(grid):
        """Join the ``str`` of every cell in a row, then join the rows with newlines."""
        encoded_rows = []
        for row in grid:
            encoded_rows.append(''.join(map(str, row)))
        return '\n'.join(encoded_rows)

    @staticmethod
    def to_grid(text):
        """Parse ``text`` back into a list of rows, reading each character as one integer cell."""
        # Surrounding whitespace is dropped before splitting into rows.
        return [list(map(int, row)) for row in text.strip().splitlines()]
        
# Round-trip check on the sample grid; also prints the encoded text shown in the cell output.
test_translator(MinimalGridEncoder())

100
010
001


In [15]:
class GridWithSeparationEncoder(GridEncoder):
    """Encode a grid as one line per row, with cells separated by a configurable symbol."""

    def __init__(self, split_symbol):
        """Store ``split_symbol``, the string placed between the cells of a row."""
        self.split_symbol = split_symbol

    def to_text(self, grid):
        """Return the rows joined by newlines, each row's cells joined by the split symbol."""
        separator = self.split_symbol
        return '\n'.join(separator.join(str(cell) for cell in row) for row in grid)

    def to_grid(self, text):
        """Parse ``text`` into a list of rows of integers, splitting each line on the split symbol."""
        parsed_rows = []
        for row in text.strip().splitlines():
            parsed_rows.append([int(cell) for cell in row.split(self.split_symbol)])
        return parsed_rows
        
# Round-trip check using '|' as the cell separator; prints the encoded sample grid.
test_translator(GridWithSeparationEncoder('|'))

1|0|0
0|1|0
0|0|1


In [16]:
class GridCodeBlockEncoder(GridEncoder):
    """Wrap another encoder's text inside a fenced block tagged ``grid``."""

    def __init__(self, base_encoder):
        """Keep ``base_encoder``, which does the actual grid <-> text conversion."""
        self.encoder = base_encoder

    def to_text(self, grid):
        """Encode ``grid`` with the base encoder and surround it with the fence markers."""
        inner_text = self.encoder.to_text(grid)
        return f'```grid\n{inner_text}\n```'

    def to_grid(self, text):
        """Extract the first fenced ``grid`` block from ``text`` and decode it with the base encoder.

        Raises:
            IndexError: if ``text`` contains no opening fence.
        """
        after_opening_fence = text.split('```grid\n')[1]
        block_body = after_opening_fence.split('\n```')[0]
        return self.encoder.to_grid(block_body)
        
# Round-trip checks for the fenced-block wrapper around each base encoder; each call prints the encoded sample grid.
test_translator(GridCodeBlockEncoder(MinimalGridEncoder()))

test_translator(GridCodeBlockEncoder(GridWithSeparationEncoder('|')))

```grid
100
010
001
```
```grid
1|0|0
0|1|0
0|0|1
```


In [17]:
class PromptCreator(ABC):
    """Base class for objects that build model prompts for a task and parse the model's answer.

    Holds the grid encoder that subclasses use to convert grids to text and back.
    """

    def __init__(self, grid_encoder: GridEncoder):
        """Store ``grid_encoder`` for use by the subclass."""
        self.grid_encoder = grid_encoder

    @abstractmethod
    def create_task_prompts(self, task):
        """Build the prompts for ``task``."""

    @abstractmethod
    def parse_response(self, text):
        """Convert the response ``text`` into a grid."""

In [18]:
class SimplePromptCreator(PromptCreator):
    """Present the train pairs as a chat history and ask for the output of each test input."""

    def create_task_prompts(self, task):
        """Return one chat-formatted prompt string per test sample of ``task``.

        Every train sample becomes a user turn ("Input:") followed by an assistant
        turn ("Output:"); the test input is appended as the final user turn. The
        module-level ``tokenizer`` renders the messages with its chat template.
        """
        # Mistral does not have system prompt
        is_mistral = cfg.model_path == '/kaggle/input/mistral/pytorch/7b-instruct-v0.1-hf/1'
        history = [] if is_mistral else [
            {"role": "system", "content": "You are a helpful AI assistant. Your task is to answer to the user using always the same transformation of the user input."},
        ]
        for example in task['train']:
            history.extend([
                {"role": "user", "content": f"Input:\n{self.grid_encoder.to_text(example['input'])}"},
                {"role": "assistant", "content": f"Output:\n{self.grid_encoder.to_text(example['output'])}"},
            ])

        return [
            tokenizer.apply_chat_template(
                history + [{"role": "user", "content": f"Input:\n{self.grid_encoder.to_text(test_sample['input'])}"}],
                tokenize=False,
                add_generation_prompt=True,
            )
            for test_sample in task['test']
        ]

    def parse_response(self, text):
        """Decode the grid that follows the first ``Output:`` line of ``text``.

        Raises:
            IndexError: if ``text`` does not contain ``'Output:\\n'``.
        """
        pieces = text.split('Output:\n')
        return self.grid_encoder.to_grid(pieces[1])

In [22]:
# Hand-written explanations of the transformation behind a few tasks, keyed by task id.
# FewShotPromptCreator uses the keys to select its few-shot tasks and places the explanation
# at the start of the assistant answer for each of them, so this text is part of the prompt.
puzzle_explanations = {
    '00576224': """The pattern of the input is repeated to generate the output.

1. The first two rows are obtained by simply repeating the 2x2 pattern 3 times along the cols axis.
2. The following two rows are obtained by flipping the pattern horizontally and repeating it 3 times
3. The final two rows are identical to the first ones, simply repeat the 2x2 pattern 3 times.

Thus the output is 3 times bigger than the input (6x6 vs 2x2) because the pattern is repeated 3 times in the row and col axis.""",
    '009d5c81': """To create the output we have to copy the input with two modifications:

1. The object with color 1 is removed and replaced with the background color 0
2. The color of the other object (there are only two objects in the grid) is modified.
  The new color of this object depends on the shape of the object of color 1. There is a mapping
  between shapes and colors. Just look at the train examples for an object of the same shape
  and see the color that is applied on the output.""",
    '00dbd492': """The input shows a square with color 2 that is empty except from a point in the center.
The output is created by colorizing the inside of the square. The color is chosen depending on the size of the squares.
The larger square is painted with 3, the medium with 4 and the small with 8.""",
    '03560426': """The input shows objects of different colors at the bottom of the grid.
The output is created by moving the objects to the top left corner. The objects are moved from left to right order.
The first object is placed at the top left corner, the second object is placed at the lower right corner of the first object,
the third object is placed at the lower right corner of the second object and so on. There is oclusion between the objects,
in those oclusions we see the rightmost object.""",
    '0607ce86': """This is a denoising task. The input shows the same object repeated many times, but there are noisy pixels in the grid.
The output is created by removing all the noise in the grid. The background should be completely 0.
The real object without noise can be guessed because there are many repetitions of the object, so we simply have to
look at the majority pixel on each location.""",
    '0692e18c': """The ouptut is created following this steps.

1. The input is upscaled x3. So if the input is 3x3 the output should be an upscaled version of the input 9x9
2. We apply an AND function in a sliding window fashion over the output using the inverted input pattern (take the input and swicth the background color 0 with the other color and viceversa)
    """,
    '070dd51e': """The output is created by simply drawing horizontal and vertical lines between cells with the same color.
If there is an intersection between lines the vertical line will be shown.""",
    '08573cc6': """The output is created by drawing an spiral that starts at the cell with color 1.
The colors of the spiral are taken from the first two cells of the grid, which will be removed in the output.""",
    '0a2355a6': """The output is created by copying the input and changing the color of the objects.
The new color will be chosen depending on the number of holes of the object. There is a mapping between number of holes and color that can be observed from the input examples.""",
}

In [35]:
class FewShotPromptCreator(PromptCreator):
    """Build prompts with a task description, optional explained few-shot tasks, and the target task.

    The few-shot tasks are the ones that have an entry in ``puzzle_explanations``; for each
    of them the assistant answer is the explanation followed by the solution grid.

    NOTE(review): a system message is always emitted, whereas SimplePromptCreator skips it
    for the Mistral model path — confirm that the chat template in use accepts a system role.
    """
    task_description = """You are a helpful AI assistant. Your job is to solve tasks from the Abstraction and Reasoning Challenge (ARC). 
The user will present you with sample input and output grids for each task. 
Your job will be to understand the transformation between the input and the output and apply it to the last input grid given by the user. 
The puzzle-like inputs and outputs present a grid where each square can be one of ten colors. A grid can be any height or width between 1x1 and 30x30.
The background of the grid is typically colored with 0.
The tasks from ARC are based on the following priors:

- Objectness: Objects persist and cannot appear or disappear without reason. Objects can interact or not depending on the circumstances.
- Goal-directed: Objects can be animate or inanimate. Some objects are "agents" - they have intentions and they pursue goals.
- Numbers & counting: Objects can be counted or sorted by their shape, appearance, or movement using basic mathematics like addition, subtraction, and comparison.
- Basic geometry & topology: Objects can be shapes like rectangles, triangles, and circles which can be mirrored, rotated, translated, deformed, combined, repeated, etc. Differences in distances can be detected.

The transformations between input and output should be based on these priors.
"""
    def __init__(self, grid_encoder):
        """Load the few-shot tasks and their solutions, keeping only the explained tasks.

        Reads ``cfg.few_shot_dataset_path`` and the sibling file obtained by replacing
        ``'challenges.json'`` with ``'solutions.json'`` in that path.

        Raises:
            KeyError: if a task id from ``puzzle_explanations`` is missing in either file.
        """
        super().__init__(grid_encoder)
        with open(cfg.few_shot_dataset_path, 'r') as f:
            self.few_shot_tasks = json.load(f)
        with open(cfg.few_shot_dataset_path.replace('challenges.json', 'solutions.json'), 'r') as f:
            self.few_shot_solutions = json.load(f)
        # Only tasks with a written explanation can be used as few-shot examples.
        self.few_shot_tasks = {task_id: self.few_shot_tasks[task_id] for task_id in puzzle_explanations}
        self.few_shot_solutions = {task_id: self.few_shot_solutions[task_id] for task_id in puzzle_explanations}
        self.few_shot_task_ids = list(self.few_shot_tasks.keys())
        self.n_shots = cfg.n_shots

    def create_task_prompts(self, task):
        """Return one chat-formatted prompt string per test sample of ``task``.

        Every prompt shares the same prefix (system message plus the sampled few-shot
        conversations) and ends with a single user message holding the train examples
        of ``task`` and exactly one test input. Prompts for different test samples are
        independent of each other.
        """
        messages = [{"role": "system", "content": self.task_description}]

        # np.random.choice samples with replacement, so a task may be picked more than once.
        for task_id in np.random.choice(self.few_shot_task_ids, self.n_shots):
            few_shot_task = self.few_shot_tasks[task_id]
            user_message = self.create_user_message_for_train_examples(few_shot_task)
            for test_idx, test_sample in enumerate(few_shot_task['test']):
                user_message += self.create_input_message('Test case', test_sample)
                messages.append({"role": "user", "content": user_message})
                # Later test cases of the same few-shot task do not repeat the train examples.
                user_message = ''
                assistant_message = f'{puzzle_explanations[task_id]}\n\n' + self.create_output_message(self.few_shot_solutions[task_id][test_idx])
                messages.append({"role": "assistant", "content": assistant_message})

        train_message = self.create_user_message_for_train_examples(task)
        prompts = []
        for test_sample in task['test']:
            # Build a fresh final user turn for every test sample. Accumulating into the shared
            # message list would leak earlier test inputs into later prompts and produce
            # consecutive user turns for tasks with more than one test input.
            final_message = {
                "role": "user",
                "content": train_message + self.create_input_message('Test case', test_sample),
            }
            prompt = tokenizer.apply_chat_template(messages + [final_message],
                                                   tokenize=False,
                                                   add_generation_prompt=True)
            prompts.append(prompt)
        return prompts

    def create_user_message_for_train_examples(self, task):
        """Return the user text that introduces ``task`` and lists its train input/output pairs."""
        user_message = "Let's see if you can solve this simple ARC task. These are some input-output grid examples that define the task.\n"
        for example_idx, sample in enumerate(task['train']):
            user_message += self.create_input_message(f'Example {example_idx}', sample)
            user_message += '\n' + self.create_output_message(sample['output'])
        return user_message

    def create_input_message(self, title, sample):
        """Return a titled section containing the encoded ``sample['input']`` grid."""
        return f"\n## {title}\n\n### Input\n\n{self.grid_encoder.to_text(sample['input'])}\n"

    def create_output_message(self, grid):
        """Return an output section containing the encoded ``grid``."""
        return f"### Output\n\n{self.grid_encoder.to_text(grid)}\n"

    def parse_response(self, text):
        """Decode the response ``text`` into a grid using the configured grid encoder."""
        return self.grid_encoder.to_grid(text)

In [36]:
def print_sample_prompt(data, prompt_creator):
    """Pretty-print the shortest prompt among the first-test prompts of every task in ``data``.

    Args:
        data: mapping whose values are tasks accepted by ``prompt_creator``.
        prompt_creator: object exposing ``create_task_prompts(task)``.
    """
    first_prompts = []
    for task in data.values():
        first_prompts.append(prompt_creator.create_task_prompts(task)[0])
    # Shortest prompt first; ties keep their original order.
    first_prompts.sort(key=len)
    pretty_print_prompt(first_prompts[0])
    
def pretty_print_prompt(text):
    """Print ``text`` line by line, colored according to the most recent role marker.

    A line starting with a role marker switches the color for that line and the
    following ones; any line starting with ``<`` is additionally printed in bold.
    """
    marker_colors = {
        '<|assistant|>': 'blue',
        '<|user|>': 'black',
        '<|system|>': 'green',
    }
    color = 'black'
    for line in text.splitlines():
        # At most one marker can match, so the lookup order does not matter.
        for marker, marker_color in marker_colors.items():
            if line.startswith(marker):
                color = marker_color
                break
        attrs = ['bold'] if line.startswith('<') else None
        print(colored(line, color, attrs=attrs))

In [49]:
def plot_input_token_length_distribution(data, prompt_creator):
    prompts = []
    for task in data.values():
        prompts.extend(prompt_creator.create_task_prompts(task))
    token_length_distribution = [len(tokenizer.tokenize(prompt)) for prompt in tqdm(prompts)]
    plt.title('Prompt token length distribution')
    plt.hist(token_length_distribution)
    plt.xlabel('n tokens')