In [1]:
pip install faiss-cpu numpy


Looking in indexes: https://pypi.python.org/simple
Collecting faiss-cpu
  Downloading faiss_cpu-1.8.0.post1-cp38-cp38-macosx_11_0_arm64.whl.metadata (3.7 kB)
Downloading faiss_cpu-1.8.0.post1-cp38-cp38-macosx_11_0_arm64.whl (6.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.0/6.0 MB[0m [31m9.4 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hInstalling collected packages: faiss-cpu
Successfully installed faiss-cpu-1.8.0.post1

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.2[0m[39;49m -> [0m[32;49m24.3.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [12]:
import json
import os
import numpy as np
import faiss
import random

def load_arc_tasks(task_dir):
    """Load every task stored as a ``.json`` file directly inside ``task_dir``.

    Files are read in sorted filename order so the returned list is identical
    on every run and platform (``os.listdir`` yields entries in arbitrary
    order). Entries whose name does not end in ``.json`` are ignored.

    Args:
        task_dir: Path of the directory that contains the task files.

    Returns:
        A list with one parsed JSON object per ``.json`` file.

    Raises:
        OSError: If ``task_dir`` cannot be listed or a file cannot be opened.
        json.JSONDecodeError: If a ``.json`` file does not contain valid JSON.
    """
    tasks = []
    for filename in sorted(os.listdir(task_dir)):
        if not filename.endswith(".json"):
            continue  # Skip non-JSON files
        # JSON text is UTF-8; do not depend on the platform default encoding.
        with open(os.path.join(task_dir, filename), 'r', encoding='utf-8') as f:
            tasks.append(json.load(f))
    return tasks

def process_tasks(tasks):
    """Collect the training pairs of every task into two parallel lists.

    Args:
        tasks: Iterable of task dicts. Each task's ``'train'`` entry (treated
            as empty when absent) is a list of dicts with ``'input'`` and
            ``'output'`` grids.

    Returns:
        ``(inputs, outputs)`` where ``inputs[i]`` is the input grid serialised
        by ``grid_to_string`` and ``outputs[i]`` is the matching output grid,
        left untouched.
    """
    inputs, outputs = [], []
    for task in tasks:
        for pair in task.get('train', []):
            grid_in, grid_out = pair['input'], pair['output']
            # Only the input is serialised; the output stays a nested list.
            inputs.append(grid_to_string(grid_in))
            outputs.append(grid_out)
    return inputs, outputs

def grid_to_string(grid):
    """Serialise a 2-D grid into one space-separated string, row by row.

    Args:
        grid: Iterable of rows, each an iterable of cell values.

    Returns:
        The ``str()`` of every cell in row-major order joined by single
        spaces; an empty string for an empty grid.
    """
    cells = []
    for row in grid:
        cells.extend(str(cell) for cell in row)
    return ' '.join(cells)

def build_embeddings(inputs):
    """Encode whitespace-tokenised strings as lists of integer token ids.

    The vocabulary is built from the sorted set of distinct tokens, so the
    token -> id mapping is the same on every run (iterating a ``set`` of
    strings directly depends on hash randomisation). Ids start at 1: the
    value 0 is left free because ``pad_embeddings`` fills unused positions
    with zeros and ``embed_query`` maps unknown tokens to 0.

    Args:
        inputs: List of strings whose tokens are separated by whitespace.

    Returns:
        ``(embeddings, vocab)`` where ``embeddings`` is a list of variable
        length id lists (one per input, not yet padded) and ``vocab`` maps
        each token to its id.
    """
    token_lists = [input_str.split() for input_str in inputs]
    unique_tokens = sorted({token for tokens in token_lists for token in tokens})
    vocab = {token: idx for idx, token in enumerate(unique_tokens, start=1)}
    embeddings = [[vocab[token] for token in tokens] for tokens in token_lists]
    return embeddings, vocab  # Do not convert to np.array here

def pad_embeddings(embeddings):
    """Right-pad variable-length id lists with zeros into one float32 matrix.

    Args:
        embeddings: List of sequences of numbers, possibly of different
            lengths.

    Returns:
        A ``float32`` array of shape ``(len(embeddings), max_length)`` where
        ``max_length`` is the longest sequence length. An empty input yields
        an array of shape ``(0, 0)`` instead of raising.
    """
    # ``default=0`` keeps an empty input from raising inside ``max``.
    max_length = max((len(embed) for embed in embeddings), default=0)
    padded_embeddings = np.zeros((len(embeddings), max_length), dtype='float32')
    for idx, embed in enumerate(embeddings):
        padded_embeddings[idx, :len(embed)] = embed
    return padded_embeddings

def embed_query(input_grid, vocab):
    """Encode a query grid as a list of token ids using an existing vocabulary.

    Args:
        input_grid: Grid to encode; it is serialised with ``grid_to_string``.
        vocab: Mapping from token string to integer id.

    Returns:
        A plain list with one id per grid cell; tokens missing from ``vocab``
        are encoded as 0.
    """
    query_ids = []
    for token in grid_to_string(input_grid).split():
        query_ids.append(vocab.get(token, 0))
    return query_ids  # Return as list

def pad_query_embedding(embedding, max_length):
    """Fit a single query embedding into a ``(1, max_length)`` float32 row.

    Shorter embeddings are right-padded with zeros. Longer embeddings are
    truncated to their first ``max_length`` values; without the truncation
    the slice assignment raises ``ValueError`` whenever a query is longer
    than the target width.

    Args:
        embedding: Sequence of numbers (list or 1-D array).
        max_length: Width of the returned row.

    Returns:
        A ``float32`` array of shape ``(1, max_length)``.
    """
    padded_embedding = np.zeros((1, max_length), dtype='float32')
    usable = min(len(embedding), max_length)
    padded_embedding[0, :usable] = embedding[:usable]
    return padded_embedding

def grids_are_equal(grid1, grid2):
    """Return the result of comparing two grids with ``==``.

    For grids given as lists of lists this is an element-wise equality
    check that also requires identical dimensions.
    """
    same = grid1 == grid2
    return same

def main(train_task_dir='data/training', split_ratio=0.8, seed=42, verbose=True):
    """Evaluate a nearest-neighbour baseline on a held-out split of the tasks.

    The tasks found in ``train_task_dir`` are shuffled and split. The
    ``'train'`` pairs of the first part are encoded and stored in a FAISS L2
    index; each ``'test'`` input of the remaining tasks is encoded the same
    way, and the output grid of its nearest stored neighbour is used as the
    prediction. A prediction counts as correct only when it equals the
    expected output grid exactly.

    Args:
        train_task_dir: Directory containing the task ``.json`` files.
        split_ratio: Fraction of the tasks used to build the index; the rest
            is used for validation.
        seed: Seed for the shuffle so the split is reproducible. Pass
            ``None`` for a different split on every run.
        verbose: When true, print the input, expected and predicted grids of
            every validation example.

    Raises:
        ValueError: If the training split contains no examples to index.
    """
    # Load all training tasks
    print("Loading all training tasks...")
    all_tasks = load_arc_tasks(train_task_dir)

    # Shuffle with a dedicated RNG: the split is reproducible for a given
    # seed and the global ``random`` state is left untouched.
    random.Random(seed).shuffle(all_tasks)

    # Split into training and validation sets
    split_index = int(split_ratio * len(all_tasks))
    train_tasks = all_tasks[:split_index]
    validation_tasks = all_tasks[split_index:]

    print(f"Total tasks: {len(all_tasks)}")
    print(f"Training tasks: {len(train_tasks)}")
    print(f"Validation tasks: {len(validation_tasks)}")

    # Process training tasks
    print("Processing training tasks...")
    inputs, outputs = process_tasks(train_tasks)
    if not inputs:
        # Fail with a clear message instead of an opaque error further down.
        raise ValueError(
            f"No training examples found under {train_task_dir!r}; "
            "cannot build the index."
        )

    # Build embeddings
    print("Building embeddings...")
    embeddings, vocab = build_embeddings(inputs)
    embeddings = pad_embeddings(embeddings)

    # Build FAISS index
    print("Building FAISS index...")
    dimension = embeddings.shape[1]
    index = faiss.IndexFlatL2(dimension)
    index.add(embeddings)

    # Testing on validation tasks
    print("Testing on validation tasks...")
    correct = 0
    total = 0

    for task in validation_tasks:
        for test_example in task.get('test') or []:
            test_input_grid = test_example['input']
            expected_output_grid = test_example.get('output')

            if expected_output_grid is None:
                continue  # Skip if there's no expected output

            # Embed the test input grid at the same width as the index
            test_embedding = embed_query(test_input_grid, vocab)
            test_embedding = pad_query_embedding(test_embedding, dimension)

            # Search for the nearest neighbour; only its position is needed
            _, neighbours = index.search(test_embedding, k=1)
            nearest_index = neighbours[0][0]
            predicted_output = outputs[nearest_index]

            # Compare predicted output to expected output
            is_correct = grids_are_equal(predicted_output, expected_output_grid)

            total += 1
            if is_correct:
                correct += 1

            if not verbose:
                continue

            # Display the results for this test case
            print(f"\nTest Example {total}:")
            print("Test Input Grid:")
            for row in test_input_grid:
                print(row)

            print("\nExpected Output Grid:")
            for row in expected_output_grid:
                print(row)

            print("\nPredicted Output Grid:")
            for row in predicted_output:
                print(row)

            print(f"\nIs the predicted output correct? {is_correct}")

    # Summary of results
    print(f"\nTotal validation examples: {total}")
    print(f"Correct predictions: {correct}")
    if total > 0:
        accuracy = correct / total * 100
        print(f"Validation Accuracy: {accuracy:.2f}%")
    else:
        print("No validation examples found.")

# Run the experiment only when this code is executed as the main module
# (not when it is imported from elsewhere).
if __name__ == '__main__':
    main()


Loading all training tasks...
Total tasks: 400
Training tasks: 320
Validation tasks: 80
Processing training tasks...
Building embeddings...
Building FAISS index...
Testing on validation tasks...

Test Example 1:
Test Input Grid:
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 2, 2, 2, 0, 0]
[0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 5, 0, 0, 2, 2, 2, 0, 0]
[0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0]
[0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0]
[0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 5, 5, 5, 5, 5, 5, 5, 5]
[0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
[0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
[0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0]
[0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0]
[0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0]
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,