## Kaggle is slow - you'll have to wait **5 minutes** for it to install.

I suggest using our free Colab notebooks instead. Our Llama 3.1 8B Colab notebook is linked here: [notebook](https://colab.research.google.com/drive/1Ys44kVvmeZtnICzWz0xgpRnrIOjZAuxp?usp=sharing)

In [1]:
# %%capture
# !pip install pip3-autoremove
# !pip-autoremove torch torchvision torchaudio -y
# !pip install torch torchvision torchaudio xformers --index-url https://download.pytorch.org/whl/cu121
# !pip install unsloth

In [2]:
# Directory passed as the `--find-links` source to the `pip install --no-index`
# commands in the following cells (presumably pre-downloaded wheels, going by
# the directory name -- confirm against the attached dataset).
deps_path = "/kaggle/input/unsloth-env/whl_packages"

In [3]:
%%capture
# Offline install: `--no-index` keeps pip away from the package index and
# `--find-links {deps_path}` makes it resolve packages from that directory.
!pip install --no-index --find-links {deps_path} pip3-autoremove
# Remove torch, torchvision and torchaudio before reinstalling them from `deps_path`.
!pip-autoremove torch torchvision torchaudio -y
!pip install --no-index --find-links {deps_path} torch 
!pip install --no-index --find-links {deps_path} torchvision   
!pip install --no-index --find-links {deps_path} torchaudio
!pip install --no-index --find-links {deps_path} xformers

In [4]:
%%capture
# Install unsloth with its `kaggle-new` extra, again resolved only from `deps_path`.
! pip install --no-index --find-links {deps_path} "unsloth[kaggle-new]"

### Tools

In [5]:
def print_former_k_dict(dct, former_k=1):
    """Print the first ``former_k`` entries of ``dct``, then an empty line.

    Entries are taken in the dictionary's iteration order; each key and its
    value are printed on separate lines.
    """
    printed = 0
    for key, value in dct.items():
        if printed == former_k:
            break
        print(key, value, sep="\n")
        printed += 1
    print()

### 1. Load ARC dataset

In [6]:
import os

# Root directory of the ARC Prize 2024 competition files.
DATA_ROOT = "/kaggle/input/arc-prize-2024/"

# Paths are built with os.path.join so that the trailing slash on DATA_ROOT
# does not end up as a doubled separator ("//") inside every path.
train_input_path = os.path.join(DATA_ROOT, "arc-agi_training_challenges.json")
train_output_path = os.path.join(DATA_ROOT, "arc-agi_training_solutions.json")

eval_input_path = os.path.join(DATA_ROOT, "arc-agi_evaluation_challenges.json")
eval_output_path = os.path.join(DATA_ROOT, "arc-agi_evaluation_solutions.json")

test_input_path = os.path.join(DATA_ROOT, "arc-agi_test_challenges.json")
sample_path = os.path.join(DATA_ROOT, "sample_submission.json")

path_dict = dict(
    train_input_path=train_input_path,
    train_output_path=train_output_path,
    eval_input_path=eval_input_path,
    eval_output_path=eval_output_path,
    test_input_path=test_input_path,
    sample_path=sample_path,
)

# Sanity check: report, per path, whether the file is actually present.
for k, path in path_dict.items():
    print(k, os.path.isfile(path))

train_input_path True
train_output_path True
eval_input_path True
eval_output_path True
test_input_path True
sample_path True


### 2. Process ARC dataset

In [7]:
# Prompt text sent to the model for every task. The `{TRAIN}` and `{TEST}`
# placeholders are filled in by process_input via str.format.
# NOTE(review): the loaded LoRA model was presumably fine-tuned on exactly this
# wording -- confirm before editing the text.
PromptTemplate = """
You are given pairs of 2D matrices representing grids. In each matrix, 0 indicates the background, while identical non-zero numbers form specific zones and patterns.  
Your task is to identify the transformation rule that links each input matrix to its corresponding output matrix in the Examples. Then, apply this rule to generate an output matrix for the Test Input Matrix.

Specifically, you need to follow the steps below:
1. Focus on the size relationship between the input matrix and the output matrix in the Examples. There must be a clear dependency between the sizes of the matrices. Based on this, you should accurately determine the size of the output matrix from the Test Input Matrix.
2. Understand the transformation rule between the input matrix and the output matrix. These transformations are based on information from regions formed by identical numbers. This includes absolute positions and shapes of regions, relative positional relationships between regions, etc. You must have a clear definition and description of this transformation rule (but do not output it).
3. Based on the clearly understood transformation rule, strictly follow the output matrix size determined in the first step to generate the output matrix.
4. You only need to output the output matrix.

Examples:
{TRAIN}

Test Input Matrix:
{TEST}
"""

In [8]:
import json
from tqdm import tqdm


def process_input(v):
    """Build the prompt text for one task entry.

    ``v["train"]`` supplies the examples: each one is serialised with
    ``json.dumps`` and they are joined one per line into the ``{TRAIN}`` slot
    of ``PromptTemplate``. Only the first element of ``v["test"]`` is used; it
    is placed into the ``{TEST}`` slot as-is, so it is rendered by
    ``str.format`` rather than by ``json.dumps``.
    """
    first_test = v["test"][0]
    example_lines = (json.dumps(example) for example in v["train"])
    return PromptTemplate.format(
        TRAIN="\n".join(example_lines),
        TEST=first_test,
    )

def process_output(v):
    """Return the first element of ``v`` serialised as a JSON string."""
    first_solution = v[0]
    return json.dumps(first_solution)

def process_input_output(input_path, output_path=None):
    """Load a challenges file (and optionally a solutions file) into prompts.

    Returns a dict with the same keys as the JSON object in ``input_path``.
    Each value is a dict holding the generated prompt under ``"input"``; when
    ``output_path`` is given, the serialised first solution for that key is
    added under ``"output"``.
    """
    with open(input_path, "r") as challenge_file:
        challenges = json.load(challenge_file)

    prompts = {}
    for task_key, task in tqdm(challenges.items()):
        prompts[task_key] = {"input": process_input(task)}

    # Without a solutions file there is nothing more to attach.
    if output_path is None:
        return prompts

    with open(output_path, "r") as solution_file:
        solutions_by_key = json.load(solution_file)

    for task_key, task_solutions in tqdm(solutions_by_key.items()):
        prompts[task_key]["output"] = process_output(task_solutions)

    return prompts

In [9]:
# Build the prompt dictionaries for the three splits. No solutions path is
# passed for the test split, so its entries only carry the "input" prompt.
train_prompts = process_input_output(input_path=train_input_path, output_path=train_output_path)
eval_prompts = process_input_output(input_path=eval_input_path, output_path=eval_output_path)
test_prompts = process_input_output(input_path=test_input_path)
                               
# Uncomment to print the first entry of each split for inspection.
# print_former_k_dict(train_prompts)
# print_former_k_dict(eval_prompts)
# print_former_k_dict(test_prompts)

100%|██████████| 400/400 [00:00<00:00, 4625.63it/s]
100%|██████████| 400/400 [00:00<00:00, 33995.01it/s]
100%|██████████| 400/400 [00:00<00:00, 3746.40it/s]
100%|██████████| 400/400 [00:00<00:00, 22025.44it/s]
100%|██████████| 100/100 [00:00<00:00, 4971.56it/s]


In [10]:
import torch
#@title Show current memory stats

# Divisor used to convert byte counts to GB (1024 * 1024 * 1024).
BYTES_PER_GB = 1024 ** 3

gpu_stats = torch.cuda.get_device_properties(0)
max_memory = round(gpu_stats.total_memory / BYTES_PER_GB, 3)
# Peak memory reserved so far, read before the model is loaded.
start_gpu_memory = round(torch.cuda.max_memory_reserved() / BYTES_PER_GB, 3)

print(f"GPU = {gpu_stats.name}. Max memory = {max_memory} GB.")
print(f"{start_gpu_memory} GB of memory reserved.")

GPU = Tesla P100-PCIE-16GB. Max memory = 15.888 GB.
0.0 GB of memory reserved.


### Inference

In [11]:
def get_answer(model, tokenizer, prompt, max_new_tokens=512):
    """Generate the model's reply to ``prompt`` using greedy decoding.

    Args:
        model: Model object exposing ``generate``.
        tokenizer: Tokenizer exposing ``apply_chat_template`` and
            ``batch_decode``.
        prompt: Text sent as the single user message.
        max_new_tokens: Upper bound on the number of generated tokens.

    Returns:
        str: The decoded text of the newly generated tokens only, with
        surrounding whitespace stripped.
    """
    messages = [
        {"role": "user", "content": prompt},
    ]
    inputs = tokenizer.apply_chat_template(
        messages,
        tokenize=True,
        add_generation_prompt=True,  # Must add for generation
        return_tensors="pt",
    ).to("cuda")

    gen_tokens = model.generate(
        input_ids=inputs,
        max_new_tokens=max_new_tokens,
        use_cache=True,
        do_sample=False,
    )

    # The generated sequences start with the prompt tokens. Cut the prompt off
    # at token level before decoding: slicing the decoded text by the length of
    # the decoded prompt is unreliable, because decoding is not guaranteed to
    # be length-preserving across the prompt/continuation boundary (e.g. when
    # the tokenizer cleans up spaces before punctuation).
    prompt_length = inputs.shape[1]
    new_tokens = gen_tokens[:, prompt_length:]

    answers = tokenizer.batch_decode(new_tokens, skip_special_tokens=True)
    return answers[0].strip()

### Load Lora Model

In [12]:
# Model directory handed to FastLanguageModel.from_pretrained in the next cell.
# The commented-out assignments are alternative locations kept for reference.
# model_path = "/kaggle/input/arc_lora_unsloth_llama-3.2-3b-instruct-bnb-4bit_e5/transformers/default/1"
model_path = "/kaggle/input/arc_lora_model_llama31_8b_instruct_r64/transformers/default/1"
# model_path = "lora_model"

In [13]:
from unsloth import FastLanguageModel

# NOTE: max_seq_length is also reused by the inference loop as the
# `max_new_tokens` budget passed to get_answer.
max_seq_length = 1024 # Choose any! We auto support RoPE Scaling internally!
dtype = None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
load_in_4bit = True # Use 4bit quantization to reduce memory usage. Can be False.

# Load the model and its tokenizer from `model_path`.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = model_path, # YOUR MODEL YOU USED FOR TRAINING
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
)
FastLanguageModel.for_inference(model) # Enable native 2x faster inference
# No-op final statement; presumably here so the cell does not echo the return
# value of the call above.
pass

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
==((====))==  Unsloth 2024.11.5: Fast Llama patching. Transformers = 4.46.2.
   \\   /|    GPU: Tesla P100-PCIE-16GB. Max memory: 15.888 GB. Platform = Linux.
O^O/ \_/ \    Pytorch: 2.5.1+cu121. CUDA = 6.0. CUDA Toolkit = 12.1.
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.28.post3. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


Unsloth 2024.11.5 patched 32 layers with 32 QKV layers, 32 O layers and 32 MLP layers.


In [14]:
# Select the split the following cells run on: here the evaluation challenges
# file and the prompts built from it.
input_path = eval_input_path
prompts = eval_prompts

In [15]:
# Placeholder submission: for every key in the selected input file, each test
# item gets the dummy grid [[0]] for both attempts; the result is written to
# submission.json.
import json
from tqdm import tqdm

with open(input_path, "r") as challenge_file:
    input_data = json.load(challenge_file)

ret = {}
for k, task in tqdm(input_data.items()):
    ret[k] = [
        {"attempt_1": [[0]], "attempt_2": [[0]]}
        for _ in task["test"]
    ]

with open("submission.json", "w") as submission_file:
    json.dump(ret, submission_file)

100%|██████████| 400/400 [00:00<00:00, 307050.07it/s]


### Format output

In [16]:
# The model output may have been cut off by the output-length / OOM limits.

def complete_matrix_string(matrix_string):
    """Repair a truncated matrix string and return it as a parsed grid.

    The text is expected to be a JSON list of lists that may have been cut off
    before its closing brackets. Trailing whitespace and a dangling comma left
    by the cut are removed, the missing ``]`` characters are appended, and the
    result is parsed.

    Args:
        matrix_string: Raw (possibly truncated) matrix text.

    Returns:
        list: The parsed grid (a non-empty list of lists). ``[[0]]`` is
        returned when the text still cannot be parsed or does not describe a
        list of lists. The result is always a parsed value, never the repaired
        text, so it can be stored directly as a grid.
    """
    # A cut right after a separator (e.g. "[[1, 2], [3, ") would otherwise
    # leave ", ]" behind, which is not valid JSON.
    repaired = matrix_string.rstrip(" \t\r\n,")

    # Count the brackets and append however many closing ones are missing.
    missing_brackets = repaired.count("[") - repaired.count("]")
    if missing_brackets > 0:
        repaired += "]" * missing_brackets

    # Check that the repaired text really is valid JSON.
    try:
        parsed = json.loads(repaired)
    except json.JSONDecodeError:
        return [[0]]

    is_grid = (
        isinstance(parsed, list)
        and len(parsed) > 0
        and all(isinstance(row, list) for row in parsed)
    )
    return parsed if is_grid else [[0]]

### Process

In [17]:
# When True, the inference loop skips tasks whose first test input grid has
# 400 or more cells and records the placeholder [[0]] for them instead.
if_jump_large = False

In [None]:
import json

from tqdm import tqdm
import torch

# With `if_jump_large` enabled, tasks whose first test grid has at least this
# many cells are skipped.
LARGE_GRID_CELLS = 400
# Print a sample and write intermediate results every this many tasks.
SAVE_EVERY = 20


def _parse_grid(answer):
    """Convert raw model text into a grid (list of lists); [[0]] if unusable."""
    try:
        grid = json.loads(answer)
    except json.JSONDecodeError:
        grid = complete_matrix_string(answer)
        # The repair helper may hand back repaired *text* rather than a parsed
        # value; parse it so a string never ends up in the submission.
        if isinstance(grid, str):
            grid = json.loads(grid)

    looks_like_grid = (
        isinstance(grid, list)
        and len(grid) > 0
        and all(isinstance(row, list) for row in grid)
    )
    return grid if looks_like_grid else [[0]]


def _save_results():
    """Write the raw answers and the submission collected so far to disk."""
    with open("solutions.json", "w") as f:
        json.dump(solutions, f)

    with open("submission.json", "w") as f:
        json.dump(ret, f)


torch.cuda.empty_cache()

with open(input_path, "r") as f:
    input_data = json.load(f)

# Raw model answers per task. `ret` (the submission dict) comes from the
# placeholder-submission cell and is updated in place here.
solutions = dict()

for i, (k, v) in enumerate(tqdm(prompts.items())):

    test_items = input_data[k]["test"]
    test_matrix = test_items[0]["input"]
    r = len(test_matrix)
    c = len(test_matrix[0])

    # Optionally jump over matrices that are too large.
    if if_jump_large and r * c >= LARGE_GRID_CELLS:
        print(r, c)
        solutions[k] = "[[0]]"
        solu = [[0]]
        ret[k] = [dict(attempt_1=solu, attempt_2=solu) for _ in test_items]
        continue

    prompt = v["input"]

    try:
        answer = get_answer(model, tokenizer, prompt, max_new_tokens=max_seq_length)
    except Exception as e:
        # Best effort: a failure on one task (e.g. running out of GPU memory)
        # must not abort the whole run; fall back to the placeholder answer.
        print(i, k)
        print(e)

        torch.cuda.empty_cache()
        answer = "[[0]]"

    solutions[k] = answer
    solu = _parse_grid(answer)

    # NOTE: the prompt only contains the first test input, so the same answer
    # is recorded for every test item of the task.
    ret[k] = [dict(attempt_1=solu, attempt_2=solu) for _ in test_items]

    if i % SAVE_EVERY == 0:
        print(i, k)
        print(prompt)
        print(answer)

        print("saving...")
        _save_results()

_save_results()