In [12]:
import json
import numpy as np
import random
import tqdm as tqdm

import os
from utils.os import get_root_path

In [13]:
# Load the JSON files
# Directory that holds the dataset files; load_json joins it onto get_root_path().
base_path="datas/Kaggle/arc-prize-2024/"
# Loading JSON data
def load_json(file_path):
    """Read a JSON file and return its parsed content.

    Args:
        file_path: Path of the file; it is joined onto the directory
            returned by ``get_root_path()``.

    Returns:
        The deserialized document, exactly as produced by ``json.load``.
    """
    full_path = os.path.join(get_root_path(), file_path)
    # JSON text is UTF-8; do not rely on the platform's default encoding.
    with open(full_path, encoding="utf-8") as f:
        return json.load(f)

# Read every dataset file that lives under base_path
training_challenges = load_json(os.path.join(base_path, 'arc-agi_training_challenges.json'))
training_solutions = load_json(os.path.join(base_path, 'arc-agi_training_solutions.json'))
evaluation_challenges = load_json(os.path.join(base_path, 'arc-agi_evaluation_challenges.json'))
evaluation_solutions = load_json(os.path.join(base_path, 'arc-agi_evaluation_solutions.json'))
test_challenges = load_json(os.path.join(base_path, 'arc-agi_test_challenges.json'))
sample_submission = load_json(os.path.join(base_path, 'sample_submission.json'))

In [14]:
# Convert one task of the dataset into lists of numpy arrays
def process_challenge(challenge_id, challenges, solutions=None):
    """
    Turn a single task into lists of numpy grids.

    Args:
        challenge_id: ID of the task to look up
        challenges: mapping from task ID to a dict with 'train' and 'test' entries
        solutions: mapping from task ID to the test outputs, or None when
            no solutions are available

    Returns:
        (train_inputs, train_outputs, test_inputs) when solutions is None,
        otherwise (train_inputs, train_outputs, test_inputs, test_outputs).
        Each element is a list of numpy arrays.
    """
    task = challenges[challenge_id]

    # Arrays rather than nested lists, so every grid can be handled as a matrix.
    train_pairs = [
        (np.array(pair['input']), np.array(pair['output']))
        for pair in task['train']
    ]
    train_inputs = [grid_in for grid_in, _ in train_pairs]
    train_outputs = [grid_out for _, grid_out in train_pairs]

    test_inputs = [np.array(pair['input']) for pair in task['test']]

    # Without solutions there is nothing more to convert.
    if solutions is None:
        return train_inputs, train_outputs, test_inputs

    test_outputs = [np.array(grid) for grid in solutions[challenge_id]]
    return train_inputs, train_outputs, test_inputs, test_outputs

# 训练

In [53]:
import matplotlib.pyplot as plot
from matplotlib import colors

def display_array(ax: plot.Axes, arr: np.ndarray, cmap=None, norm=None):
    """
    Draw an array as an image on the given axes, with the axis turned off.

    Args:
        ax: axes to draw on
        arr: array to show; its first two dimensions determine the tick positions
        cmap: colormap forwarded to ``imshow``
        norm: normalization forwarded to ``imshow``
    """
    ax.axis("off")
    ax.imshow(arr, cmap=cmap, norm=norm)

    # One tick per row / column index.
    n_rows, n_cols = arr.shape[0], arr.shape[1]
    ax.set_xticks(range(n_cols))
    ax.set_yticks(range(n_rows))

# Colour palette: one entry per grid value 0-9
cmap = colors.ListedColormap([
    '#000000', '#0074D9', '#FF4136', '#2ECC40', '#FFDC00',
    '#AAAAAA', '#F012BE', '#FF851B', '#7FDBFF', '#870C25'
])
norm = colors.Normalize(vmin=0, vmax=9)  # fixed value range for the colour mapping

challenges = training_challenges
solutions = training_solutions
train_ids = list(challenges)

# Save one PNG per task: a row per example, input on the left and output on the right.
# The training examples come first, followed by the test examples.
for _id in train_ids:
    puzzle_ins_train, puzzle_outs_train, puzzle_ins_test, puzzle_outs_test \
        = process_challenge(_id, challenges, solutions)

    n_train = len(puzzle_ins_train)
    space = 0.1
    # squeeze=False keeps `axes` two-dimensional even when the figure has a single row,
    # so the axes[row, col] indexing below always works.
    fig, axes = plot.subplots(nrows=n_train + len(puzzle_ins_test), ncols=2, squeeze=False)
    fig.subplots_adjust(wspace=space, hspace=space)    # small gap between neighbouring panels
    for i in range(n_train):
        display_array(axes[i, 0], puzzle_ins_train[i], cmap=cmap, norm=norm)
        display_array(axes[i, 1], puzzle_outs_train[i], cmap=cmap, norm=norm)
    for i in range(len(puzzle_ins_test)):
        row = n_train + i  # test rows are placed below the training rows
        display_array(axes[row, 0], puzzle_ins_test[i], cmap=cmap, norm=norm)
        display_array(axes[row, 1], puzzle_outs_test[i], cmap=cmap, norm=norm)

    full_path = os.path.join(get_root_path(), base_path, "train", _id + ".png")
    os.makedirs(os.path.dirname(full_path), exist_ok=True)
    # Work on the figure handle instead of pyplot's implicit "current figure".
    fig.savefig(full_path, dpi=300)
    plot.close(fig)  # release the figure so open figures do not pile up across iterations

In [40]:
def get_score(model_answers, right_answers):
    """
    Score one task: 1 if every expected output is reproduced exactly, otherwise 0.

    Answers are compared with the expected outputs position by position. A pair
    matches only when both grids have the same shape and identical values.
    Model answers beyond the number of expected outputs are ignored.

    Args:
        model_answers: sequence of grids produced by the model
        right_answers: sequence of expected grids

    Returns:
        int: 1 when all expected grids are matched, 0 otherwise (also 0 when
        right_answers is empty).
    """
    expected_count = len(right_answers)
    if expected_count == 0:
        # Nothing to match; also avoids dividing by zero.
        return 0

    # zip stops at the shorter sequence, so surplus model answers cannot index
    # past the end of right_answers.
    matched = sum(
        1
        for answer, expected in zip(model_answers, right_answers)
        if np.array_equal(answer, expected)  # False on shape mismatch
    )
    return int(matched == expected_count)

# Measure the accuracy on the evaluation set
total_score = 0
ids_evaluation = list(evaluation_challenges)
for challenge_id in tqdm.tqdm(ids_evaluation):
    puzzle_ins_train, puzzle_outs_train, puzzle_ins_test, puzzle_outs_test = (
        process_challenge(challenge_id, evaluation_challenges, evaluation_solutions))

    # Placeholder: the ground truth is scored against itself, so the reported accuracy
    # says nothing about a model yet. Plug the real solver in here, e.g.
    # dumb_dumb(puzzle_ins_train, puzzle_outs_train, puzzle_ins_test, puzzle_outs_test)
    answers = puzzle_outs_test
    total_score += get_score(answers, puzzle_outs_test)

# Guard the division so an empty evaluation set does not raise ZeroDivisionError.
accuracy = total_score / len(ids_evaluation) if ids_evaluation else 0.0
# ".5f" prints five decimals; the former "5f" only set a minimum field width.
print(f"\ntotal_score: {total_score}/{len(ids_evaluation)}, accuracy: {accuracy:.5f}\n")

In [None]:
# 提交教程：https://www.kaggle.com/code/anrenk/submission-example