# Library

In [1]:
import pandas as pd
import numpy as np

from pathlib import Path

from matplotlib import colors
import matplotlib.pyplot as plt

In [2]:
from unet.data_loader.read_file import read_json

# Function

In [3]:
def plot_task(
    task: dict, 
    task_solutions: list, 
    idx: int, 
    key: str,
):
    """
    Plots the train and test pairs of a specified task,
    using same color scheme as the ARC app.

    Parameters
    ----------
    task
        "test": [{"input": [], "output": []}]
        "train": [{"input": [], "output": []}, ...]
    task_solutions: List[List[int]]
        n x n matrix
    idx: int
        index of the task
    key: str
        key of the task
    """
    num_train = len(task['train'])
    num_test  = len(task['test'])

    w=num_train+num_test
    fig, axs  = plt.subplots(2, w, figsize=(3*w ,3*2))
    plt.suptitle(f'Set #{idx}, {key}:', fontsize=20, fontweight='bold', y=1)

    for j in range(num_train):
        plot_one(task, axs[0, j], j,'train', 'input')
        plot_one(task, axs[1, j], j,'train', 'output')

    plot_one(task, axs[0, j+1], 0, 'test', 'input')

    cmap = colors.ListedColormap(['#000000', '#0074D9', '#FF4136', '#2ECC40', '#FFDC00',
                                  '#AAAAAA', '#F012BE', '#FF851B', '#7FDBFF', '#870C25'])
    norm = colors.Normalize(vmin=0, vmax=9)
    answer = task_solutions
    input_matrix = answer

    axs[1, j+1].imshow(input_matrix, cmap=cmap, norm=norm)
    axs[1, j+1].grid(True, which = 'both',color = 'lightgrey', linewidth = 0.5)
    axs[1, j+1].set_yticks([x-0.5 for x in range(1 + len(input_matrix))])
    axs[1, j+1].set_xticks([x-0.5 for x in range(1 + len(input_matrix[0]))])
    axs[1, j+1].set_xticklabels([])
    axs[1, j+1].set_yticklabels([])
    axs[1, j+1].set_title('TEST OUTPUT', color = 'green', fontweight='bold')

    fig.patch.set_linewidth(5)
    fig.patch.set_edgecolor('black')  # substitute 'k' for black
    fig.patch.set_facecolor('#dddddd')

    plt.tight_layout()
    plt.show()

    print()
    print()


def plot_one(
    task: dict, 
    ax, 
    i: int, 
    train_or_test: str, 
    input_or_output: str,
):
    cmap = colors.ListedColormap(['#000000', '#0074D9', '#FF4136', '#2ECC40', '#FFDC00',
                                  '#AAAAAA', '#F012BE', '#FF851B', '#7FDBFF', '#870C25'])
    norm = colors.Normalize(vmin=0, vmax=9)
    input_matrix = task[train_or_test][i][input_or_output]
    ax.imshow(input_matrix, cmap=cmap, norm=norm)
    ax.grid(True, which = 'both',color = 'lightgrey', linewidth = 0.5)

    plt.setp(plt.gcf().get_axes(), xticklabels=[], yticklabels=[])
    ax.set_xticks([x-0.5 for x in range(1 + len(input_matrix[0]))])
    ax.set_yticks([x-0.5 for x in range(1 + len(input_matrix))])
    ax.set_title(train_or_test + ' ' + input_or_output, fontweight='bold')

In [4]:
from typing import List


def pad_matrix(
    matrix: List[List[int]], 
    target_shape=(30, 30), 
    pad_value=-1
):
    """
    Pad the input matrix to the target shape, placing the original values in the center.

    Parameters
    ----------
    matrix : numpy.ndarray
        The input matrix to be padded.
    target_shape : tuple, optional
        The desired shape of the output matrix. Default is (30, 30).
    pad_value : int or float, optional
        The value to use for padding. Default is -1.

    Returns
    -------
    numpy.ndarray
        The padded matrix with the original values in the center.

    Examples
    --------
    >>> import numpy as np
    >>> from your_module import pad_matrix
    >>> input_matrix = np.array([[1, 2], [3, 4]])
    >>> padded_matrix = pad_matrix(input_matrix)
    >>> print(padded_matrix.shape)
    (30, 30)
    """
    matrix = np.array(matrix)
    n, p = matrix.shape
    target_n, target_p = target_shape

    pad_n = (target_n - n) // 2
    pad_p = (target_p - p) // 2

    padded_matrix = np.full(target_shape, pad_value)
    padded_matrix[pad_n:pad_n+n, pad_p:pad_p+p] = matrix

    return padded_matrix

# Dataset Path

In [5]:
DATASET_PATH = Path("./data")
ARC_PRICE_DATASET_PATH  = DATASET_PATH / "arc-price-2024"

In [6]:
file_name_training_challenges = "arc-agi_training_challenges.json"
file_name_training_solutions = "arc-agi_training_solutions.json"

In [7]:
training_challenges = read_json(ARC_PRICE_DATASET_PATH / file_name_training_challenges)
training_solutions = read_json(ARC_PRICE_DATASET_PATH / file_name_training_solutions)

In [8]:
from collections import defaultdict


train_inputs = defaultdict(list)
train_outputs = defaultdict(list)

train_inputs_flatten = []
train_outputs_flatten = []

"""training_challenges structure
{
    task_name_1: {
        "train": [
            {
                "input": input_matrix,
                "output": output_matrix,
            }
        ]
        "test": [
            {
                "input": input_matrix,
            }
        ]
    },
    task_name_2: ...
}
"""

for task_name in training_challenges:
    task = training_challenges[task_name]
    for train_observation in task["train"]:
        # by task
        train_inputs[task_name].append(
            pad_matrix(train_observation["input"])  # to 30x30
        )
        train_outputs[task_name].append(
            pad_matrix(train_observation["output"])  # to 30x30
        )

        # flatten
        train_inputs_flatten.append(
            pad_matrix(train_observation["input"])  # to 30x30
        )
        train_outputs_flatten.append(
            pad_matrix(train_observation["output"])  # to 30x30
        )


In [14]:
from unet.data_loader.data_loader import ImageDataset
from torchvision import transforms
from torch.utils.data import DataLoader
from torchvision.transforms import ToTensor, Lambda

batch_size = 4  # TODO: 추후 수정


transform = transforms.Compose(
    [
        ToTensor(),
        Lambda(lambda x: x.float()),  # 명시적으로 float 타입으로 변환
    ]
)
dataset_train = ImageDataset(
    input_data=train_inputs_flatten,
    output_data=train_outputs_flatten,
    transform=transform,
)
loader_train = DataLoader(
    dataset_train,
    batch_size=batch_size,
    shuffle=True,
    num_workers=4,
)

In [15]:
for i, o in loader_train:
    print(i.shape)
    print(o.shape)
    break


torch.Size([4, 1, 30, 30])
torch.Size([4, 1, 30, 30])


In [16]:
from unet.unet import UNet
from unet.train import Trainer

model = UNet()
trainer = Trainer(
    model=model,
    train_loader=loader_train,
    val_loader=loader_train,
    save_path="model.pth",
)


In [17]:
trainer.train(num_epochs=10)

RuntimeError: Sizes of tensors must match except in dimension 1. Expected size 2 but got size 3 for tensor number 1 in the list.