In [1]:
# conda environment (project)
# /home/student/.conda/envs/project/bin/python 
%load_ext autoreload
%autoreload 2
import numpy as np
import pandas as pd
from tqdm.auto import tqdm
import os, gc
import torch

from transformers import set_seed
from datasets import load_dataset
from evaluate import *
from arc import ARCSolver

from datasets import Dataset
from utils import render_grid

In [2]:
# prepare the test dataset
data_path = "dataset"
dataset, task_list = load_data(data_path)
df = sample_data(dataset, task_list, n_row=10000) 
df.head(5) 

FileNotFoundError: [Errno 2] No such file or directory: 'dataset'

In [None]:
# prepare samples for each task
task_samples = []
for t in range(300):
    df = sample_data(dataset, task_list, n_row=1000, indices=[t])
    task_samples.append(df)

In [None]:
# Visualize a task (EDA)
task_idx = 39 # select which task you want to examine
n_sample = 1
for data in Dataset.from_pandas(task_samples[task_idx]).shuffle().select(range(n_sample)):
    for case in data['train']:
        print("==================================================")
        print("Example input")
        render_grid(case['input'])
        print("Example output")
        render_grid(case['output'])
    print("==================================================")
    print("Example test input")
    render_grid(data['test'][0]['input'])
    print("Example test output")
    render_grid(data['test'][0]['output'])
print("==================================================")

In [None]:
# load our model(arcsolver) instance
set_seed(1234567890)
token = os.environ.get("HF_TOKEN", None)
solver = ARCSolver(token=token) # default: finetuning-sample (given sample adapter)

In [None]:
solver.prepare_train()
n_train = 500
train_dataset = Dataset.from_pandas(task_samples[task_idx]).shuffle(42).select(range(n_train))
solver.train(train_dataset)

In [None]:
n_eval = 10
eval_dataset = Dataset.from_pandas(task_samples[task_idx]).shuffle(42).select(range(n_train,n_train+n_eval))
solver.prepare_evaluation(select_adapter="20250527_100116") # make sure you set the right model

In [None]:
# evaluate our model (eval set)
scores = []
for eval_data in tqdm(eval_dataset):
    print("============================================")
    print("Test input")
    render_grid(eval_data["test"][0]['input'])

    print("Predict output")
    preds = solver.predict(eval_data)
    render_grid(preds)

    print("Test output")
    render_grid(eval_data["test"][0]['output'])
    print("============================================")
    if preds is None: s = 0
    else: s = check_match(preds, eval_data["test"][0]["output"])
    scores.append(s)

score = np.array(scores).mean() * 100
print(f"Evaluation scores: {score:.2f}", flush=True)
print("Evaluation Success")

In [None]:
solver.prepare_train()
n_train = 10000
df20 = sample_data(dataset, task_list, n_row=10500, indices=list(range(20)))
train_dataset = Dataset.from_pandas(df20).select(range(n_train))
solver.train(train_dataset)

In [None]:
n_eval = 20
solver.prepare_evaluation(select_adapter="20250527_011623") # make sure you set the right model

In [None]:
# from safetensors import safe_open
# from safetensors.torch import save_file

# safetensors 파일 열기
# with safe_open("artifacts/20250527_011623/checkpoint-final/adapter_model.safetensors", framework="pt", device="cpu") as f:
#     for k in f.keys(): print(k)
#     state_dict = {k: f.get_tensor(k) for k in f.keys()}

# fixed_state_dict = {}
# for key in state_dict:
#     new_key = key.replace("base_model.model.base_model.model.model", "base_model.model.model")
#     fixed_state_dict[new_key] = state_dict[key]

# save_file(fixed_state_dict, "artifacts/20250527_011623/checkpoint-final/adapter_model_fixed.safetensors")

In [None]:
# evaluate our model (eval set)
scores = []
scores_task = []
for task in range(20,40):
    eval_dataset = Dataset.from_pandas(task_samples[task]).shuffle(42).select(range(n_eval))
    for eval_data in tqdm(eval_dataset):
        # print("============================================")
        # print("Test input")
        # render_grid(eval_data["test"][0]['input'])

        # print("Predict output")
        preds = solver.predict(eval_data)
        # render_grid(preds)

        # print("Test output")
        # render_grid(eval_data["test"][0]['output'])
        # print("============================================")
        if preds is None: s = 0
        else: s = check_match(preds, eval_data["test"][0]["output"])
        scores.append(s)
    score = np.array(scores).mean() * 100
    scores_task.append(score)
    print(f"Evaluation score: {score:.2f}", flush=True)
    scores = []

from matplotlib import pyplot as plt
x = np.arange(20,40)
plt.bar(x, scores_task)
plt.xticks(x, list(range(20,40)))
plt.show()