# In [1]:
from minichat.common import download_file_with_lock, place_eval_bundle
from minichat.gpt import GPT, GPTConfig
from minichat.tokenizer import get_tokenizer
import os
import csv
import yaml
from minichat.common import get_base_dir
import json
from minichat.core_eval import evaluate_task

# Tokenizer used below both to size the model vocabulary and to run the task.
tokenizer = get_tokenizer()

# Remote location of the zipped evaluation bundle.
EVAL_BUNDLE_URL = "https://karpathy-public.s3.us-west-2.amazonaws.com/eval_bundle.zip"

# Fetch the bundle as "eval_bundle.zip"; place_eval_bundle is supplied as the
# post-processing hook (presumably it unpacks the archive into the base
# directory -- confirm in minichat.common).
download_file_with_lock(EVAL_BUNDLE_URL, "eval_bundle.zip", postprocess_fn=place_eval_bundle)

# Hyperparameters for a small GPT. Bound to `model_config` rather than
# `config`, because `config` is rebound further down to the parsed core.yaml
# and would otherwise make these settings unreachable.
model_config = GPTConfig(
    sequence_len=512,
    n_layers=12,
    vocab_size=tokenizer.get_vocab_size(),  # vocabulary size comes from the tokenizer
    emb_dim=128,
    n_heads=8,
)

# Built straight from the config; no checkpoint is loaded in this cell.
model = GPT(model_config)

# Hard-coded device: running this requires a CUDA-capable GPU.
model.to('cuda')

# Locate the eval bundle's files under the project base directory.
eval_bundle_dir = os.path.join(get_base_dir(), "eval_bundle")
config_path = os.path.join(eval_bundle_dir, "core.yaml")
data_base_path = os.path.join(eval_bundle_dir, "eval_data")
eval_meta_data = os.path.join(eval_bundle_dir, "eval_meta_data.csv")

# Map each task name ('Eval Task' column) to its 'Random baseline' value.
# newline='' hands newline handling to the csv module, as its documentation
# requires for file objects given to a reader; without it, quoted fields that
# contain line breaks are not parsed correctly.
with open(eval_meta_data, 'r', encoding='utf-8', newline='') as f:
    reader = csv.DictReader(f)
    random_baselines = {
        row['Eval Task']: float(row['Random baseline'])
        for row in reader
    }

# Parse core.yaml and pull out the list of in-context-learning tasks.
with open(config_path, 'r', encoding='utf-8') as f:
    config = yaml.safe_load(f)
tasks = config['icl_tasks']

# Evaluate a single task: the second entry of the configured task list.
selected_task = tasks[1]

task_meta = {
    'task_type': selected_task['icl_task_type'],
    'dataset_uri': selected_task['dataset_uri'],
    # num_fewshot is a sequence in the config; only its first value is used.
    'num_fewshot': selected_task['num_fewshot'][0],
    # Fall back to a single space when the task defines no delimiter.
    'continuation_delimiter': selected_task.get('continuation_delimiter', ' '),
}

# dataset_uri is resolved relative to the bundle's eval_data directory.
data_path = os.path.join(data_base_path, task_meta['dataset_uri'])

# One JSON document per line. Stream the file instead of materialising
# readlines(), and skip lines that are empty after stripping (e.g. a trailing
# blank line), which would otherwise make json.loads raise JSONDecodeError.
with open(data_path, 'r', encoding='utf-8') as f:
    stripped_lines = (line.strip() for line in f)
    data = [json.loads(text) for text in stripped_lines if text]

# Device is hard-coded to match the model placement earlier in the cell.
# NOTE(review): the name suggests a correctness flag, but what evaluate_task
# actually returns is not visible here -- confirm in minichat.core_eval.
is_correct = evaluate_task(data, task_meta, model, tokenizer, device='cuda')