# Environment initialization

In [None]:
import os

import wandb

from solvers import ZeroShotGPT
from structs import DataSet

In [None]:
# Instantiate the solver with its default constructor arguments.
# The evaluation cells below call `solver.solve(...)` on each dataset.
solver = ZeroShotGPT()

In [None]:
# Set WANDB_SILENT before the run is created, so it is already in the
# environment when wandb.init() executes.
os.environ.update(WANDB_SILENT="true")

# Start a run in the "brainteasers" project, recording which solver is evaluated.
wandb.init(project="brainteasers", config=dict(solver="ZeroShotGPT"))

# Evaluation on Sentence Puzzle

In [None]:
# Load the Sentence Puzzle data and let the solver answer every instance.
sp = DataSet.from_file("../data/SP-train.pkl")
sp_answers = solver.solve(sp)

# Grade each answer against the instance it belongs to (paired by position).
sp_are_answers_correct = [
    instance.is_choice_correct(answer)
    for instance, answer in zip(sp, sp_answers)
]

# Accuracy = fraction of graded answers that were marked correct.
sp_accuracy = sum(sp_are_answers_correct) / len(sp_are_answers_correct)
print(f"Accuracy on the Sentence Puzzle dataset: {sp_accuracy: .4f}")

# Evaluation on Word Puzzle

In [None]:
# Load the Word Puzzle data and let the solver answer every instance.
wp = DataSet.from_file("../data/WP-train.pkl")

wp_answers = solver.solve(wp)

# Grade each answer against the instance it belongs to (paired by position).
# A plain zip is used, exactly as in the Sentence Puzzle cell: this notebook
# does not import a progress-bar helper, and grading is a cheap local check.
wp_are_answers_correct = [instance.is_choice_correct(answer) for instance, answer in zip(wp, wp_answers)]

# Accuracy = fraction of graded answers that were marked correct.
wp_accuracy = sum(wp_are_answers_correct) / len(wp_are_answers_correct)

print(f"Accuracy on the Word Puzzle dataset: {wp_accuracy: .4f}")

# Logging metrics

In [None]:
# Report both accuracies to the active wandb run in a single log call.
wandb.log({"accuracy/sp": sp_accuracy, "accuracy/wp": wp_accuracy})

# Close the run, passing quiet=True to finish().
wandb.finish(quiet=True)