In [None]:
# Enable the autoreload extension; mode 2 picks up edits to imported modules live.
%load_ext autoreload
%autoreload 2

In [None]:
import asyncio
import os
import pandas as pd
import numpy as np

from hfppl.modeling import Model
from hfppl.distributions import LMContext, TokenCategorical
from hfppl.llms import CachedCausalLM
from hfppl.inference import smc_standard

from battleship.board import Board
from battleship.scoring import compute_score
from battleship.models import QuestionGenerationModel

In [None]:
# Read the HuggingFace auth token from ../.hf_auth_token (surrounding whitespace
# stripped) and expose it both as an environment variable and as a Python name.
token_path = os.path.join("../", ".hf_auth_token")
with open(token_path, "r") as token_file:
    HF_AUTH_TOKEN = token_file.read().strip()

os.environ["HF_AUTH_TOKEN"] = HF_AUTH_TOKEN

In [None]:
# Load the CodeLlama-7b checkpoint via CachedCausalLM, passing the auth token read above.
MODEL_ID = "codellama/CodeLlama-7b-hf"
lm = CachedCausalLM.from_pretrained(MODEL_ID, auth_token=HF_AUTH_TOKEN)

# Prompting utils

In [None]:
# Example table; the prompt builders below read its `question` and `code` columns.
EXAMPLES_PATH = "../battleship/prompts/examples.csv"
df = pd.read_csv(EXAMPLES_PATH)

def format_example(user_input: str, response: str = None):
    """Render one User/Assistant exchange as a two-line string.

    Args:
        user_input: Text placed after ``"User: "`` on the first line.
        response: Optional assistant text. When truthy it is appended after
            ``"Assistant:"`` with one separating space; when ``None`` or empty
            the second line is left as a bare ``"Assistant:"``.

    Returns:
        The string ``"User: <user_input>\\nAssistant:[ <response>]"`` with no
        trailing newline.
    """
    suffix = ""
    if response:
        suffix = " " + response
    return f"User: {user_input}\nAssistant:{suffix}"

def make_question_prompt(df):
    """Build the question prompt: one question per line, newline-terminated.

    Args:
        df: Object exposing a ``question`` attribute that iterates over strings
            (here, the examples DataFrame).

    Returns:
        All questions joined with ``"\\n"``, followed by a final ``"\\n"``.
    """
    questions = df.question
    return "\n".join(questions) + "\n"

def make_question_to_code_prompt(df):
    """Build the question-to-code prompt from paired example columns.

    Each (question, code) pair is rendered with ``format_example``; the rendered
    examples are joined with ``"\\n"`` and followed by a final ``"\\n"``.

    Args:
        df: Object exposing ``question`` and ``code`` attributes that are
            iterated in lockstep (here, the examples DataFrame).

    Returns:
        The assembled prompt string.
    """
    rendered_examples = []
    for question, code in zip(df.question, df.code):
        rendered_examples.append(format_example(question, code))
    return "\n".join(rendered_examples) + "\n"

In [None]:
# Preview the question prompt built from the examples table.
print(make_question_prompt(df))

In [None]:
# Preview the question-to-code prompt built from the examples table.
print(make_question_to_code_prompt(df))

# Single board evaluation

In [None]:
# Load the board for trial 5; the bare expression shows it as the cell output.
board = Board.from_trial_id(5)
board

In [None]:
# Build both prompts from the examples table, then create the question-generation
# model for the current board.
question_prompt = make_question_prompt(df)
translation_prompt = make_question_to_code_prompt(df)

model = QuestionGenerationModel(
    lm=lm,
    board=board,
    question_prompt=question_prompt,
    translation_prompt=translation_prompt,
)

In [None]:
# Run standard SMC inference on the model with 3 particles (top-level await, so this needs an async-aware kernel).
particles = await smc_standard(model, n_particles=3)


In [None]:
# Turn each particle's final results into a frame tagged with that particle's
# index, then stack the frames into a single table with a fresh 0..n-1 index.
particle_frames = [
    pd.DataFrame(p.get_final_results()).assign(particle=i)
    for i, p in enumerate(particles)
]
df_results = pd.concat(particle_frames).reset_index(drop=True)
df_results

In [None]:
# Show the ten rows with the highest `score`.
df_results.sort_values("score", ascending=False).head(10)

In [None]:
# Keep only the rows whose `type` column equals 'final'.
df_results.query("type == 'final'")

In [None]:
# Report each particle: its question context, the translated program, the score
# of that program on the current board (printed under the "EIG" label), and the
# particle's weight.
for particle in particles:
    print(f"Question: {str(particle.context)}")
    program = particle.result["translation"]
    print(f"|- Program: {program}")
    eig = compute_score(board=board, program=program)
    print(f"|- EIG: {eig}")
    print(f"|- Particle weight: {particle.weight:.4f}")
    print()

# Multiple board evaluation

In [None]:
TRIAL_IDS = range(3, 19)
N_PARTICLES = 3

df_results = []
for trial_id in TRIAL_IDS:
    print("-" * 80)
    print(f"Trial {trial_id}")
    print("-" * 80)
    board = Board.from_trial_id(trial_id)
    model = QuestionGenerationModel(
        lm=lm,
        board=board,
        question_prompt=make_question_prompt(df),
        translation_prompt=make_question_to_code_prompt(df),
    )
    particles = await smc_standard(model, n_particles=N_PARTICLES)
    df_trial = []
    for i, p in enumerate(particles):
        df_p = pd.DataFrame(p.get_final_results())
        df_p["particle"] = i
        df_trial.append(df_p)
    df_trial = pd.concat(df_trial).reset_index(drop=True)
    df_trial["trial_id"] = trial_id
    df_results.append(df_trial)
df_results = pd.concat(df_results).reset_index(drop=True)
df_results.to_csv("hfppl_results.csv", index=False)

In [None]:
# Display the combined results table for all evaluated trials.
df_results