In [None]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules are picked up live without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import asyncio
import os
import pandas as pd

from hfppl.llms import CachedCausalLM
from hfppl.inference import smc_standard

from battleship.board import Board
from battleship.scoring import compute_score
from battleship.models import QuestionGenerationModel

In [None]:
# Read the HuggingFace auth token from the `.hf_auth_token` file one directory
# up, then expose it both as a notebook global and as an environment variable.
token_path = os.path.join("../", ".hf_auth_token")
with open(token_path, "r") as token_file:
    HF_AUTH_TOKEN = token_file.read().strip()

os.environ["HF_AUTH_TOKEN"] = HF_AUTH_TOKEN

In [None]:
# Initialize the HuggingFace model.
# `lm` is a notebook-level global: the model-building functions further down
# reference it directly instead of receiving it as a parameter.
# The auth token loaded in the previous cell is forwarded to `from_pretrained`.
lm = CachedCausalLM.from_pretrained("codellama/CodeLlama-13b-hf", auth_token=HF_AUTH_TOKEN)

# Prompting utils

In [None]:
# Few-shot examples table. The prompt builders below read its `question` and
# `code` columns; `df` is also used as a global by the SMC driver functions.
df = pd.read_csv("../battleship/prompts/examples.csv")

def format_example(user_input: str, response: str = None):
    """Render one User/Assistant exchange as prompt text.

    Args:
        user_input: Text placed after ``User: `` on the first line.
        response: Optional assistant reply. When falsy (``None`` or ``""``)
            the second line is left as a bare ``Assistant:`` with no trailing
            space.

    Returns:
        A two-line string without a trailing newline.
    """
    if response:
        assistant_suffix = " " + response
    else:
        assistant_suffix = ""
    return f"User: {user_input}\nAssistant:" + assistant_suffix

def make_question_prompt(df, board=None, instructions=None):
    """Assemble the prompt used to sample new questions.

    Sections are emitted in a fixed order: optional instructions, optional
    board description, then the example questions.

    Args:
        df: Examples table; only its ``question`` attribute/column is read and
            its entries are joined with newlines.
        board: Optional object exposing ``to_textual_description()``. When
            given, its description is included under a ``Board:`` header.
        instructions: Optional text included under an ``Instructions:``
            header. An empty string still produces the header.

    Returns:
        The assembled prompt string, terminated by a newline.
    """
    sections = []
    # Use identity checks: `!= None` would dispatch to the argument's own
    # __eq__/__ne__, which may raise or return a non-boolean for rich objects
    # such as a board that compares its tiles against the other operand.
    if instructions is not None:
        sections.append(f"Instructions:\n{instructions}\n")
    if board is not None:
        sections.append("Board:\n" + board.to_textual_description() + "\n")
    sections.append("Questions:\n" + "\n".join(df.question) + "\n")
    return "".join(sections)

def make_question_to_code_prompt(df):
    """Build the few-shot prompt for translating questions into code.

    Each (question, code) pair from ``df`` is rendered with ``format_example``;
    the rendered examples are joined with newlines and the result is
    terminated by a newline.

    Args:
        df: Examples table; its ``question`` and ``code`` columns are zipped
            pairwise.

    Returns:
        The assembled prompt string.
    """
    rendered_examples = (
        format_example(question, program)
        for question, program in zip(df.question, df.code)
    )
    return "\n".join(rendered_examples) + "\n"

In [None]:
# Task instructions for the question-asking model. `make_question_prompt`
# places this text under an "Instructions:" header; `run_smc_baseline` reads
# this global for every model type other than "REGULAR".
# The string is sent to the language model as-is, so any rewording changes
# the prompt and therefore the experiment.
instructions = "User input will be a series of sentences representing a board from Battleship, the board game, that you should aim to win. Tiles in the board can either be 'Water' tiles, 'Blue Ship' tiles, 'Red Ship' tiles, and 'Purple Ship' tiles (there are only these three battleships). Some tiles may also be 'Hidden' tiles, meaning they could be any of the others but have not been revealed yet. The user will denote coordinates as follows: columns are numbered from 1 onwards, where column 1 is the leftmost column, and rows are given a letter from A onwards where row A is the topmost row (so the cell at the second row and second column is B2). Your role is to ask the most informative possible question from the context given: strictly output the question only, and make sure the questions are relevant to the context: 'Which cells should I target to sink the battleships with the least number of moves?' is not a relevant question because it is the general goal of Battleship. Questions also need to be answerable with yes or no, no other questions will be considered in scope."

# Single board evaluation

In [None]:

async def single_smc_baseline(board_id,particle_num,instructions):
    """Run SMC on one board with two prompt variants and return both results.

    The first model's question prompt contains only the example questions;
    the second ("combined") model's prompt additionally includes the
    instructions and the board description. Both models share the same
    translation prompt. Uses the notebook globals ``lm`` and ``df``.

    Args:
        board_id: Trial id passed to ``Board.from_trial_id``.
        particle_num: Number of particles requested from ``smc_standard``.
        instructions: Instruction text for the combined model's prompt.

    Returns:
        A two-element list: the ``smc_standard`` result for the first model,
        then the result for the combined model.
    """
    board = Board.from_trial_id(board_id)
    # NOTE(review): the return value is discarded here; presumably a leftover
    # from debugging. Kept so behavior is unchanged. Confirm before removing.
    board.to_textual_description()

    def _build_model(question_prompt):
        # Both variants differ only in their question prompt.
        return QuestionGenerationModel(
            lm=lm,
            board=board,
            question_prompt=question_prompt,
            translation_prompt=make_question_to_code_prompt(df),
        )

    standard_model = _build_model(make_question_prompt(df))
    combined_model = _build_model(
        make_question_prompt(df, board=board, instructions=instructions)
    )

    standard_particles = await smc_standard(standard_model, n_particles=particle_num)
    print("Done with standard model...")
    combined_particles = await smc_standard(combined_model, n_particles=particle_num)
    print("Done with combined model...")
    return [standard_particles, combined_particles]

In [None]:
# Flatten the final results of both particle sets into a single table.
# NOTE(review): `particles` and `particles_c` are not assigned in any visible
# cell. Presumably they are the two results returned by
# `await single_smc_baseline(...)`; confirm so the notebook survives
# Restart & Run All.
particle_types = [particles,particles_c]

df_results = []
for particle_type in particle_types:
    for i, p in enumerate(particle_type):
        df_p = pd.DataFrame(p.get_final_results())
        # Index of the particle within its own set (restarts at 0 per set).
        df_p["particle"] = i
        df_results.append(df_p)

# Concatenate once, after every set has been collected. Doing this inside the
# outer loop rebinds `df_results` from a list to a DataFrame, so the second
# particle set can no longer be appended and `pd.concat` is then handed a
# DataFrame instead of a list of frames.
df_results = pd.concat(df_results).reset_index(drop=True)
df_results

In [None]:
# Print a short report per particle: question, translated program, score, weight.
# NOTE(review): `particles` and `board` are not assigned at notebook level in
# any visible cell. Presumably they come from an earlier interactive run;
# confirm before relying on this cell.
for p in particles:
    print(f"Question: {str(p.context)}")
    program = p.result['translation']
    print(f"|- Program: {program}")
    eig = compute_score(board=board, program=p.result['translation'])
    print(f"|- EIG: {eig}")
    print(f"|- Particle weight: {p.weight:.4f}")
    print()

# Multiple board evaluation

In [None]:
async def run_smc_baseline(n_particles=5, trial_ids=range(1, 19), model_types=["REGULAR", "COMBINED"], output_path="hfppl_results.csv"):
    """Run SMC question generation over several boards and prompt variants.

    For every (trial, model type) pair a fresh board and model are built,
    ``smc_standard`` is awaited, and the particles' final results are
    collected into a table with ``particle``, ``model_type`` and ``trial_id``
    columns. The table accumulated so far is rewritten to ``output_path``
    after every pair, so partial results survive an interrupted run.

    Uses the notebook globals ``lm``, ``df`` and ``instructions``.

    Args:
        n_particles: Number of particles requested from ``smc_standard``.
        trial_ids: Iterable of trial ids passed to ``Board.from_trial_id``.
        model_types: Iterable of variant labels. ``"REGULAR"`` omits the
            instructions from the question prompt; any other label includes
            the global ``instructions``. The default list is only iterated,
            never mutated.
        output_path: CSV file that receives the accumulated results.

    Returns:
        None. Results are written to ``output_path``.
    """
    # Keep the per-run frames in a list for the whole sweep. Rebinding this
    # name to the concatenated DataFrame would break the next iteration's
    # append and the following pd.concat.
    run_frames = []
    for trial_id in trial_ids:
        for model_type in model_types:
            print("-" * 80)
            print(f"Trial {trial_id}")
            print(f"Model type: {model_type}")
            print("-" * 80)
            board = Board.from_trial_id(trial_id)
            instructions_used = None if model_type == "REGULAR" else instructions
            model = QuestionGenerationModel(
                    lm=lm,
                    board=board,
                    question_prompt=make_question_prompt(df, board=board, instructions=instructions_used),
                    translation_prompt=make_question_to_code_prompt(df),
                )
            particles = await smc_standard(model, n_particles=n_particles)
            particle_frames = []
            for i, p in enumerate(particles):
                df_p = pd.DataFrame(p.get_final_results())
                df_p["particle"] = i
                df_p["model_type"] = model_type
                particle_frames.append(df_p)
            df_trial = pd.concat(particle_frames).reset_index(drop=True)
            df_trial["trial_id"] = trial_id
            run_frames.append(df_trial)
            # Checkpoint: write a snapshot of everything gathered so far.
            df_results = pd.concat(run_frames).reset_index(drop=True)
            df_results.to_csv(output_path, index=False)

In [None]:
# Full sweep configuration: trials 1 through 18, five particles per run.
TRIAL_IDS = range(1, 19)
N_PARTICLES = 5

# `model_types` is left at its default, so both "REGULAR" and "COMBINED" run.
await run_smc_baseline(n_particles=N_PARTICLES, trial_ids=TRIAL_IDS)

# One-step SMC

In [None]:
# Quick single-board run: one trial, one particle, "REGULAR" prompt only.
# The full range is kept below, commented out, for switching back.
# NOTE: this rebinds TRIAL_IDS / N_PARTICLES from the sweep cell above, and
# the call writes hfppl_results.csv again, replacing the file from that sweep.
# TRIAL_IDS = range(1, 19)
TRIAL_IDS = [13]
N_PARTICLES = 1

await run_smc_baseline(n_particles=N_PARTICLES, trial_ids=TRIAL_IDS, model_types=["REGULAR"])