In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import asyncio
import os
import pandas as pd
import numpy as np

from hfppl.modeling import Model
from hfppl.distributions import LMContext, TokenCategorical
from hfppl.llms import CachedCausalLM
from hfppl.inference import smc_standard

from battleship.board import Board
from battleship.scoring import compute_score
from battleship.models import QuestionGenerationModel

In [3]:
# Read the Hugging Face auth token from the HF_AUTH_TOKEN environment variable.
# A missing variable raises KeyError on this line.
HF_AUTH_TOKEN = os.environ["HF_AUTH_TOKEN"]

In [4]:
# Initialize the HuggingFace model: load the `codellama/CodeLlama-13b-hf`
# checkpoint through hfppl's CachedCausalLM, passing HF_AUTH_TOKEN as the auth token.
lm = CachedCausalLM.from_pretrained("codellama/CodeLlama-13b-hf", auth_token=HF_AUTH_TOKEN)



Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]



# Prompting utils

In [5]:
# Example table used to build the prompts; the functions in this cell read its
# `question` and `code` columns.
df = pd.read_csv("../battleship/prompts/examples.csv")

def format_example(user_input: str, response: str = None):
    """Render one User/Assistant exchange as two lines of text.

    The first line is "User: " followed by `user_input`. The second line is
    "Assistant:"; when `response` is truthy it is followed by a single space
    and the response, otherwise the line ends right after the colon (so an
    empty string is treated the same as no response).
    """
    user_line = f"User: {user_input}"
    assistant_line = "Assistant:"
    if response:
        assistant_line += " " + response
    return "\n".join([user_line, assistant_line])

def make_question_prompt(df, board=None, instructions=None):
    """Assemble the question-generation prompt.

    Sections are emitted in this order, each terminated by a newline:

    1. "Instructions:" followed by `instructions` (only if not None),
    2. "Board:" followed by `board.to_textual_description()` (only if not None),
    3. "Questions:" followed by the entries of `df.question`, one per line.

    Args:
        df: object exposing an iterable `question` attribute of strings
            (here, the examples DataFrame).
        board: optional object providing `to_textual_description()`.
        instructions: optional instruction text.

    Returns:
        The prompt as a single string.
    """
    sections = []
    # Identity checks instead of `!= None`: `!=` dispatches to the argument's own
    # `__ne__`, which a board type with custom comparison may not answer with a
    # plain boolean (or may reject outright).
    if instructions is not None:
        sections.append(f"Instructions:\n{instructions}\n")
    if board is not None:
        sections.append("Board:\n" + board.to_textual_description() + "\n")
    sections.append("Questions:\n" + "\n".join(df.question) + "\n")
    return "".join(sections)

def make_question_to_code_prompt(df):
    """Build the question-to-code few-shot prompt.

    Each (`df.question`, `df.code`) pair is rendered with `format_example`;
    the rendered examples are joined with newlines and a trailing newline is
    appended.
    """
    rendered = []
    for question, program in zip(df.question, df.code):
        rendered.append(format_example(question, program))
    return "\n".join(rendered) + "\n"

In [6]:
# Task instructions that can be placed at the top of the question prompt via
# `make_question_prompt(..., instructions=instructions)`.
# NOTE(review): the text describes coordinates in the form 'B2', while the board
# description displayed later in this notebook uses the form '1-A' — confirm the
# two conventions are meant to agree.
instructions = "User input will be a series of sentences representing a board from Battleship, the board game, that you should aim to win. Tiles in the board can either be 'Water' tiles, 'Blue Ship' tiles, 'Red Ship' tiles, and 'Purple Ship' tiles (there are only these three battleships). Some tiles may also be 'Hidden' tiles, meaning they could be any of the others but have not been revealed yet. The user will denote coordinates as follows: columns are numbered from 1 onwards, where column 1 is the leftmost column, and rows are given a letter from A onwards where row A is the topmost row (so the cell at the second row and second column is B2). Your role is to ask the most informative possible question from the context given: strictly output the question only, and make sure the questions are relevant to the context: 'Which cells should I target to sink the battleships with the least number of moves?' is not a relevant question because it is the general goal of Battleship. Questions also need to be answerable with yes or no, no other questions will be considered in scope."

# Single board evaluation

In [14]:
# Build the board for trial 5 and display its tile-by-tile text description.
board = Board.from_trial_id(5)
board.to_textual_description()

'1-A is a hidden tile.\n1-B is a hidden tile.\n1-C is a purple ship tile.\n1-D is a hidden tile.\n1-E is a hidden tile.\n1-F is a hidden tile.\n2-A is a hidden tile.\n2-B is a hidden tile.\n2-C is a water tile.\n2-D is a hidden tile.\n2-E is a hidden tile.\n2-F is a hidden tile.\n3-A is a hidden tile.\n3-B is a hidden tile.\n3-C is a hidden tile.\n3-D is a water tile.\n3-E is a hidden tile.\n3-F is a hidden tile.\n4-A is a hidden tile.\n4-B is a hidden tile.\n4-C is a hidden tile.\n4-D is a blue ship tile.\n4-E is a hidden tile.\n4-F is a hidden tile.\n5-A is a hidden tile.\n5-B is a hidden tile.\n5-C is a hidden tile.\n5-D is a blue ship tile.\n5-E is a hidden tile.\n5-F is a hidden tile.\n6-A is a hidden tile.\n6-B is a hidden tile.\n6-C is a hidden tile.\n6-D is a blue ship tile.\n6-E is a hidden tile.\n6-F is a hidden tile.'

In [9]:
# Create one model instance per prompt variant for the current `board`.
# The variants differ only in what the question prompt contains besides the
# example questions: nothing extra, the board description, the instructions,
# or both.

# The translation prompt is the same for every variant, so build it once.
_translation_prompt = make_question_to_code_prompt(df)


def _make_model(**prompt_kwargs):
    """Return a QuestionGenerationModel whose question prompt uses `prompt_kwargs`.

    `prompt_kwargs` is forwarded to `make_question_prompt` (`board=...` and/or
    `instructions=...`); `lm`, `board` and `df` are read from the notebook
    namespace.
    """
    return QuestionGenerationModel(
        lm=lm,
        board=board,
        question_prompt=make_question_prompt(df, **prompt_kwargs),
        translation_prompt=_translation_prompt,
    )


model = _make_model()
model_board = _make_model(board=board)
model_instructions = _make_model(instructions=instructions)
model_combined = _make_model(board=board, instructions=instructions)

In [None]:
particle_num = 3

particles = await smc_standard(model, n_particles=particle_num)
print("Done with standard model...")
particles_b = await smc_standard(model_board, n_particles=particle_num)
print("Done with board model...")
particles_i = await smc_standard(model_instructions, n_particles=particle_num)
print("Done with instruction model...")
particles_c = await smc_standard(model_combined, n_particles=particle_num)
print("Done with combined model...")

In [None]:
# One frame per particle (from `get_final_results()`), each tagged with the
# particle's position in `particles`, stacked into one table with a fresh index.
df_results = pd.concat(
    [
        pd.DataFrame(particle.get_final_results()).assign(particle=idx)
        for idx, particle in enumerate(particles)
    ]
).reset_index(drop=True)
df_results

In [None]:
# Ten rows with the highest `score`.
df_results.sort_values("score", ascending=False).head(10)

In [None]:
# Only the rows whose `type` column equals 'final'.
df_results.query("type == 'final'")

In [None]:
# Per-particle report: question text, its translated program, the score of that
# program on the current board, and the particle weight.
for p in particles:
    print(f"Question: {str(p.context)}")
    program = p.result['translation']
    print(f"|- Program: {program}")
    eig = compute_score(board=board, program=program)
    print(f"|- EIG: {eig}")
    print(f"|- Particle weight: {p.weight:.4f}")
    print()

# Multiple board evaluation

In [7]:
TRIAL_IDS = range(1, 19)
N_PARTICLES = 5

for model_type in ["REGULAR","COMBINED"]:
    for trial_id in TRIAL_IDS:
        df_results = []
        print("-" * 80)
        print(f"Trial {trial_id}")
        print("-" * 80)
        board = Board.from_trial_id(trial_id)
        if model_type == "REGULAR":
            model = QuestionGenerationModel(
                lm=lm,
                board=board,
                question_prompt=make_question_prompt(df, board=None, instructions=None),
                translation_prompt=make_question_to_code_prompt(df),
            )
        elif model_type == "COMBINED":
            model = QuestionGenerationModel(
                lm=lm,
                board=board,
                question_prompt=make_question_prompt(df, board=board, instructions=instructions),
                translation_prompt=make_question_to_code_prompt(df),
            )
        particles = await smc_standard(model, n_particles=N_PARTICLES)
        df_trial = []
        for i, p in enumerate(particles):
            df_p = pd.DataFrame(p.get_final_results())
            df_p["particle"] = i
            df_p["model_type"] = model_type
            df_trial.append(df_p)
        df_trial = pd.concat(df_trial).reset_index(drop=True)
        df_trial["trial_id"] = trial_id
        df_results.append(df_trial)
        df_results = pd.concat(df_results).reset_index(drop=True)
        if not os.path.isfile('hfppl_results.csv'):
            df_results.to_csv('hfppl_results.csv', header='column_names', index=False)
        else:
            df_results.to_csv("hfppl_results.csv", mode="a", index=False)

--------------------------------------------------------------------------------
Trial 1
--------------------------------------------------------------------------------
Partial question: Is
|- EIG mean: 0.7563
|- EIG max: 0.7563
|- Particle weight: 0.7563
  |- Completion: Is the red ship touching the blue ship?
    |- Translation: (touch Red Blue)
    |- Score: 0.7563
  |- Completion: Is the blue ship touching the red ship?
    |- Translation: (touch Blue Red)
    |- Score: 0.7563
  |- Completion: Is the red ship touching the blue ship?
    |- Translation: (touch Red Blue)
    |- Score: 0.7563

Partial question: How
|- EIG mean: 0.4536
|- EIG max: 1.3608
|- Particle weight: 1.3608
  |- Completion: How many ships are there?
    |- Translation: (++ (set AllColors))
    |- Score: 0.0000
  |- Completion: How many blocks is the blue ship?
    |- Translation: (size Blue)
    |- Score: 1.3608
  |- Completion: How many coordinates contain a ship?
    |- Translation: (++ (map (lambda x0 (lengt

In [21]:
# Display the results table.
# NOTE(review): the saved output of this cell has an `Unnamed: 0` column, which
# the evaluation loop in this notebook never creates (it writes with
# index=False) — presumably the frame shown was loaded elsewhere; confirm and
# re-run from a fresh kernel.
df_results

Unnamed: 0,prefix,completion,translation,score,type,particle,model_type,trial_id
0,Is the large blue ship horizontal and vertical?,Is the large blue ship horizontal and vertical?,(and (== (orient LargeBlue) H) (== (orient Lar...,0.0,final,0,COMBINED,1
1,Is,Is the purple ship next to the blue ship?,(touch Purple Blue),0.756291,rollout,0,COMBINED,1
2,Is,Is there a ship in the top row?,(any (map (lambda x0 (== (top x0) 1)) (set All...,0.0,rollout,0,COMBINED,1
3,Is,Is the blue ship on the leftmost column?,(== (topleft (coloredTiles Blue)) (0-0)),0.0,rollout,0,COMBINED,1
4,Is the,Is the purple ship at least as long as the blu...,(>= (size Purple) (size Blue)),0.0,rollout,0,COMBINED,1
5,Is the,Is the purple ship above the blue ship?,(above Purple Blue),0.0,rollout,0,COMBINED,1
6,Is the,Is the blue ship horizontal?,(== (orient Blue) H),0.993542,rollout,0,COMBINED,1
7,Is the large,Is the large ship horizontal?,(== (orient Large) H),0.0,rollout,0,COMBINED,1
8,Is the large,Is the large ship horizontal?,(== (orient Large) H),0.0,rollout,0,COMBINED,1
9,Is the large,Is the large ship at 1A to the left of the sma...,(left (coloredTiles Large) (coloredTiles Small)),0.0,rollout,0,COMBINED,1
