**Full project write-up at: https://www.kaggle.com/competitions/llm-20-questions/discussion/529643**

In [None]:
########## Requirements for Kaggle dataset to run:

# Add the following datasets:
#   https://www.kaggle.com/datasets/jademonk/frequencies
#   https://www.kaggle.com/datasets/canming/llama-3-1-8b-instruct
#   the `keyword-list` dataset (must provide /kaggle/input/keyword-list/keywords.txt, which a later cell copies)

# Enable GPU and Internet for the notebook

In [None]:
%%bash
# Create the scratch directory and the submission directory; -p makes this safe to re-run.
for dir in /kaggle/tmp/ /kaggle/working/submission; do
    mkdir -p "$dir"
done


In [None]:
# Display the version of NumPy that the kernel imports at this point in the notebook.
import numpy as np
np.__version__

In [None]:
# Display the NumPy version recorded in the installed package metadata.
from importlib.metadata import version
version('numpy')

In [None]:
# Install dependencies to tmp directory for packaging in solution

import os, sys
# Install bitsandbytes (without its dependencies) into /kaggle/tmp/lib.
# NOTE(review): the exit status of os.system is not checked, so a failed install goes unnoticed here.
os.system("pip install --no-dependencies -U -t /kaggle/tmp/lib bitsandbytes")
# Put both library directories at the front of the import path;
# /kaggle/tmp/lib is inserted last and therefore takes precedence.
sys.path.insert(0, "/kaggle/working/submission/lib")
sys.path.insert(0, "/kaggle/tmp/lib")

# Not the last expression of the cell, so this value is evaluated but not displayed.
import numpy as np
np.__version__

# Record the currently installed packages in modified_requirements.txt.
!pip freeze > modified_requirements.txt

In [None]:
# Set up Llama 3.1

import json
import os  # imported here as well so the cell does not depend on an earlier cell
import shutil

src_path = r"/kaggle/input/llama-3-1-8b-instruct"
dst_path = r"/kaggle/tmp/llama-3-1/"
# dirs_exist_ok=True keeps the cell re-runnable: without it copytree raises
# FileExistsError when the destination was already created by a previous run.
shutil.copytree(src_path, dst_path, dirs_exist_ok=True)

# Fix bug preventing Llama 3.1 from working on Kaggle:
# overwrite the model's rope_scaling entry with a dynamic scaling config.
config_path = os.path.join(dst_path, "config.json")
with open(config_path, "r") as file:
    config = json.load(file)
config["rope_scaling"] = {"factor":8.0,"type":"dynamic"}
with open(config_path, "w") as file:
    json.dump(config, file)

In [None]:
# Copy keyword lists into working and submission directories

import shutil

# Each source file is copied twice: once into the working directory and once
# into the submission package.
# (kw_cats_for_test.csv from /kaggle/input/kw-kats/ used to be copied the same way; currently disabled.)
for src_path, file_name in (
    (r"/kaggle/input/keyword-list/keywords.txt", "keywords.txt"),
    (r"/kaggle/input/frequencies/my_freq.csv", "my_freq.csv"),
):
    for target_dir in ("/kaggle/working", "/kaggle/working/submission"):
        dst_path = target_dir + "/" + file_name
        shutil.copy(src_path, dst_path)

In [None]:
# Display the NumPy version recorded in the installed package metadata.
import importlib
from importlib.metadata import version 
version("numpy")


In [None]:
%%writefile -a submission/prompts.py 

# Put line at top

import string
import unicodedata


# None of the Public Keywords have accent marks, so we should convert everything to ASCII
def strip_accents(text):
    """Return ``text`` with accent marks removed and non-ASCII characters dropped.

    Args:
        text: The string to convert.

    Returns:
        An ASCII-only ``str``.
    """
    # NFD splits an accented letter into its base letter plus a combining mark;
    # encoding to ASCII with errors="ignore" then discards the marks together
    # with any other character that has no ASCII form.
    # (The former Python 2 ``unicode`` shim was dead code under Python 3.)
    decomposed = unicodedata.normalize('NFD', text)
    return decomposed.encode('ascii', 'ignore').decode('ascii')

# From Kaggle environment
def normalize(s):
    """Reduce ``s`` to a lowercase, space-free, punctuation-free, accent-free form.

    Every occurrence of the substring "the" is removed as well, including
    occurrences inside longer words.
    """
    punctuation_remover = str.maketrans("", "", string.punctuation)
    lowered = s.lower()
    without_the = lowered.replace("the", "")
    compact = without_the.replace(" ", "")
    return strip_accents(compact.translate(punctuation_remover))

# This class wraps prompt Q&A called by the main agent
class Prompter:
    def __init__(self, model, tokenizer):
        self.model = model
        self.tokenizer = tokenizer

    # Turn a stack of messages into an answer
    def generate_from_messages(self, messages, temp=1.4, max_tokens=256):    
        input_ids = self.tokenizer.apply_chat_template(
            messages,
            add_generation_prompt=True,
            return_tensors="pt"
        ).to(self.model.device)
        
        terminators = [
            self.tokenizer.eos_token_id,
            self.tokenizer.convert_tokens_to_ids("<|eot_id|>")
        ]
        
        outputs = self.model.generate(
            input_ids,
            max_new_tokens=256,
            eos_token_id=terminators,
            do_sample=True,
            temperature=temp,
            top_p=0.9,
        )
        response = outputs[0][input_ids.shape[-1]:]
        return self.tokenizer.decode(response, skip_special_tokens=True)

    # Base wrapper for all system prompts
    def sys_wrapper(self, sys_prompt):
        """Return a new one-element message list holding ``sys_prompt`` as the system message."""
        return [{"role": "system", "content": sys_prompt}]

    # Combine prompts and run messages
    def run_prompts(self, messages, prompt, temp=1.4, DEBUG=False):        
        usr_message = {"role": "user", "content": prompt}
        if DEBUG:
            print(prompt + '\n') 
        messages.append(usr_message)
        output = self.generate_from_messages(messages, temp)
        if DEBUG:
            print(output + '\n')
        return output
        
    # System prompt for standard list-maker
    def sys_listmaker(self):
        return self.sys_wrapper("""You are a helpful AI assistant who is skilled in creating diverse lists of objects
        and asking questions to subdivide these objects into separate categories.\n""")

    # System prompt for re-phrasing sentences
    def sys_rephrase(self):
        return self.sys_wrapper("""You are a helpful AI assistant who is skilled at rephrasing questions. 
        You respect the integrity and meaning of the original sentence and do in-place substitutions only with no hallucinations."""   ) 

    # System prompt for answering questions
    def sys_answerer(self):
        return self.sys_wrapper("""You are a helpful AI assistant who is skilled in at answering yes-no-questions. 
        You are highly accurate and demonstrate a strong understanding of question nuances.""")

    # System prompt for fixing grammar
    def sys_grammar_editor(self):
        return self.sys_wrapper("""You are a helpful AI assistant who is skilled at editing and understanding grammar, sentence structure, and syntax. """)

    # Creates diverse lists of 30 things matching input criteria
    def list_of_thirty(self, pos_plural, prior_pos_plural, location, size, questions=None, answers=None, temp=1.4, DEBUG=False):
        messages = self.sys_listmaker()
        
        ask_prompt = "Create a list of 30 " + prior_pos_plural + " that are " 
        if len(size) > 0:
            ask_prompt += size + " "
        ask_prompt += pos_plural 
        if len(location) > 0:
            ask_prompt += "typically located in or at " + location
        ask_prompt += ". "
        #ask_prompt += "The things must be unique, diverse, " + pos_plural + ", and as different as possible from each other. "
        ask_prompt += "The " + pos_plural + " must be common examples of " + pos_plural + " and representative of a range of different possible options. "
        # ask_prompt += "The things must represent examples of as many different categories of " + pos_plural + " as possible. "
        ask_prompt += "The answer should be returned as a comma-separated list with no additional verbose output. "
        ask_prompt += "None of the " + pos_plural + " may be repeated. "
        # ask_prompt += "Each item in the list should be as different as possible from the prior item. "
        ask_prompt += "No words in the list may be repeated. "
        ask_prompt += "Each of the " + pos_plural + " in the list ABSOLUTELY MUST MEET EACH the following criteria:\n"
        for i, q, a in zip(range(0, len(questions)),questions, answers):
            ask_prompt += " " + str(i+1) + ". " + q + " " + a + ".\n"
        ask_prompt += " Respond with the comma-separated list only. Order the responses according to the most likely or common choices according to the criteria above."

        return self.run_prompts(messages, ask_prompt, temp, DEBUG)

    # Creates diverse lists of 30 things matching input criteria, including auto-summary of current knowledge
    def list_of_thirty_withsummary(self, pos_plural, prior_pos_plural, location, material, size, summary, temp=1.4, DEBUG=False):
        messages = self.sys_listmaker()

        ask_prompt = "Create a list of 30 " + prior_pos_plural + " that are " 
        if len(size) > 0:
            ask_prompt += size + " "
        ask_prompt += pos_plural 
        if material is not None:
            ask_prompt += " mostly made of " + material
        if len(location) > 0:
            ask_prompt += " typically located in or at " + location
        ask_prompt += ", "
        ask_prompt += f"matching the following description: {summary}\n"
        #ask_prompt += "The things must be unique, diverse, " + pos_plural + ", and as different as possible from each other. "
        ask_prompt += "The " + pos_plural + " must be common examples of " + pos_plural + " and representative of a range of different possible options. "
        # ask_prompt += "The things must represent examples of as many different categories of " + pos_plural + " as possible. "
        ask_prompt += "The answer should be returned as a comma-separated list with no additional verbose output. "
        ask_prompt += "None of the " + pos_plural + " may be repeated. "
        # ask_prompt += "Each item in the list should be as different as possible from the prior item. "
        ask_prompt += "No words in the list may be repeated. "
        # ask_prompt += "Each of the " + pos_plural + " in the list ABSOLUTELY MUST MEET the following description:\n"
        ask_prompt += "Respond with the comma-separated list only. Order the responses according to the most likely or common choices according to the criteria above."

        return self.run_prompts(messages, ask_prompt, temp, DEBUG)

    # Thirty diverse locations question maker
    def list_of_thirty_geo(self, type, plural, questions=None, answers=None, temp=1.4, DEBUG=False):
        messages = self.sys_listmaker()
        ask_prompt = "Create a list of 30 geographic " + plural + ". "
        ask_prompt += "The " + plural + " must be unique, diverse, and as different as possible from each other. "
        ask_prompt += "The things must represent examples of as many different categories or geographies of " + plural + " as possible. "
        ask_prompt += "The answer should be returned as a comma-separated list with no additional verbose output. "
        ask_prompt += "None of the " + plural + " may be repeated. "
        ask_prompt += "Each " + type + " in the list should be as different as possible from the prior " + type + ". "
        ask_prompt += "No words in the list may be repeated. "
        ask_prompt += "Each of the " + plural + " in the list should meet the following criteria:"
        for i, q, a in zip(range(0, len(questions)),questions, answers):
            ask_prompt += " " + str(i+1) + ". " + q + " " + a + "."
        ask_prompt += " Respond with the comma-separated list only."
        return self.run_prompts(messages, ask_prompt, temp, DEBUG)

    # Prompt to create two candidate subcategories for a given taxonomic category
    def split_category(self, prior_pos_plural, plural, negative_categories, temp=1.4, DEBUG=False):
        messages = self.sys_listmaker()
        prompt = 'Divide the category "' + prior_pos_plural + ' that are ' + plural + '" into two broad, clearly-defined, non-overlapping sub-categories,'
        prompt += 'ensuring that all ' + prior_pos_plural + ' that are ' + plural + ' fall into one category or the other, but not both. '
        prompt += 'Respond with the names of the two sub-categories separated by a comma only. Do NOT repeat the original category. Use common phrasing. '
        prompt += 'Each sub-category must be a noun or noun phrase.'
        if len(negative_categories) > 0:
            prompt += " Do not use any of the following sub-categories: "
            for cat, i in zip(negative_categories, range(0, len(negative_categories))):
                prompt += cat
                if i < len(negative_categories)-1:
                    prompt += ", "
        return self.run_prompts(messages, prompt, temp, DEBUG)

    # Prompt to create two candidate subcategories for a given taxonomic category, using auto-summary of known information
    def split_category_withsummary(self, summary, negative_categories, temp=1.4, DEBUG=False):
        messages = self.sys_listmaker()
        prompt = 'Divide the category described as "' + summary + '" into two broad, clearly-defined, non-overlapping sub-categories,'
        prompt += 'ensuring that all members of the category fall into one category or the other, but not both. '
        prompt += 'Respond with the names of the two sub-categories separated by a comma only. Do NOT repeat the original category. Use common phrasing. '
        prompt += 'Each sub-category must be a noun or noun phrase.'
        if len(negative_categories) > 0:
            prompt += " Do not use any of the following sub-categories: "
            for cat, i in zip(negative_categories, range(0, len(negative_categories))):
                prompt += cat
                if i < len(negative_categories)-1:
                    prompt += ", "
        return self.run_prompts(messages, prompt, temp, DEBUG)

    # Test whether something belongs to a category or not
    def question_category(self, category, temp=1.4, DEBUG=False):
        messages = self.sys_grammar_editor()

        prompt = 'Rephrase the question "Is it commonly and often described as a '+ category + '?" '
        prompt += " to use correct grammar, including modification of plurality, particle, or gender. Do not change any of the key words or concepts. "
        prompt += "The question should be a simple yes or no question. Respond with the question only without any additional introduction or conclusion."
        return self.run_prompts(messages, prompt, temp, DEBUG)

    # Test whether something is found in a certain location or not
    def question_location(self, location, temp=1.4, DEBUG=False):
        messages = self.sys_grammar_editor()

        prompt = 'Rephrase the question "Are they typically found in or at ' + location + '?"'
        prompt += " to use correct grammar, including modification of plurality, particle, or gender. Do not change any of the key words or concepts. "
        prompt += "The question should be a simple yes or no question. Respond with the question only without any additional introduction or conclusion."
        return self.run_prompts(messages, prompt, temp, DEBUG)

    # Generate noun phrases beginning with a known first word
    def noun_phrases(self, word, exclude=[], temp=0.6, DEBUG=False):
        messages = self.sys_grammar_editor()
        prompt = f"Create 20 noun phrases describing tangible things that begin with the word '{word}'. Each noun phrase should be exactly two words long, and must begin with the word '{word}' followed by a space without any modification. Respond with the noun phrases in a comma-separated list with no introduction or other additional text."
        if len(exclude) > 0:
            prompt += " Do not include any of the following: '" + "', '".join(exclude) + "'."
        return self.run_prompts(messages, prompt, temp, DEBUG)

    # Is the thing a tangible object?
    def tangible_object(self, phrase, temp=0.6, DEBUG=False):
        """Ask whether ``phrase`` names a tangible thing and return the normalized reply."""
        request = (
            f"Is '{phrase}' typically a tangible thing that can be seen or felt? "
            "Respond with a single word yes or no, without any introduction or additional text. "
            "Do not add punctuation to the answer."
        )
        reply = self.run_prompts(self.sys_grammar_editor(), request, temp, DEBUG)
        return normalize(reply)

    # Test whether the keyword has a particular size
    def question_size(self, size, temp=1.4, DEBUG=False):
        messages = self.sys_grammar_editor()

        prompt = 'Rephrase the question "Are they typically ' + size + '?"'
        prompt += " to use correct grammar, including modification of plurality, particle, or gender. Do not change any of the key words or concepts. "
        prompt += "The question should be a simple yes or no question. Respond with the question only without any additional introduction or conclusion."
        return self.run_prompts(messages, prompt, temp, DEBUG)

    # Fix the grammar of a prompt
    def fix_grammar(self, prompt, temp=1.4, DEBUG=False):
        messages = self.sys_grammar_editor()
        prompt = 'Rephrase the following question to use correct grammar: ' + prompt
        prompt += ' Do not change any of the key words or concepts. The question should be a simple yes or no question. Respond with the question only without any additional introduction or conclusion.'
        return self.run_prompts(messages, prompt, temp, DEBUG)

    # Identify the part of speech of a prompt
    def part_of_speech(self, prompt, temp=0.1, DEBUG=False):
        messages = self.sys_grammar_editor()
        prompt = 'What is the English grammar part of speech of the word "' + prompt + '"? '
        prompt += 'Respond with the single word part of speech only. For example, repond with one of the following: noun, adjective, adverb, preposition.'
        return self.run_prompts(messages, prompt, temp, DEBUG)

    # Find an inverse category descriptor
    def negative_category(self, category, tested_category, temp=1.4, DEBUG=False):
        messages = self.sys_listmaker()
        prompt = 'Create a simple, singular category label for ' + category 
        prompt += ' that are NOT ' + tested_category + ' which includes all other possible ' + category
        prompt += '. Use common phrasing. Respond with a single label only, with no introductory or concluding text.'
        return self.run_prompts(messages, prompt, temp, DEBUG)
        
    # Thirty diverse things question maker - used in default modality
    def question_thirty(self, prior_output, questions=None, temp=1.4, DEBUG=False):
        messages = self.sys_listmaker()
        
        chat_template = "Create a simple yes-or-no question, responding with the question only "
        chat_template += "and no introduction or additional verbose details. "
        chat_template += "The question should broadly categorize and divide the following into two equally-sized lists: "
        chat_template += prior_output + ".\n\n"
        chat_template += "Do not include questions similar, equivalent, or directly opposite to the following: "
        for q in questions:
            chat_template += q + ", "
        chat_template = chat_template[:-2] + ". "
        chat_template += "Ensure that the question is simple, unambiguous, clear, and can be answered either yes or no. "
        chat_template += "Do not create compound questions. "
        chat_template += "The question may explore different aspects or characteristics of the list items including size, appearance, function, location, usage, and other defining characteristics. "
        chat_template += "The question should create a general or broad classification of the two categories and should not be overly specific."

        return self.run_prompts(messages, chat_template, temp, DEBUG)

    # Do thirty questions, but using geography
    def question_thirty_geo(self, type, plural, prior_output, questions=None, temp=1.4, DEBUG=False):
        messages = self.sys_listmaker()

        chat_template = "Create a simple positive yes-or-no question, responding with the question only "
        chat_template += "and no introduction or additional verbose details. "
        chat_template += "The question should broadly categorize and divide the following " + plural + " into two equally-sized lists: "
        chat_template += prior_output + ".\n\n"
        chat_template += "Do not include questions similar, equivalent, or directly opposite to the following: "
        for q in questions:
            chat_template += q + ", "
        chat_template = chat_template[:-2] + ". "
        chat_template += "Ensure that the question is simple, unambiguous, clear, positively-framed, and can be answered either yes or no. "
        chat_template += "Do not create compound questions. "
        chat_template += "The question may explore different aspects or characteristics of the " + plural + " including geographic location by "
        chat_template += "continent, hemisphere, or latitude, population, primary language, membership in international organizations, "
        chat_template += "unique features, or other characteristics. "
        chat_template += "The question should create a general or broad classification of the two categories and should not be overly specific."

        return self.run_prompts(messages, chat_template, temp, DEBUG)

    # Rephrase the question with the keyword as subject
    def rephrase_with_kw(self, question, keyword, temp=1.4, DEBUG=False):
        messages = self.sys_rephrase()
        rephrase_prompt = "Rephrase the following question to use '" + keyword 
        rephrase_prompt += "' as the subject of the sentence, adjusting tense, gender, and plurality as needed: \""
        rephrase_prompt += question + "\" Respond with the question only and no additional introduction or other text. "
        rephrase_prompt += "Do not change any important words other than the subject of the sentence. The rest of the sentence context should be unaltered."
        return self.run_prompts(messages, rephrase_prompt, temp, DEBUG)

    # The main prompt used by the answerer bot
    def answer_question(self, question, temp=1.4, DEBUG=False):
        """Ask the model to answer ``question`` with yes or no and return the normalized reply."""
        request = (
            f"In layman's terms, {question} "
            "Answer this question in the most general, common sense as possible, ignoring any trivial nuances or exceptions. "
            "Please limit response to a single word, either yes or no, with no introduction, conclusion, or other verbose details. "
            "Do not add punctuation to the response.\n"
        )
        reply = self.run_prompts(self.sys_answerer(), request, temp, DEBUG)
        return normalize(reply)

    # Split the word into singular and plural forms
    def singular_plural(self, phrase, temp=0.1, DEBUG=False):
        if phrase is None:
            return '', ''
        messages = self.sys_grammar_editor()
        ask_prompt = "Give me the singular and plural form of the phrase '" + phrase 
        ask_prompt += "' as two entries separated by the word aardvark. List the singular form first, the word aardvark, "
        ask_prompt += "then the plural form. Do not add additional introductory or concluding text."
        output = self.run_prompts(messages, ask_prompt, temp, DEBUG)
        pieces = output.split(' aardvark ')
        if len(pieces) == 2:
            return pieces[0], pieces[1]
        else:
            return output, output

    # Is the keyword a proper name? This prompt has a high error rate.
    def proper_name(self, keyword, temp=0.1, DEBUG=False):
        """Ask whether ``keyword`` is a proper name and return the normalized reply."""
        request = "".join((
            "Is '",
            keyword,
            "' a proper name? Reply yes or no only, with no introduction or extra verbose text. Do not add punctuation to the response.",
        ))
        reply = self.run_prompts(self.sys_grammar_editor(), request, temp, DEBUG)
        return normalize(reply)

    # Find the subject of the sentence
    def subject(self, question, temp=0.1, DEBUG=False):
        messages = self.sys_grammar_editor()
        prompt = "What noun, noun phrase, or pronoun is the subject of the following question?\n"
        prompt += "'" + question + "' Respond with the subject word only with no introduction or other verbose text. Do not add punctuation to the response."
        return self.run_prompts(messages, prompt, temp, DEBUG)

    # Is the keyword plural?
    def plural(self, keyword, temp=0.1, DEBUG=False):
        """Ask whether ``keyword`` is grammatically plural and return the normalized reply."""
        request = "".join((
            "Is '",
            keyword,
            "' in grammatically plural form? Reply yes or no only, with no intoduction or other verbose text. Do not add punctuation to the response.",
        ))
        reply = self.run_prompts(self.sys_grammar_editor(), request, temp, DEBUG)
        return normalize(reply)

    # Create candidate locations where objects of the known category may be found
    def locations_list(self, second_last_cat, last_cat, temp=1.4, DEBUG=False):
        messages = self.sys_listmaker()
        prompt = "Provide several broad categories of locations where " + second_last_cat 
        prompt += " that are " + last_cat + " are most often located. Respond with the category names in comma-separated format only."
        output = self.run_prompts(messages, prompt, temp, DEBUG)
        return output

    # Divide a geographic continent into sub-regions
    def continental_regions_list(self, continent, temp=1.4, DEBUG=False):
        messages = self.sys_listmaker()
        prompt = "What are a few commonly-used subdivisions to describe country groups within " + continent 
        prompt += "? Respond with the subdivisions in comma-separated form with no introduction or additional verbose text."
        output = self.run_prompts(messages, prompt, temp, DEBUG)
        return output

    # Further divide into sub-sub-regions
    def continental_region_subs_list(self, continent, region, temp=1.4, DEBUG=False):
        messages = self.sys_listmaker()
        prompt = "What are a few commonly-used subdivisions to describe country groups within the " + region + " subregion of " + continent 
        prompt += "? Respond with the largest four or five subdivisions in comma-separated form with no introduction or additional verbose text."
        output = self.run_prompts(messages, prompt, temp, DEBUG)
        return output

    # Divide large countries into smaller regions
    def country_subregions_list(self, country, temp=1.4, DEBUG=False):
        messages = self.sys_listmaker()
        prompt = "What are a few commonly-used subdivisions to describe groups of states, provinces, or other areas within " + country 
        prompt += "? Respond with the largest four or five subdivisions in comma-separated form with no introduction or additional verbose text."
        output = self.run_prompts(messages, prompt, temp, DEBUG)
        return output

    # Is the keyword a country?
    def is_a_country(self, to_test, temp=0.6, DEBUG=False):
        messages = self.sys_answerer()
        prompt = "Is " + to_test + " a single, sovereign country? Respond with the single word yes or no only, with no added text, verbosity, or punctuation."
        return self.run_prompts(messages, prompt, temp, DEBUG)

    # Enumerate a list of candidate countries within a subregion
    def country_list(self, continent, region, temp=1.4, DEBUG=False):
        messages = self.sys_listmaker()
        if region is not None:
            prompt = "Enumerate all of the countries in the " + region + " subregion of " + continent 
        else:
            prompt = "Enumerate all of the countries in " + continent 
        prompt += ", ordered from largest to smallest. Respond with the country names in comma-separated form with no introduction or additional verbose text."
        output = self.run_prompts(messages, prompt, temp, DEBUG)
        return output

    # More general form of country candidate enumeration
    def country_list(self, continent, region, subregion, temp=1.4, DEBUG=False):
        messages = self.sys_listmaker()
        if region is not None and subregion is not None:
            prompt = "Enumerate all of the countries in the " +subregion+ " subregion of the " + region + " subregion of " + continent 
        elif region is not None:
            prompt = "Enumerate all of the countries in the " + region + " subregion of " + continent 
        else:
            prompt = "Enumerate all of the countries in " + continent 
        prompt += ". Respond with the country names in comma-separated form with no introduction or additional verbose text."
        output = self.run_prompts(messages, prompt, temp, DEBUG)
        return output

    # Reframe the question-answer pair into a statement of fact. Used for auto-summarization of knowledge.
    def reframe_as_statement(self, question, answer, temp=1.4, DEBUG=False):
        messages = self.sys_rephrase()
        prompt = "The answer to: " + question + " is " + answer + ". Restate this question and answer as a single statement, accurately reflecting the substance of both the question and the answer. "
        prompt += "Respond with the statement only, with no introduction or other text added."
        output = self.run_prompts(messages, prompt, temp, DEBUG)
        return output

    # Key function used in auto-summarization of known information
    def update_summary(self, old_summary, new_statement, temp=1.4, DEBUG=False):
        messages = self.sys_rephrase()
        prompt = "A category is described by the following category summary: " + old_summary
        prompt += " Update this detailed summary to reflect the following new information, consolidating redundant information where needed: " + new_statement
        prompt += " If the new information conflicts with the summary, assume the new information is correct. Do not remove any significant details such as abjectives or exclusions. Respond with the revised, detailed summary text only. Do not add any introduction or additional verbosity."
        output = self.run_prompts(messages, prompt, temp, DEBUG)
        return output

    # Is this an alphabetical order question?
    def alpha_check(self, question, temp=0.6, DEBUG=False):
        """Ask whether ``question`` is about alphabetical ordering and return the normalized reply."""
        request = "".join((
            'Does the question "',
            question,
            '" ask whether the keyword comes after another word in alphabetical order, sorting order, and/or lexicographical order? Answer yes or no only with no introduction or additional text. Do not add punctuation to the response.',
        ))
        reply = self.run_prompts(self.sys_grammar_editor(), request, temp, DEBUG)
        return normalize(reply)

    # Find comparision word in the alphabetical order question
    def alpha_extract_word(self, question, temp=0.6, DEBUG=False):
        messages = self.sys_grammar_editor()
        prompt = 'What word or letter does the question "'+question+'" want me to compare the keyword to? Respond with the noun phrase, word, or letter only. Do not add punctuation to the response.'
        return self.run_prompts(messages, prompt, temp, DEBUG)

    # What type of ordering are we looking for?
    def alpha_earlier_later(self, question, test_word, temp=0.6, DEBUG=False):
        messages = self.sys_grammar_editor()
        prompt = 'Does the question "'+question+'" want me to test whether the keyword is earlier or later compared to "'+test_word+'" in lexicographical order? Respond with the word earlier or later only. Do not add punctuation to the response.'
        return self.run_prompts(messages, prompt, temp, DEBUG)

    # Does the question ask about containing letters?
    def alpha_container_check(self, question, temp=0.6, DEBUG=False):
        messages = self.sys_grammar_editor()
        prompt = 'Does the question "' + question + ' explicitly list a particular letter or list of letters? Answer yes or no only, with no additional text. Do not add punctuation to the response.'
        return self.run_prompts(messages, prompt, temp, DEBUG)

    # Extract the test letters from the question
    def alpha_extract_letters(self, question, temp=0.6, DEBUG=False):
        messages = self.sys_grammar_editor()
        prompt = 'What letters does the question "' + question + '" want me to test? Respond with the individual letter or letters only in a comma-separated list, with no additional text.'
        return self.run_prompts(messages, prompt, temp, DEBUG)

    # Does the question ask about the start letter?
    def alpha_begins_contains(self, question, temp=0.6, DEBUG=False):
        messages = self.sys_grammar_editor()
        prompt = 'Does the question "'+question+'" want me to test whether the keyword begins with or contains particular letters? Respond with either the single word "begins" or "contains" only, and do not add any additional text or introduction. Do not add punctuation to the answer.'
        return self.run_prompts(messages, prompt, temp, DEBUG)

    # Is the question asking me to compare the keyword against an explicit list?
    def alpha_explicit_list(self, question, temp=0.6, DEBUG=False):
        """Ask whether ``question`` contains an explicit list of candidates and return the normalized reply."""
        request = "".join((
            'Does the question "',
            question,
            '" include an explicit list of words or phrases to match an unknown keyword to? Respond with yes or no only. Do not add punctuation to the answer.',
        ))
        reply = self.run_prompts(self.sys_grammar_editor(), request, temp, DEBUG)
        return normalize(reply)

    # Extract the explicit list from the question
    def alpha_extract_list(self, question, temp=0.6, DEBUG=False):
        messages = self.sys_grammar_editor()
        prompt = 'What are the entries in the explicit list provided by the question "'+question+'" Respond with the list entries in comma-separated format only with no additional text.'
        return self.run_prompts(messages, prompt, temp, DEBUG)



In [None]:
%%writefile -a submission/tagmanager.py 


# This class stores state information to disk
# I built this assuming that my agent would be re-initialized at each step
# However, the objects persist from step to step, so it's easier just to store state information in the objects themselves
# A simple dictionary would have sufficed.


from prompts import Prompter
import json
import os

class TagManager():
    """Keep the agent's named "tags" in memory and mirror them to ``tags.json``.

    Every tag is stored under a string label as a ``(singular, plural)`` pair.
    Scalar settings (modality, iter, location, ...) store the same value in
    both slots, so every getter returns a 2-item sequence that callers unpack.
    Pairs written by this process are tuples; pairs loaded back from
    ``tags.json`` are lists, because JSON has no tuple type.
    """

    def __init__(self, prompter):
        # prompter must provide singular_plural(word, temp, debug) returning
        # a (singular, plural) pair; it is used to derive missing plurals.
        self.prompter = prompter
        self.tags = {}
        self.read_tags()

    # ------------------------------------------------------------------
    # Persistence
    # ------------------------------------------------------------------

    # Write tags to file
    def write_tags(self):
        with open('tags.json', 'w', encoding='utf-8') as f:
            json.dump(self.tags, f, ensure_ascii=False, indent=4)

    # Read tags from file
    def read_tags(self):
        if os.path.exists('tags.json'):
            # Read with the same encoding write_tags uses; the file may hold
            # raw non-ASCII characters because of ensure_ascii=False.
            with open('tags.json', 'r', encoding='utf-8') as f:
                self.tags = json.load(f)
        else:
            self.tags = {}

    # ------------------------------------------------------------------
    # Generic helpers
    # ------------------------------------------------------------------

    def _get_tag(self, label, default):
        """Return the stored pair for ``label``, or ``default`` when unset."""
        if label in self.tags:
            return self.tags[label]
        return default

    def _set_same(self, label, value):
        """Store ``value`` in both the singular and the plural slot of ``label``."""
        self.make_tag(label, value, value)

    def _fill_plurals(self, singulars, plurals):
        """Return ``plurals`` extended so it has one entry per item of ``singulars``.

        Missing trailing plurals are generated with the prompter. A list passed
        by the caller is extended in place; ``None`` starts a fresh list so no
        state is shared between calls.
        """
        if plurals is None:
            plurals = []
        for word in singulars[len(plurals):]:
            plurals.append(self.prompter.singular_plural(word, 0.1, False)[1])
        return plurals

    # Make a new tag, storing both singular and plural forms where appropriate
    def make_tag(self, label, singular, plural=None):
        """Store ``(singular, plural)`` under ``label``, persist, and return the pair.

        When ``plural`` is None it is obtained from the prompter.
        """
        if plural is None:
            _, plural = self.prompter.singular_plural(singular, 0.1, False)
        self.tags[label] = (singular, plural)
        self.write_tags()
        print("Writing tags:\n\n")
        print(json.dumps(self.tags) + "\n")
        return (singular, plural)

    # Delete a tag
    def delete_tag(self, label):
        if label in self.tags:
            del self.tags[label]
            self.write_tags()

    # ------------------------------------------------------------------
    # Category tags
    # ------------------------------------------------------------------

    def get_category(self):
        return self._get_tag("category", ("unknown keyword", "unknown keywords"))

    def set_category(self, singular, plural=None):
        # NOTE(review): this re-saves the *existing* prior category instead of
        # the current category before overwriting it - confirm whether
        # get_category() was intended here. Behaviour is kept as-is.
        s_prior, p_prior = self.get_prior_category()
        self.make_tag("prior category", s_prior, p_prior)
        self.make_tag("category", singular, plural)

    def get_prior_category(self):
        return self._get_tag("prior category", ("unknown keyword", "unknown keywords"))

    def set_prior_category(self, singular, plural=None):
        self.make_tag("prior category", singular, plural)

    def get_tested_category(self):
        # Falls back to the current category when nothing has been tested yet.
        if "tested category" in self.tags:
            return self.tags["tested category"]
        return self.get_category()

    def set_tested_category(self, singular, plural=None):
        self.make_tag("tested category", singular, plural)

    def get_all_tested_categories(self):
        return self._get_tag("all tested categories", ([], []))

    def set_all_tested_categories(self, singulars, plurals=None):
        plurals = self._fill_plurals(singulars, plurals)
        self.make_tag("all tested categories", singulars, plurals)

    def get_alternate_category(self):
        return self._get_tag("alternate category", (None, None))

    def set_alternate_category(self, singular, plural=None):
        self.make_tag("alternate category", singular, plural)

    def set_last_positive_category(self, singular, plural=None):
        # Also records the category in the running list of positive categories.
        s, p = self.make_tag("last positive category", singular, plural)
        pos_s, pos_p = self.get_positive_categories()
        if s not in pos_s:
            pos_s.append(s)
            pos_p.append(p)
            self.set_positive_categories(pos_s, pos_p)

    def get_last_positive_category(self):
        return self._get_tag("last positive category", (None, None))

    def get_negative_categories(self):
        return self._get_tag("negative categories", ([], []))

    def set_negative_categories(self, singulars, plurals=None):
        plurals = self._fill_plurals(singulars, plurals)
        self.make_tag("negative categories", singulars, plurals)

    def get_positive_categories(self):
        return self._get_tag("positive categories", (["tangible object"], ["tangible objects"]))

    def set_positive_categories(self, singulars, plurals=None):
        plurals = self._fill_plurals(singulars, plurals)
        self.make_tag("positive categories", singulars, plurals)

    # ------------------------------------------------------------------
    # Search-state tags
    # ------------------------------------------------------------------

    def get_modality(self):
        # Creates (and persists) the default "begin" modality on first access.
        if "modality" not in self.tags:
            self.make_tag("modality", "begin", "begin")
        return self.tags["modality"]

    def set_modality(self, new_modality):
        # print(f"\nSetting modality {new_modality}\n")
        self._set_same("modality", new_modality)

    def get_iter(self):
        # Creates (and persists) a zero counter on first access.
        if "iter" not in self.tags:
            self.make_tag("iter", 0, 0)
        return self.tags["iter"]

    def set_iter(self, new_iter):
        self._set_same("iter", new_iter)

    # ------------------------------------------------------------------
    # Location / geography tags
    # ------------------------------------------------------------------

    def get_locations_list(self):
        return self._get_tag("locations_list", ([], []))

    def set_locations_list(self, locations_list):
        self._set_same("locations_list", locations_list)

    def get_continental_regions(self):
        return self._get_tag("continental regions", ([], []))

    def set_continental_regions(self, continental_regions_list):
        self._set_same("continental regions", continental_regions_list)

    def get_country_subregions(self):
        return self._get_tag("country subregions", ([], []))

    def set_country_subregions(self, country_subregions_list):
        self._set_same("country subregions", country_subregions_list)

    def get_country_list(self):
        return self._get_tag("country list", ([], []))

    def set_country_list(self, country_list):
        self._set_same("country list", country_list)

    def set_continental_region(self, continental_region):
        self._set_same("continental region", continental_region)

    def get_continental_region(self):
        return self._get_tag("continental region", (None, None))

    def set_location(self, location):
        self._set_same("location", location)

    def get_location(self):
        return self._get_tag("location", (None, None))

    def set_continent(self, continent, continent_adj):
        # The "plural" slot holds the adjective form of the continent.
        self.make_tag("continent", continent, continent_adj)

    def get_continent(self):
        return self._get_tag("continent", (None, None))

    def set_country(self, country):
        self._set_same("country", country)

    def get_country(self):
        return self._get_tag("country", (None, None))

    def set_country_subregion(self, country_subregion):
        self._set_same("country subregion", country_subregion)

    def get_country_subregion(self):
        return self._get_tag("country subregion", (None, None))

    def set_continental_region_sub(self, continental_region_sub):
        self._set_same("continental region sub", continental_region_sub)

    def get_continental_region_sub(self):
        return self._get_tag("continental region sub", (None, None))

    def set_continental_region_subs(self, continental_region_subs):
        self._set_same("continental region subs", continental_region_subs)

    def get_continental_region_subs(self):
        return self._get_tag("continental region subs", ([], []))

    # ------------------------------------------------------------------
    # Keyword attribute tags
    # ------------------------------------------------------------------

    def set_material(self, material):
        self._set_same("material", material)

    def get_material(self):
        return self._get_tag("material", (None, None))

    def set_size(self, size):
        self._set_same("size", size)

    def get_size(self):
        return self._get_tag("size", (None, None))

    def set_landmark_type(self, landmark_type):
        self._set_same("landmark type", landmark_type)

    def get_landmark_type(self):
        return self._get_tag("landmark type", (None, None))

    def get_statements_list(self):
        return self._get_tag("statements list", ([], []))

    def set_statements_list(self, statements_list):
        self._set_same("statements list", statements_list)

    def get_summary(self):
        return self._get_tag("summary", ("A thing.", "A thing."))

    def set_summary(self, summary):
        self._set_same("summary", summary)

    def get_proper_name(self):
        return self._get_tag("proper name", (None, None))

    def set_proper_name(self, yesno):
        self._set_same("proper name", yesno)

    def get_article(self):
        return self._get_tag("article", ('', ''))

    def set_article(self, article):
        self._set_same("article", article)

    def get_plural(self):
        return self._get_tag("plural", ("no", "no"))

    def set_plural(self, yesno):
        self._set_same("plural", yesno)

    def get_type(self):
        return self._get_tag("type", (None, None))

    def set_type(self, type, plural=None):
        self.make_tag("type", type, plural)
    

In [None]:
%%writefile -a submission/main.py

# Set up the agent
import os
KAGGLE_AGENT_PATH = "/kaggle_simulations/agent/"

# Resolve the model directory and the keyword/frequency files in one place.
# Inside the competition sandbox everything ships under KAGGLE_AGENT_PATH;
# otherwise fall back to the notebook's own input/tmp locations.
if os.path.exists(KAGGLE_AGENT_PATH):
    model_id = os.path.join(KAGGLE_AGENT_PATH, "llama-3-1")
    keywords_txt = os.path.join(KAGGLE_AGENT_PATH, "keywords.txt")
#     kw_cat_txt = os.path.join(KAGGLE_AGENT_PATH, "kw_cats_for_test.csv")
    freq_csv = os.path.join(KAGGLE_AGENT_PATH, "my_freq.csv")
else:
#     model_id = "/kaggle/input/llama-3/transformers/8b-chat-hf/1"
    model_id = "/kaggle/tmp/llama-3-1"
    keywords_txt = "/kaggle/input/keyword-list/keywords.txt"
#     kw_cat_txt = "/kaggle/input/kw-kats/kw_cats_for_test.csv"
    freq_csv = "/kaggle/input/frequencies/my_freq.csv"

# Debug flags
DUMB_GUESSER = False
FIXED_KEYWORD = None

#----------- Copy from here down

# When True the asker starts in the 'precat' (offline category) modality instead of 'category'.
PRECAT_ON = False # Disable my offline analysis - it underperformed versus my online-only solution

# Sampling temperature used by generate_from_messages when the caller passes none.
TEMPERATURE = 1.4 # Default temperature. Overridden in most cases
# Enables the diagnostic print() calls in the Robot methods and is forwarded to the prompter.
DEBUG = True

import transformers
from transformers import AutoTokenizer, AutoModelForCausalLM
from transformers import pipeline
import torch
import sys
import shutil
import unicodedata
import pandas as pd
import math
import random
import re
import datetime
import string



# I thought about having the agent automatically switch over keyword lists on the submission deadline
# However, I didn't want to risk introducing any last-minute bugs into the code, so didn't do this.

# KEYWORD_BIAS = 'on' # Keyword bias causes the LLM to prefer guesses in a pre-defined keyword list

# day = datetime.datetime.today().day
# month = datetime.datetime.today().month

# Time the keyword exclusion to kick in on 8/13
# if month > 8 or (month >= 8 and day > 13):

KEYWORD_BIAS = 'exclude' # Exclude all known public keywords from guessing
# When True the asker opens every game with the "Is it Agent Alpha?" handshake question.
AGENT_ALPHA = True # Turn on alpha binary search behavior

# Silence transformers log output below the error level.
transformers.logging.set_verbosity_error()

# Replace non-ASCII characters with ASCII equivalents
def strip_accents(text):
    """Return `text` with accented characters folded to plain ASCII.

    The string is decomposed (NFD) so accents become separate combining
    marks, then every non-ASCII code point is dropped. Characters with no
    ASCII base (e.g. CJK) therefore disappear entirely.
    """
    decomposed = unicodedata.normalize('NFD', text)
    ascii_only = decomposed.encode('ascii', 'ignore')
    return str(ascii_only.decode("utf-8"))

# From Kaggle environments code
def normalize(s):
    """Canonicalize `s` for comparison: lowercase, no "the", no spaces, no punctuation, no accents.

    Note that "the" is removed as a raw substring (so it is also stripped from
    inside longer words).
    """
    reduced = s.lower()
    for fragment in ("the", " "):
        reduced = reduced.replace(fragment, "")
    without_punctuation = reduced.translate(str.maketrans("", "", string.punctuation))
    return strip_accents(without_punctuation)

# From Kaggle environments code
def compare_words(a, b):
    """Return True when `a` and `b` match after normalization, allowing simple plurals.

    Two words match if their normalized forms are equal, or - provided both
    normalized forms have at least three characters - if one equals the other
    with a trailing "s" or "es".
    """
    left, right = normalize(a), normalize(b)
    if left == right:
        return True
    # don't check for plurals if string is too short
    if min(len(left), len(right)) < 3:
        return False
    # accept common plurals, in either direction
    for suffix in ("s", "es"):
        if left == right + suffix or right == left + suffix:
            return True
    return False

# Is the keyword in a particular list (using Kaggle environment comparison function)?
def already_in(new_word, wordlist):
    """Return True if `new_word` matches any entry of `wordlist` under `compare_words`."""
    return any(compare_words(new_word, candidate) for candidate in wordlist)

from prompts import Prompter
from tagmanager import TagManager

# Clean up tags after old run - should not exist, assuming environment fully reset between games
if os.path.exists("tags.json"):
    os.remove("tags.json")   

# Turn off the memory-efficient and flash scaled-dot-product attention kernels.
torch.backends.cuda.enable_mem_efficient_sdp(False)
torch.backends.cuda.enable_flash_sdp(False)

# Load the model (Llama 3.1 8B Instruct, 8-bit quantized)
# NOTE(review): recent transformers releases prefer
# quantization_config=BitsAndBytesConfig(load_in_8bit=True) over the bare
# load_in_8bit flag - confirm against the installed version.
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id, load_in_8bit=True, torch_dtype=torch.bfloat16, device_map="auto")
# NOTE(review): tokenizer.pad_token_id may be None for this tokenizer, in which
# case this assignment leaves generation without a pad token - TODO confirm.
model.generation_config.pad_token_id = tokenizer.pad_token_id
# Token id of the end-of-turn marker; generate_answer truncates its output at this id.
id_eot = tokenizer.convert_tokens_to_ids(["<|eot_id|>"])[0]

# Candidate "sizes" that the keyword may be
size_list = ["small", "large"]

def generate_from_messages(messages, temp=TEMPERATURE, max_tokens=256):
    """Sample a chat completion for `messages` and return only the newly generated text.

    Args:
        messages: chat messages in the format accepted by
            `tokenizer.apply_chat_template`.
        temp: sampling temperature passed to `model.generate`.
        max_tokens: upper bound on the number of newly generated tokens.
            (Previously this argument was accepted but ignored in favour of a
            hard-coded 256; the default keeps that behaviour.)

    Returns:
        The decoded completion with special tokens stripped.
    """
    input_ids = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        return_tensors="pt"
    ).to(model.device)

    # Stop on either the generic end-of-sequence token or the end-of-turn marker.
    terminators = [
        tokenizer.eos_token_id,
        tokenizer.convert_tokens_to_ids("<|eot_id|>")
    ]

    outputs = model.generate(
        input_ids,
        max_new_tokens=max_tokens,
        eos_token_id=terminators,
        do_sample=True,
        temperature=temp,
        top_p=0.9,
    )
    # The returned sequence starts with the prompt; keep only what follows it.
    response = outputs[0][input_ids.shape[-1]:]
    return tokenizer.decode(response, skip_special_tokens=True)

def generate_answer(template, max_tokens=15):
    """Generate a short continuation of the raw prompt string `template`.

    The prompt is tokenized as-is (no chat template), generation uses the
    model's default decoding settings capped at `max_tokens` new tokens, and
    the decoded text is cut at the first end-of-turn token if one was produced.
    """
    encoded = tokenizer(template, return_tensors="pt").to("cuda")
    prompt_length = encoded.input_ids.shape[1]

    generated = model.generate(**encoded, max_new_tokens=max_tokens).squeeze()
    completion = generated[prompt_length:]  # drop the echoed prompt tokens

    if id_eot not in completion:
        return tokenizer.decode(completion)

    cutoff = completion.tolist().index(id_eot)
    return tokenizer.decode(completion[:cutoff])
  

class Robot:
    def __init__(self, fixed_keyword = None):
        """Create the prompter, seed the static keyword tables, then call reset().

        Args:
            fixed_keyword: forwarded to reset(), which stores it on the instance.
        """
        # Prompt helper built on the module-level model and tokenizer.
        self.p = Prompter(model, tokenizer)

        # Some keyword corrections
        # Seed entries for the list of phrases the guesser must not propose:
        # reset() appends the public keyword file to this list and
        # valid_phrase() rejects any candidate that matches an entry.
        self.keyword_supply = ['mount mckinley', 'mt mckinley', 'mt st helens', 
                           'mt everest', 'mount everest', 'mount cook', 'mt cook', 'amazonas',
                          'ganges river', 'changjiang', 'huang he', "kiev", "kyiv", "kyiv ukraine",
                          'saigon', 'usa', 'united states', 'uk', 'great britain', 'eswatini',
                          'republic of korea', 'democratic peoples republic of korea', 'dprk', 'holland',
                          'burma', 'congo kinshasa', 'air conditioning', 'cardboard boxes', 'fan',
                          'coffee machine', 'coffee machines', 'comic', 'comics', 'diving platform',
                          'Dumbbell Weights', 'facial mask', 'pier', 'fungus', 'fuses', 'hair tie', 'hdmi',
                          'irrigation', 'khakis', 'laundry hamper', 'mice', 'ppe', 'pa', 'pop', 'soda-pop',
                          'speaker system', 'tea leaves', 'tv', 'touchscreen displays', 'trash bin', 'willow',
                          'aerator', 'decanter', ]

        # Some corrections to the public keyword set
        # NOTE(review): the mapping direction (key -> replacement) and where it is
        # applied are not visible in this part of the file - confirm before editing
        # entries such as 'mount saint lias'.
        self.corrections = {'korea': 'south korea', 'mount saint elias': 'mount saint lias', 'mount denali': 'denali',
                           'mt st. helens': 'mount saint helens', 'mount st. helens': 'mount saint helens', 'amazon river': 'amazon', 'dnieper river': 'dnieper',
                           'rhine river': 'rhine', 'nile river': 'nile', 'mount sumantri': 'sumantri', 'mount puncak jaya':
                           'puncak jaya', 'mount haleakala': 'haleakala', 'mount kangchenjunga': 'kangchenjunga',
                           'mount nanga parbat': 'nanga parbat', 'mount dhaulagiri': 'dhaulagiri', 'mount annapurna': 'annapurna',
                           'mount manaslu': 'manaslu', 'mount cho oyu': 'cho oyu', 'mount makalu': 'makalu', 
                           'mount lhotse': 'lhotse', 'mount zugspitze': 'zugspitze', 'mount chimborazo': 'chimborazo',
                           'mount cotopaxi': 'cotopaxi', 'mount fitz roy': 'fitz roy', 'mount aconcagua': 'aconcagua',
                           'mount iztaccihuatl': 'iztaccihuatl', 'nail clippers': 'nail clipper'}

        # Initialise all per-game state (tags, candidate lists, frequency data).
        self.reset(fixed_keyword)
        

    def reset(self, fixed_keyword):
        """Discard per-game state and reload the keyword and frequency files.

        Removes any leftover ``tags.json``, creates a fresh TagManager, clears
        the alphabetical-search bookkeeping, appends the public keyword list to
        ``self.keyword_supply`` and rebuilds ``self.freq`` plus the sorted
        first/second-word candidate lists from the frequency CSV.

        Args:
            fixed_keyword: stored on the instance as ``self.fixed_keyword``.

        NOTE(review): ``self.keyword_supply`` is only ever appended to, so calling
        reset() more than once on the same instance adds the file's keywords
        again - confirm whether repeated resets happen.
        """

        # If we are re-setting, eliminate all old tags
        if os.path.exists("tags.json"):
            print("Deleting existing tags\n")
            os.remove("tags.json")

        self.tagman = TagManager(self.p)
        self.fixed_keyword = fixed_keyword
        self.alpha_on = False
        self.testwords = []
        self.tested_phrases = []

        # Load the public keyword list
        if os.path.exists(keywords_txt):
            print("Found keyword file\n\n")
            # Read inside a context manager so the handle is always closed.
            with open(keywords_txt, 'r') as keyword_file:
                keyword_lines = keyword_file.readlines()

            # Add the keyword to our keyword supply (first line is the header;
            # only well-formed three-column rows are used, keyword in column 2)
            for line in keyword_lines[1:]:
                parts = line.split(',')
                if len(parts) == 3:
                    self.keyword_supply.append(parts[1].strip().lower())
            print(f"Loaded {str(len(self.keyword_supply))} keywords.\n\n")
        else:
            print("No keyword file found!\n\n")

        self.freq = {}
        self.first_candidates = []
        self.second_candidates = []
        self.candidate_phrases = []
        self.first_word = None
        self.second_word = None

        # Load the unigram frequency list for alpha searching
        if os.path.exists(freq_csv):
            print("Found frequency file\n\n")
            with open(freq_csv, 'r') as freq_file:
                freq_lines = freq_file.readlines()

            # Add the keyword to our frequency data and candidates list
            for line in freq_lines[1:]:  # first line is the header
                parts = line.split(',')
                if len(parts) < 2:
                    # Blank or truncated row (e.g. a trailing newline) - nothing to load.
                    continue
                word = parts[0].lower().strip()
                self.freq[word] = int(parts[1].lower().strip())
                self.first_candidates.append(word)
                self.second_candidates.append(word)
            print(f"Loaded {str(len(self.freq))} frequency entries.\n\n")
            # Sorted order is what the asker's midpoint (binary-search) questions rely on.
            self.first_candidates.sort()
            self.second_candidates.sort()
        else:
            print("No frequency file found!\n\n")

#         self.kw_cat = pd.read_csv(kw_cat_txt)
#         print(self.kw_cat.sum())

    # Filter out obviously incorrect phrase choices
    def valid_phrase(self, phrase, obs):
        """Return the cleaned-up candidate phrase, or None if it should not be guessed.

        A candidate is rejected when it cannot be split after the known first
        word, matches a known keyword or an earlier guess, contradicts any
        alphabetical yes/no answer received so far, or is judged by the
        prompter not to be a tangible object. The reason is printed each time.
        """
        candidate = strip_accents(phrase.lower().strip())
        print(candidate)

        prefix = self.first_word
        if prefix is not None:
            split_at = len(prefix)
            # Only act when the character right after the first word is not a space.
            if len(candidate) > split_at and candidate[split_at] != ' ':
                if ' ' in candidate:
                    print("non contiguous")
                    return None
                # Single run-together token: insert the missing space after the first word.
                candidate = prefix + ' ' + candidate[split_at:]

        if already_in(candidate, self.keyword_supply):
            print("in kw supply")
            return None

        if already_in(candidate, obs.guesses):
            print("in guesses")
            return None

        # The first answer is skipped; each later answer pairs with one test word.
        # "yes" means the keyword sorts before the test word, "no" means it does not.
        for reply, pivot in zip(obs.answers[1:], self.testwords):
            contradicts_yes = reply == 'yes' and candidate >= pivot
            contradicts_no = reply == 'no' and candidate < pivot
            if contradicts_yes or contradicts_no:
                print("already excluded")
                return None

        verdict = self.p.tangible_object(candidate)
        if verdict.strip().lower() == 'no':
            print("not tangible")
            return None

        return candidate
    
    def on(self, mode, obs):
        """Dispatch one turn to the matching role and return its output.

        "asking" and "guessing" both go through `asker`. "answering" goes
        through `answerer`, whose reply is collapsed to exactly "yes" or "no":
        "yes" wins if present, "no" is used if only it is present, and any
        other reply defaults to "yes".
        """
        assert mode in ["asking", "guessing", "answering"], "mode can only take one of these values: asking, answering, guessing"

        if mode == "answering":
            #launch the answerer role
            reply = self.answerer(obs).lower()
            only_no = "yes" not in reply and "no" in reply
            output = "no" if only_no else "yes"
        else:
            #launch the asker role (also serves as the guesser)
            output = self.asker(obs)

        if DEBUG:
            print(output)
        return output
    
    
    def asker(self, obs):
   
        if obs.turnType =="ask":
            if DEBUG:
                print("ASKER:\n")

            # Default first question - skip if KEYWORD_BIAS is off
            if len(obs.questions)<1 and AGENT_ALPHA:
                output = """Is it Agent Alpha?"""
                return output
            elif len(obs.questions)<1:
                if PRECAT_ON:
                    self.tagman.set_modality('precat') # Go to offline category mode
                else:
                    self.tagman.set_modality('category') # Go to online category mode (used in final submission)

            # Determine preferred modality to use 
            modality, _ = self.tagman.get_modality()

            #-----------------------------------------------------------
            # BEGIN MODALITY
            #-----------------------------------------------------------
            assert (modality != 'begin')
                
            # Code for "THINGS" ##########################################
            #------------------------------------------------------
            # MODALITY DETERMINATION
            #------------------------------------------------------

            # This code determines if the modality needs to change
            
            if modality == 'first_word':
                if self.first_word is not None:
                    modality = 'second_word'
                    self.tagman.set_modality(modality)
                if len(self.first_candidates) == 0:
                    if PRECAT_ON:
                        self.tagman.set_modality('precat')
                    else:
                        self.tagman.set_modality('category')
                    self.tagman.set_modality(modality)

            if modality == 'second_word':
                if len(self.second_candidates) == 0:
                    if PRECAT_ON:
                        self.tagman.set_modality('precat')
                    else:
                        self.tagman.set_modality('category')
                    self.tagman.set_modality(modality)
            
            # Determine if modality should change
            if modality == 'precat':
                pass
                    
            if modality == 'category':
                if len(obs.answers) > 6:
                    if len(obs.answers) >= 13 or (obs.answers[-1] == 'no' and obs.answers[-2] == 'no' and obs.answers[-3] == 'no' and obs.answers[-4] == 'no'):
                        location, _ = self.tagman.get_location()
                        if location is None:
                            self.tagman.set_modality("location")
                            print('\n\nSWITCHING TO LOCATION\n\n')
                        else:
                            self.tagman.set_modality("size")

            if modality == 'location':
                current_location, _ = self.tagman.get_location()
                locations_list, _ = self.tagman.get_locations_list()
                continent, _ = self.tagman.get_continent()
                iter, _ = self.tagman.get_iter()
                if continent is not None: # Don't do size or location for places
                    modality = "default"
                    self.tagman.set_modality("default")
                    self.tagman.set_iter(0)
                if (current_location is not None and current_location.lower().strip() not in ['nature', 'indoors', 'outdoors', 'home']) or \
                    iter == len(locations_list) or iter > 5:
                    modality = "size"
                    self.tagman.set_modality("size")
                    self.tagman.set_iter(0)
                    
            if modality == 'size':
                iter, _ = self.tagman.get_iter()
                iter = int(iter)
                size, _ = self.tagman.get_size()
                if size is not None or iter >= len(size_list):
                    self.tagman.set_modality("default")
                    modality = "default"
                    self.tagman.set_iter(0)

            modality, _ = self.tagman.get_modality()


            # ---------------------------------------------------------
            # FIRST WORD MODALITY - find the first word of the keyword
            # ---------------------------------------------------------
            if modality == 'first_word':
                index = len(self.first_candidates) // 2
                try:
                    keyword = self.first_candidates[index]
                except Exception as e:
                    print(self.first_candidates)
                    raise e
                self.testwords.append(keyword)
                output = f"Does the keyword (in lowercase) come before \"{keyword}\" in alphabetical order?"

            # ---------------------------------------------------------
            # SECOND WORD MODALITY - find the second word of the keyword
            # --------------------------------------------------------- 

            if modality == 'second_word':
                if len(self.candidate_phrases) >= 2:
                    index = len(self.candidate_phrases) // 2 
                    cpy = [x for x in self.candidate_phrases]
                    cpy.sort()
                    keyword = cpy[index]
                    self.testwords.append(keyword)
                    output = f"Does the keyword (in lowercase) come before \"{keyword}\" in alphabetical order?"
                else:
                    index = len(self.second_candidates) // 2
                    keyword = self.first_word + ' ' + self.second_candidates[index]
                    self.testwords.append(keyword)
                    output = f"Does the keyword (in lowercase) come before \"{keyword}\" in alphabetical order?"
            
            # ---------------------------------------------------------
            # PRECAT MODALITY - use offline computed categories [DISABLED]
            # ---------------------------------------------------------
            if modality == 'precat':
                pass

            try:
                # ---------------------------------------------------------
                # CATEGORY MODALITY - automatically compute taxonomic categories
                # ---------------------------------------------------------
                if modality == 'category':                   
    
                    should_subcategorize = True
                    
                    if self.tagman.get_category()[0] == "unknown keyword":
                        if self.tagman.get_type()[0] is not None:
                            self.tagman.make_tag("category", self.tagman.get_type()[0])
                            self.tagman.set_last_positive_category("place") 
                        else:
                            self.tagman.make_tag("category", "tangible object")
                            self.tagman.set_last_positive_category("tangible object")                
    
                    # Refine the category
                    if "tested category" in self.tagman.tags.keys():
                        answer = obs.answers[-1]
                        t_single, t_plural = self.tagman.get_tested_category()
                        
                        if answer.lower().strip() == 'no': # Failure!
    
                            # See if there was an alternate which we should now test
                            if "alternate category" in self.tagman.tags.keys():
                                alt_single, alt_plural = self.tagman.get_alternate_category()
                                self.tagman.set_category(alt_single, alt_plural)
                                self.tagman.set_tested_category(alt_single, alt_plural)
                                self.tagman.delete_tag("alternate category")
                                should_subcategorize = False # Test this first before sub-categorizing
                            else:
                                # If there's no alternate, we tested both and both failed. Skip back to the last positive category.
                                pos_singular, pos_plural = self.tagman.get_last_positive_category()
                                self.tagman.set_category(pos_singular, pos_plural)
                                self.tagman.set_tested_category(pos_singular, pos_plural)
                                should_subcategorize = True
                        
    
                    singular_to_test, plural_to_test = self.tagman.get_category()
    
                    # Get all previously tested categories
                    all_singular, all_plural = self.tagman.get_all_tested_categories()
                    
                    if should_subcategorize:                    
                        # Create subcategories
                        _, positive_categories = self.tagman.get_positive_categories()
                        if len(positive_categories) > 1:
                            prior_positive_category = positive_categories[-2]
                        else:
                            prior_positive_category = 'things'
    
                        # Find candidate subcategories
                        output = self.p.split_category(prior_positive_category, plural_to_test, all_plural, 0.6, DEBUG)
                        
                        # Test the first category
                        parts = output.split(',')
                        if len(parts) == 1:
                            parts = output.split(' and ')
                        self.tagman.make_tag("tested category", parts[0]) # Record the category we are testing
                        if len(parts) > 1:
                            self.tagman.make_tag("alternate category", parts[1]) # Record the alternate that we're not testing
                        singular_to_test = parts[0]                    
                        singular_to_test, plural_to_test = self.p.singular_plural(singular_to_test, 0.1, DEBUG)
    
                    # Record this as a tested category
                    all_singular.append(singular_to_test)
                    all_plural.append(plural_to_test)
                    self.tagman.set_all_tested_categories(all_singular, all_plural)
    
                    # Make a question to test category membership
                    words = singular_to_test.split(' ')
                    #part_of_speech = self.p.part_of_speech(words[-1])
                    output = self.p.question_category(singular_to_test, 0.1, DEBUG)              
        
                # ---------------------------------------------------------
                # LOCATION MODALITY - automatically enumerate and test possible locations where the keyword exists
                # ---------------------------------------------------------
                elif modality == 'location':  
                    # Initialize the location search
                    locations_list, _ = self.tagman.get_locations_list()
                    iter, _ = self.tagman.get_iter()
                    iter = int(iter)
    
                    # Find last and second to last category
                    cat_single, cat_plural = self.tagman.get_positive_categories()
                    last_cat = cat_plural[-1]
                    if len(cat_plural) > 1:
                        second_last_cat = cat_plural[-2]
                    else:
                        second_last_cat = "things"
    
                    # Get a list of locations if we don't have one yet
                    if len(locations_list) == 0:    
                        # Create a list of locations to search
                        locations_list = self.p.locations_list(second_last_cat, last_cat, 1.0, DEBUG).split(',')
                        self.tagman.set_locations_list(locations_list)
    
                    # Test if the keyword typically found in this location
                    location_to_test = locations_list[iter]
                    output = self.p.question_location(location_to_test, 0.1, DEBUG)  
    
                #---------------------------------------------------------
                # SIZE MODALITY - test possible sizes of the keyword
                #---------------------------------------------------------
    
                elif modality == 'size':
                    iter, _ = self.tagman.get_iter()
                    iter = int(iter)
                    size_to_test = size_list[int(iter)]
    
                    output = self.p.question_size(size_to_test, 0.1, DEBUG)
    
                #---------------------------------------------------------
                # DEFAULT MODALITY - enumerate possible keywords and create question to attempt to categorize them
                #---------------------------------------------------------
    
                elif modality == 'default':
            
                    # Get known information tags about the keyword
                    pos_s, pos_p = self.tagman.get_positive_categories()
                    prior_category_plural = 'things'
                    category_plural = 'tangible things'
                    if len(pos_p) >= 2:
                        category_plural = pos_p[-1]
                        prior_category_plural = pos_p[-2]
                    location, _ = self.tagman.get_location()
                    if location is None:
                        location = ''
                    size, _ = self.tagman.get_size()
                    if size is None:
                        size = ''
    
                    material, _ = self.tagman.get_material() # Only exists for precat modality
    
                    # Enumerate possible keywords
                    summary, _ = self.tagman.get_summary()
                    output = self.p.list_of_thirty_withsummary(category_plural, prior_category_plural, location, material, size, summary, 0.1, DEBUG)  
                
                    if output is None:
                        print ('\n\n\nERROR!!!!!!!!!!!\n\n\n')
                        output = "Physical objects"
    
                    # Now create a question to distinguish these things
                    output = self.p.question_thirty(output, obs.questions, 1.0, DEBUG)
            
            except Exception as e:
                print(self.tagman.tags)
                raise e
                    
                    
        elif obs.turnType == "guess":

            output = None
            output_type = 'things'
            
            # Create a factual statement with new information gleaned from the question-answer pair
            if obs.questions[-1] != "Is it Agent Alpha?" and not self.alpha_on:
                statement = self.p.reframe_as_statement(obs.questions[-1], obs.answers[-1], 0.1)
                statements, _ = self.tagman.get_statements_list()
                statements.append(statement)
                self.tagman.set_statements_list(statements)

                # Update auto-summary information with new fact obtained
                summary, _ = self.tagman.get_summary()
                new_summary = self.p.update_summary(summary, statement, 0.2, DEBUG)
                self.tagman.set_summary(new_summary)
            else:
                summary = "A thing."

            # Update modal data with new information received
            modality, _ = self.tagman.get_modality()

            # Get most recent answer
            answer = obs.answers[-1]

            # Get iterator
            iter, _ = self.tagman.get_iter()

            # Record begin answer
            if modality == 'begin':
                if answer.lower().strip() == 'yes' and AGENT_ALPHA: # Did answerer accept the handshake?
                    self.alpha_on = True
                    self.tagman.set_modality('first_word')
                else:
                    if PRECAT_ON:
                        self.tagman.set_modality('precat')
                    else:
                        self.tagman.set_modality('category')
                modality, _ = self.tagman.get_modality()

            elif modality == 'first_word':
                
                # Is the keyword earlier or later than the test word in alphabetical order?
                
                print(len(self.first_candidates))
                if len(self.first_candidates) == 4:
                    print(self.first_candidates)
                testword = self.testwords[-1]
                new_candidates = []
                if answer.lower().strip() == 'yes':
                    for word in self.first_candidates:
                        if word < testword:
                            new_candidates.append(word)
                        else:
                            break
                else:
                    for word in self.first_candidates:
                        if word >= testword:
                            new_candidates.append(word)
                self.first_candidates = new_candidates

                # If candidates reduced to one, then go on to second word modality
                if len(self.first_candidates) == 1:
                    print(f"Found single candidate word: {self.first_candidates[0]}\n")
                    self.tagman.set_modality('second_word')
                    self.first_word = self.first_candidates[0]
                    
                    # Automatically enumerate noun phrase candidate list
                    phrase_candidates = self.p.noun_phrases(self.first_word, self.tested_phrases).split(', ')
                    if len(phrase_candidates) == 1:
                        phrase_candidates = self.p.noun_phrases(self.first_word, self.tested_phrases).split(',')
                    print(phrase_candidates)

                    # Check validity of generated candidates
                    for candidate in [self.first_word] + phrase_candidates:
                        valid_c = self.valid_phrase(candidate, obs)
                        if valid_c is not None and valid_c not in self.candidate_phrases:
                            self.candidate_phrases.append(valid_c)
                    self.tested_phrases = self.tested_phrases + self.candidate_phrases
                
            elif modality == 'second_word':
                
                # See what information we gained and reduce our candidate set accordingly
                
                testword = self.testwords[-1].split(' ')[1]
                new_candidates = []
                new_phrases = []
                if answer.lower().strip() == 'yes':
                    for word in self.second_candidates:
                        if word < testword:
                            new_candidates.append(word)
                        else:
                            break
                    for phrase in self.candidate_phrases:
                        if phrase < self.testwords[-1]:
                            new_phrases.append(phrase)
                else:
                    for word in self.second_candidates:
                        if word >= testword:
                            new_candidates.append(word)
                    for phrase in self.candidate_phrases:
                        if phrase >= self.testwords[-1]:
                            new_phrases.append(phrase)
                self.second_candidates = new_candidates
                self.candidate_phrases = new_phrases  
                print(self.candidate_phrases)

            # DISABLED in final submission
            elif modality == 'precat':
                pass          

            # Record new information gained about taxonomic category
            elif modality == 'category':
                if "tested category" in self.tagman.tags.keys():
                    t_single, t_plural = self.tagman.get_tested_category()
                    if answer.lower().strip() == 'yes':
                        
                        # Set the category according to the answer we've found
                        self.tagman.set_category(t_single, t_plural)
                        self.tagman.set_last_positive_category(t_single, t_plural)
                        self.tagman.set_tested_category(t_single, t_plural)
                        self.tagman.delete_tag("alternate category") # Discard the alternate
                    else:
                        # Record this as a negative category
                        neg_single, neg_plural = self.tagman.get_negative_categories()
                        neg_single.append(t_single)
                        neg_plural.append(t_plural)
                        self.tagman.set_negative_categories(neg_single, neg_plural)
                        
            # Record new information gained about the location of the keyword
            elif modality == 'location':
                
                # Update location iter
                iter, _ = self.tagman.get_iter()
                locations_list, _ = self.tagman.get_locations_list()
                iter = int(iter)
                if obs.answers[-1] == 'yes':
                    self.tagman.set_location(locations_list[iter])
                self.tagman.set_iter(iter+1)
                
            # Record new information about the size of the keyword
            elif modality == 'size':
                iter, _ = self.tagman.get_iter()
                iter = int(iter)
                if obs.answers[-1] == 'yes':
                    self.tagman.set_size(size_list[iter])
                self.tagman.set_iter(iter+1)
            
            if DUMB_GUESSER:
                return "null guess"
            
            if DEBUG:
                print("GUESSER:\n")     


            ###################3
            # ALPHA CODE
            ###################3

            # Enumerate possible noun phrases based on a known first word in the keyword
            if self.first_word is not None and len(self.candidate_phrases) == 0:
                
                # Try and generate new candidate phrases
                phrase_candidates = self.p.noun_phrases(self.first_word, self.tested_phrases).split(', ')
                if len(phrase_candidates) == 1:
                    phrase_candidates = self.p.noun_phrases(self.first_word, self.tested_phrases).split(',')
                print(phrase_candidates)

                # Eliminate invalid candidates from consideration
                for candidate in [self.first_word] + phrase_candidates:
                    valid_c = self.valid_phrase(candidate, obs)
                    if valid_c is not None and valid_c not in self.candidate_phrases:
                        self.candidate_phrases.append(valid_c)
                self.tested_phrases = self.tested_phrases + self.candidate_phrases
                
            # If we have some candidate noun phrases, just use the first one
            if len(self.candidate_phrases) > 0:
                output = self.candidate_phrases[0]
                if len(self.candidate_phrases) > 1:
                    self.candidate_phrases = self.candidate_phrases[1:]
                else:
                    self.candidate_phrases = []
                return output

            # If we haven't found the first word of the keyword yet
            if self.alpha_on and self.first_word is None:
                
                # Find highest frequency word in the list and guess it
                best_word = None
                best_freq = 0
                for word in self.first_candidates:
                    if word in self.keyword_supply: # Ignore public KW
                        continue
                    if word in obs.guesses: # Ignore previous guesses
                        continue
                    if self.freq[word] > best_freq:
                        best_freq = self.freq[word]
                        best_word = word
                if best_word is not None:
                    return best_word
                else:
                    # We've already guessed all of the words, so start doing phrases using the most likely first word
                    # Get most likely first word first
                    # Still haven't proven the first word of the keyword yet, though

                    print("Guessed all words, start doing phrases")

                    # Use the highest frequency candidate first word as the seed for enumerating noun phrases
                    best_word = None
                    best_freq = 0
                    for word in self.first_candidates:
                        if self.freq[word] > best_freq:
                            best_freq = self.freq[word]
                            best_word = word

                    # Propose candidate noun phrases beginning with the most likely first word
                    phrase_candidates = self.p.noun_phrases(best_word, self.tested_phrases).split(', ')
                    if len(phrase_candidates) == 1:
                        phrase_candidates = self.p.noun_phrases(best_word, self.tested_phrases).split(',')
                    print(phrase_candidates)

                    # Check validity of generated candidates
                    for candidate in phrase_candidates:
                        valid_c = self.valid_phrase(candidate, obs)
                        print(valid_c)
                        if valid_c is not None:
                            print("Found phrase")
                            self.tested_phrases = self.tested_phrases + [valid_c]
                            return valid_c # Return first valid choice

            # If we run out of candidates during second word guessing
            if self.alpha_on and self.first_word is not None:
                
                # Find highest frequency word in the list and guess its phrase
                best_word = None
                best_freq = 0
                for word in self.second_candidates:
                    phrase = self.first_word + ' ' + word
                    if phrase in self.keyword_supply:
                        continue
                    if phrase in obs.guesses:
                        continue
                    if self.freq[word] > best_freq:
                        best_freq = self.freq[word]
                        best_word = phrase
                if best_word is not None:
                    return best_word
                    
                
            # Code for "THINGS" ##########################################
            
            # Get known information about category, location, and size
            pos_s, pos_p = self.tagman.get_positive_categories()
            if len(pos_p) <= 1:
                category = 'tangible objects'
            if len(pos_p) < 2:
                prior_category = 'things'
            if len(pos_p) >= 2:
                category = pos_p[-1]
                prior_category = pos_p[-2]
            location, _ = self.tagman.get_location()
            if location is None:
                location = ''
            size, _ = self.tagman.get_size()
            if size is None:
                size = ''

            # Material only used in precat mode
            material, _ = self.tagman.get_material()

            # Enumerate a list of ~30 candidate keywords based on known information
            summary, _ = self.tagman.get_summary()
            output = self.p.list_of_thirty_withsummary(category, prior_category, location, material, size, summary, 0.1, DEBUG)  

            # Guess one of the things or locations, avoiding duplicates
            entries = output.split(',')
            if len(entries) == 0:
                entries = ['(no guess)']
            output = None
            
            # Identify guesses which haven't been guessed before
            new_guesses = []
            for entry in entries:
                # print(entry + '\n')
                found = False
                entry = strip_accents(entry.strip().lower())

                # Fix a few misspellings or limitations of kaggle-supplied keyword list
                if entry in self.corrections.keys():
                    entry = self.corrections[entry]

                # Do some validity checks
                found = False
                for a in obs.guesses:
                    if entry.strip().lower() == a.strip().lower():
                        print(f"Skipping {entry.strip()}\n")
                        found = True
                    if KEYWORD_BIAS == 'exclude':
                        if entry.strip().lower() in self.corrections.keys():
                            entry_mod = self.corrections[entry.strip().lower()]
                        else:
                            entry_mod = entry
                        if entry_mod in self.keyword_supply:
                            print(f"Skipping {entry.strip()}\n")
                            found = True
                if not found:
                    new_guesses.append(entry.lower().strip())

            # Code to bias toward keyword supply list [DISABLED in final submission]
            print(f"Keyword bias {KEYWORD_BIAS}\n")
            if KEYWORD_BIAS == 'on': # Bias towards known keywords
                # Find first new guess which is a known keyword
                for guess in new_guesses:
                    if guess.lower().strip() in self.corrections.keys():
                        guess = self.corrections[guess]
                    if guess.lower().strip() in self.keyword_supply:
                        output = guess.lower().strip()
                        return output

            if output is None and len(new_guesses) > 0:
                output = new_guesses[0]
            else:
                output = entries[0]

        if output is not None:
            return output
        else:
            return "[No guess]"
       
        
        
    def answerer(self, obs):
        """Answer the most recent question about the secret keyword.

        Questions that can be decided mechanically are answered without asking
        the LLM to judge them, in this order:

        1. the "Is it Agent Alpha?" handshake,
        2. the exact Agent Alpha "precede / come before" ordering question,
        3. other alphabetical-ordering questions,
        4. "begins with / contains letter(s)" questions,
        5. "is the keyword in this explicit list" questions.

        Anything else is rewritten so that the keyword replaces the subject of
        the question and is then handed to ``self.p.answer_question``.

        Args:
            obs: Observation for the answer turn. This method reads
                ``obs.keyword`` and ``obs.questions[-1]``.

        Returns:
            'yes' or 'no' from the fixed handlers, otherwise whatever
            ``self.p.answer_question`` returns.
        """
        if DEBUG:
            print("ANSWERER:\n")
        keyword = obs.keyword

        # Debug code to override keyword for testing
        if self.fixed_keyword is not None:
            keyword = self.fixed_keyword

        # Do some one-time analysis of the grammar of the keyword itself
        if "proper name" not in self.tagman.tags.keys():
            # Is it a proper name?
            proper_name = self.p.proper_name(keyword)
            self.tagman.set_proper_name(proper_name)
            if proper_name.lower() == 'no':
                # Find the proper leading article.
                # NOTE(review): 'h' is treated like a vowel, so e.g. "house"
                # gets "an" - confirm this is intended.
                if keyword[0].lower() in ['a', 'e', 'i', 'o', 'u', 'h']:
                    self.tagman.set_article(' an ')
                else:
                    self.tagman.set_article(' a ')

        # Preface the keyword with an appropriate article. No article is
        # recorded above for proper names, so fall back to an empty one rather
        # than concatenating None with a string.
        article, _ = self.tagman.get_article()
        if article is None:
            article = ''
        kw_plus_art = article + keyword + ' '

        question = obs.questions[-1]
        # Lower-cased copy used by the fixed handlers; `question` itself is
        # only rewritten after all of them have declined to answer.
        question_lc = question.lower()
        keyword_lc = keyword.lower().strip()

        print("Keyword: " + keyword + "\n")

        ############ Fixed pattern handler ##############
        if question_lc == 'is it agent alpha?': # Indicate that we are smart about alphabetical order
            return 'yes'

        # Very specific pattern match for agent alpha
        if ('does the keyword (in lowercase) precede' in question_lc
                or 'does the keyword (in lowercase) come before' in question_lc):
            print("Alpha pattern match\n")
            # The test word is the first double-quoted span; if the question
            # has no quotes, fall through to the more general handlers.
            quoted = question.split('"')
            if len(quoted) > 1:
                testword = quoted[1].lower()
                print(f"Testword {testword}")
                if keyword_lc < testword.strip():
                    return 'yes'
                else:
                    return 'no'

        # General pattern matching for other alpha-like agents
        if any(marker in question_lc for marker in ('lexicograph', 'alphabetical', 'sorting', 'sort order')):
            print("Testing alphabetical order\n")
            m = re.findall("['\"].*['\"]", question_lc) # Extract everything in quotes
            if len(m) > 0:
                testword = m[0][1:-1] # Strip the quotes
                print(f"Testword: {testword}\n")

                if question_lc.find(testword.lower()) > 0:
                    # First matching marker wins, so the order of this table matters.
                    direction_markers = (
                        ('before', 'earlier'),
                        ('earlier', 'earlier'),
                        ('after', 'later'),
                        ('smaller', 'earlier'),
                        ('precede', 'earlier'),
                        ('larger', 'later'),
                        ('lower', 'earlier'),
                        ('higher', 'later'),
                    )
                    early_late = None
                    for marker, direction in direction_markers:
                        if marker in question_lc:
                            early_late = direction
                            break
                    if early_late is None:
                        # No direction word found: ask the LLM which way the comparison goes
                        early_late = self.p.alpha_earlier_later(question, testword, 0.6, DEBUG)

                    response = None
                    if early_late.lower() == 'earlier':
                        response = keyword_lc < testword.lower().strip()
                    elif early_late.lower() == 'later':
                        response = keyword_lc > testword.lower().strip()
                    if response is not None:
                        if response:
                            return 'yes'
                        else:
                            return 'no'

        # Now see if we're looking at start letter or containing letters
        if any(marker in question_lc for marker in (' letter ', ' letters ', ' letter:', ' letters:', ' letter?', ' letters?')):
            print("Regex match try\n")
            m = re.findall("['\"][a-z]*['\"]", question_lc) # Extract everything in quotes
            if len(m) > 0:
                testletters = [entry[1:-1] for entry in m] # Strip the quotes
            else:
                # Automatically extract the candidate letter list
                testletters = self.p.alpha_extract_letters(question, 0.6, DEBUG).split(',')
            if len(testletters) <= 25: # Sanity check
                start = None
                if 'includes' in question_lc:
                    begins_contains = 'contains'
                else:
                    # Determine if it is a "begins" or "contains" question
                    begins_contains = self.p.alpha_begins_contains(question, 0.6, DEBUG)

                if begins_contains == 'begins':
                    start = True
                elif begins_contains == 'contains':
                    start = False
                if start is not None:
                    for letter in testletters:
                        if start:
                            if letter.lower().strip() == keyword_lc[0]:
                                return 'yes'
                        else:
                            if letter.lower().strip() in keyword_lc:
                                return 'yes'
                    return 'no' # Return no if not found

        # Now check if we're just confirming whether keyword is included in an explicit list or not
        test_phrases = ['keyword one of the following', 'is the thing precisely any of', 'keyword in the following']
        for phrase in test_phrases:
            if phrase.lower() in question_lc:
                print("List match\n")
                m = re.findall("['\"][a-z]*['\"]", question_lc) # Extract everything in quotes
                if len(m) > 0:
                    # Strip the surrounding quotes; with them left on, no item
                    # could ever compare equal to the keyword.
                    list_contents = [entry[1:-1] for entry in m]
                else:
                    list_contents = self.p.alpha_extract_list(question, 0.6, DEBUG).split(',')
                print(f"List contents: {list_contents}\n")

                for item in list_contents:
                    if keyword_lc == item.lower().strip():
                        return 'yes'
                return 'no'

        # End Alpha handler #####################


        # Replace subject of the sentence with the keyword - big improvement in answer quality
        if 'keyword' in question.lower(): # Replace "keyword" with the actual keyword, if present in the question
            # NOTE(review): this loop only terminates if the substituted text
            # does not itself contain "keyword".
            while 'keyword' in question.lower():
                for phrase in ['the keyword', 'keyword']:
                    while phrase in question.lower():
                        idx = question.lower().find(phrase)
                        if idx == -1:
                            break
                        new_q = ''
                        if idx > 0:
                            new_q += question[:idx]
                        new_q += kw_plus_art + question[idx+len(phrase):]
                        question = new_q
        else:
            # Find the actual subject word based on LLM assessment of the subject
            subject = ' ' + self.p.subject(question).strip() + ' '
            if question.lower().find(subject.lower()) > 0: # Found a subject
                for precedent in [' the ', ' a ', ' an ', '']:
                    phrase = precedent + subject.lower()
                    for i in range(0, 3):
                        if subject.lower() not in question.lower():
                            break
                        idx = question.lower().find(phrase)
                        if idx == -1:
                            break
                        new_q = ''
                        if idx > 0:
                            new_q += question[:idx] + " "
                        new_q += kw_plus_art.strip() + " " + question[idx+len(phrase):].strip()
                        question = new_q
            else:
                # Default, risky, behavior
                # It once rephrased "Did a cantaloupe write 'Pride and Prejudice'?" to "Did Jane Austen, not a cantaloupe, write 'Pride and Prejudice'?" :)
                question = self.p.rephrase_with_kw(question, keyword, 0.1, DEBUG)

        # Answer the question
        output = self.p.answer_question(question , 0.1, DEBUG)

        return output
    
    
# Module-level Robot instance; agent() and eval() below both operate on it.
# NOTE(review): FIXED_KEYWORD presumably feeds the debug keyword override that
# answerer() reads from self.fixed_keyword - confirm in Robot.__init__.
robot = Robot(FIXED_KEYWORD)

########################
# Testing code
########################

def dumbo(obs, cfg):
    """Placeholder agent for local test games: replies "yes" to everything.

    Both arguments are accepted only to match the agent call signature and
    are never inspected.
    """
    constant_reply = "yes"
    return constant_reply

# Test code for alpha agent
def dumbo_alpha(obs, cfg):
    # Asks alpha related questions
    if obs.turnType == 'ask':
        test_type = random.choice(('starts', 'contains', 'list', 'alpha'))
        letters = []
        count = random.choice((1, 2, 3))
        alpha = random.choice(('bird', 'llama', 'zebra', 'washington dca'))
        wordlist = "'bird', 'monkey', 'digeroo'"
        for i in range(0,count):
            letters.append(random.choice(('a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z')))
        letter_str = f"'{letters[0]}'"
        if len(letters) > 1:
            add_str = "', '".join(letters)
            letter_str = f"'{add_str}'"
            
        if test_type == 'starts':
            output = f"Does the keyword begin with any of these letters: {letter_str}?"
        if test_type == 'contains':
            output = f"Does the keyword contain any of these letters: {letter_str}?"
        if test_type == 'list':
            output = f"Is the keyword in the following list: {wordlist}?"
        if test_type == 'alpha':
            output = f"Is the keyword lexicographically smaller than \"{alpha}\"?"
        print(output +'\n')
        return output
            
    if obs.turnType == 'guess':
        return 'random guess'

def one_round(env):
    """Advance the local test game by one step.

    Seats 0 and 1 are played by ``agent`` and seats 2 and 3 by ``dumbo``.
    A seat whose status is not "ACTIVE" submits an empty string. The four
    actions are then passed to ``env.step`` in seat order.
    """
    def seat_action(seat):
        player = env.state[seat]
        if player.status != "ACTIVE":
            return ''
        policy = agent if seat < 2 else dumbo
        return policy(player.observation, CFG)

    env.step([seat_action(seat) for seat in range(4)])

import statistics

# Run batch evaluation for a word list
def eval(wordlist, kw_list=None):
    """Play one game per word in ``wordlist`` and print aggregate statistics.

    For each word a fresh "llm_20_questions" environment is created, the
    global ``robot`` is reset with that word, and ``one_round`` is called
    repeatedly until either the latest guess recorded for player 1 matches
    the word (success) or an exception is raised (failure).

    Args:
        wordlist: Sized iterable of target keywords, one game each.
        kw_list: Optional iterable of keywords; when given, a fresh list copy
            replaces ``robot.keyword_supply`` at the start of every game.

    Returns:
        None. Per-game progress, the overall success rate and the average
        number of rounds per game (failed games included) are printed.

    Note:
        The function name shadows the ``eval`` builtin; it is kept for
        compatibility with existing callers.
    """
    # Nothing to play: avoid dividing by zero / averaging an empty list below.
    if len(wordlist) == 0:
        print("\nNo words to evaluate.")
        return

    from kaggle_environments import make

    success_count = 0
    # One entry per game. This must be created once, outside the per-word
    # loop, otherwise the final average only reflects the last word played.
    rounds_per_game = []
    for word in wordlist:
        rounds = 0
        env = make("llm_20_questions", debug=True)
        env.reset()
        robot.reset(word)
        print(robot.tagman.tags)
        if kw_list is not None:
            robot.keyword_supply = list(kw_list)
        print(robot.keyword_supply)
        # There is no explicit game-over check: the loop ends on a correct
        # guess or on the first exception.
        # NOTE(review): presumably env.step raises once the episode is over,
        # which is what terminates unsuccessful games -- confirm.
        while True:
            try:
                rounds += 1
                one_round(env)
                guesses = env.state[1]['observation']['guesses']
                if len(guesses) > 0 and compare_words(guesses[-1], word):
                    success_count += 1
                    print("\n******SUCCESS*******\n")
                    break
            except Exception as e:
                print("\n+++++++++FAILURE++++++++\n")
                # Surface the reason instead of discarding it silently.
                print(f"Game for {word!r} stopped after {rounds} round(s): {e!r}")
                break
        rounds_per_game.append(rounds)
    print(f"\nOverall success rate: {success_count / len(wordlist)}")
    print(f"\nOverall avg rounds: {statistics.mean(rounds_per_game)}")

    
# Main calling function for the agent
def agent(obs, cfg):
    """Entry point invoked for every turn; dispatches to the global ``robot``.

    Args:
        obs: Observation object; ``obs.turnType`` selects the robot mode
            ("ask" -> asking, "guess" -> guessing, "answer" -> answering).
        cfg: Accepted for signature compatibility; unused here.

    Returns:
        A string response. Questions are cut to 750 characters and guesses to
        100 characters (presumably the competition's length limits -- TODO
        confirm). Answers that are not "yes"/"no" (case-insensitive) become
        "no". A missing response, a response of at most one character, or an
        unrecognised turn type all fall back to "no".
    """
    turn_type = obs.turnType
    max_len = None
    if turn_type == "ask":
        response = robot.on(mode="asking", obs=obs)
        max_len = 750
    elif turn_type == "guess":
        response = robot.on(mode="guessing", obs=obs)
        max_len = 100
    elif turn_type == "answer":
        response = robot.on(mode="answering", obs=obs)
    else:
        # Unknown turn type: previously left `response` unbound and crashed.
        response = None

    # The None check must happen before any len()/lower() call on the value.
    if response is None:
        return "no"

    if turn_type == "answer" and response.lower() not in ("yes", "no"):
        response = "no"
    if max_len is not None and len(response) > max_len:
        response = response[:max_len]
    if len(response) <= 1:
        response = "no"
    return response


In [None]:
# Look up the installed numpy distribution's version string; as the last
# expression in the cell it is shown as the cell output.
from importlib.metadata import version
version("numpy")

In [None]:
# Install pigz and pv, which the tar packaging command in the following cell
# pipes its output through. apt's stdout is discarded.
!apt install pigz pv > /dev/null

In [None]:
# Package the submission as submission.tar.gz in the current directory.
# Each "-C <dir> ." switches into <dir> and adds its contents, so the files
# from /kaggle/working/submission and /kaggle/tmp/ are archived together.
# Compression is delegated to `pigz --fast --recursive`, piped through pv.
# The commented-out variant below additionally bundled the model files from
# /kaggle/input/llama-3/transformers/8b-chat-hf.
# !tar --use-compress-program='pigz --fast --recursive | pv' -cf submission.tar.gz -C /kaggle/input/llama-3/transformers/8b-chat-hf . -C /kaggle/working/submission . -C /kaggle/tmp/ .
!tar --use-compress-program='pigz --fast --recursive | pv' -cf submission.tar.gz -C /kaggle/working/submission . -C /kaggle/tmp/ .