In [1]:
%%bash
# Directory whose contents are later archived as the submission.
mkdir -p /kaggle/working/submission
# Scratch directory that receives the raw model download.
mkdir -p /tmp/model
# Extra packages for the model-loading cells further down; transformers is
# upgraded (-U) to the newest available release.
pip install -q bitsandbytes accelerate
pip install -qU transformers

In [2]:
from kaggle_secrets import UserSecretsClient
secrets = UserSecretsClient()

# Hugging Face access token. Stays None when the "HF_TOKEN" secret cannot be
# read, in which case later cells run without a token.
HF_TOKEN: str | None  = None

try:
    HF_TOKEN = secrets.get_secret("HF_TOKEN")
except Exception as exc:
    # Best effort: a missing/unreadable secret must not stop the notebook, but
    # say so instead of failing silently. Catching Exception (not a bare
    # except) lets KeyboardInterrupt / SystemExit propagate.
    print(f"HF_TOKEN secret unavailable ({type(exc).__name__}); continuing without a token.")

In [3]:
from huggingface_hub import snapshot_download
from pathlib import Path
import shutil

# Start from an empty download directory: any existing /tmp/model tree is
# removed and re-created before downloading.
g_model_path = Path("/tmp/model")
if g_model_path.exists():
    shutil.rmtree(g_model_path)
g_model_path.mkdir(parents=True)

# Download the model repository into g_model_path, skipping files that match
# "original*". globals().get(...) yields None when HF_TOKEN was never defined
# (presumably so this cell still works if the secrets cell was skipped).
snapshot_download(
    repo_id="abacusai/Llama-3-Smaug-8B",
    ignore_patterns="original*",
    local_dir=g_model_path,
    token=globals().get("HF_TOKEN", None)
)

Fetching 12 files:   0%|          | 0/12 [00:00<?, ?it/s]

model-00003-of-00004.safetensors:   0%|          | 0.00/4.92G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/121 [00:00<?, ?B/s]

model-00004-of-00004.safetensors:   0%|          | 0.00/1.17G [00:00<?, ?B/s]

model-00002-of-00004.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/23.9k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/449 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/51.0k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.08M [00:00<?, ?B/s]

'/tmp/model'

In [4]:
!ls -l /tmp/model

total 15693124
-rw-r--r-- 1 root root       1728 Jul 29 16:27 README.md
-rw-r--r-- 1 root root        649 Jul 29 16:27 config.json
-rw-r--r-- 1 root root        121 Jul 29 16:27 generation_config.json
-rw-r--r-- 1 root root 4976698672 Jul 29 16:28 model-00001-of-00004.safetensors
-rw-r--r-- 1 root root 4999802720 Jul 29 16:28 model-00002-of-00004.safetensors
-rw-r--r-- 1 root root 4915916176 Jul 29 16:28 model-00003-of-00004.safetensors
-rw-r--r-- 1 root root 1168138808 Jul 29 16:27 model-00004-of-00004.safetensors
-rw-r--r-- 1 root root      23950 Jul 29 16:27 model.safetensors.index.json
-rw-r--r-- 1 root root        449 Jul 29 16:27 special_tokens_map.json
-rw-r--r-- 1 root root    9084463 Jul 29 16:27 tokenizer.json
-rw-r--r-- 1 root root      51016 Jul 29 16:27 tokenizer_config.json


In [5]:
# Load the downloaded checkpoint into memory with 4-bit quantisation.
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
import torch

# Turn off the memory-efficient and flash scaled-dot-product-attention kernels.
# NOTE(review): presumably a workaround for the GPU type available here - confirm.
torch.backends.cuda.enable_mem_efficient_sdp(False)
torch.backends.cuda.enable_flash_sdp(False)

# Same directory the download cell wrote to.
downloaded_model = "/tmp/model"

# 4-bit weights, float16 used for computation.
bnb_config = BitsAndBytesConfig(
    load_in_4bit = True,
    bnb_4bit_compute_dtype=torch.float16,
)

# NOTE(review): trust_remote_code=True permits running Python code shipped
# with the checkpoint; confirm this checkpoint actually needs it.
model = AutoModelForCausalLM.from_pretrained(
    downloaded_model,
    quantization_config = bnb_config,
    torch_dtype = torch.float16,
    device_map = "auto",
    trust_remote_code = True,
)

tokenizer = AutoTokenizer.from_pretrained(downloaded_model)

The cache for model files in Transformers v4.22.0 has been updated. Migrating your old cache. This is a one-time only operation. You can interrupt this and resume the migration later on by calling `transformers.utils.move_cache()`.


0it [00:00, ?it/s]

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

In [6]:
# Save the loaded (quantised) model and its tokenizer into the submission
# directory; this is the path main.py loads from when run outside the
# simulation environment.
model.save_pretrained("/kaggle/working/submission/model")
tokenizer.save_pretrained("/kaggle/working/submission/model")

('/kaggle/working/submission/model/tokenizer_config.json',
 '/kaggle/working/submission/model/special_tokens_map.json',
 '/kaggle/working/submission/model/tokenizer.json')

In [7]:
# Unload the model from memory: drop the Python references, run the garbage
# collector, then ask PyTorch to release its cached CUDA memory.
import gc, torch
del model, tokenizer
gc.collect()
torch.cuda.empty_cache()

In [8]:
%%writefile submission/questions.py

# Question banks for the scripted opening of the asker.
# Each list is consumed in order; commented-out entries are disabled candidates.

# Opening question; its answer selects the "place" or "things" bank.
# Disabled candidate: "is it a thing?"
BasicQuestions = ["is it a place?"]

# Follow-ups used after a "yes" to the opening question.
# Disabled candidates: "is it a mountain?", "is it a river?"
PlaceQuestions = [
    "is it a country?",
    "is it a city?",
    "is it a natural feature?",
]

# Follow-ups used after a "no" to the opening question.
# Disabled candidate:
#   "Would the keyword be included in the broad category of [Group]?"
ThingsQuestions = [
    "is it a living thing?",
    "is it edible?",
    "is it something that can be held in your hand?",
    "Does it require electricity to operate?",
]

Writing submission/questions.py


In [9]:
%%writefile submission/rulebased.py

from questions import *


class RuleBasedQuestions:
    """Scripted sequence of opening questions.

    The basic question is asked first; its answer selects either the place
    bank or the things bank, whose questions are then asked in order.

    Attributes:
        log (list): Recorded answers as booleans (True for "yes").
        count (int): Number of answers logged so far, i.e. the index of the
            next question across the basic bank plus the selected bank.
        enabled (bool): True while scripted questions remain to be asked.
        category (str): "basic" before the first answer, then "place" or
            "things".
    """

    def __init__(self):
        self.reset()

    def getQuestion(self):
        """
        Returns the next question based on the current state of the game.

        Returns:
            str: The next question to be asked, or a fixed notice once the
            script is disabled. None if the category is not recognised.
        """
        if not self.enabled:
            return "No more available questions."
        if self.category == "basic":
            return BasicQuestions[self.count]

        # Position inside the selected bank: skip over the basic questions.
        offset = self.count - len(BasicQuestions)
        if self.category == "place":
            return PlaceQuestions[offset]
        if self.category == "things":
            return ThingsQuestions[offset]
        return None

    def logAnswer(self, answer):
        """
        Logs the user's answer and updates the category and count based on the answer.

        Parameters:
        - answer (str): The user's answer, either "yes" or "no". Anything that
          does not contain "no" is treated as "yes".

        Returns:
        None
        """
        answer_yes = "no" not in answer.lower()
        self.log.append(answer_yes)

        # Index of the question this answer belongs to (before incrementing).
        answered_index = self.count

        # determine the category by first answer
        if answered_index == 0:
            self.category = "place" if answer_yes else "things"
        self.count += 1

        if self.category == "place":
            # A "yes" ends the place script only when it answers one of the
            # place questions. The "yes" to the opening basic question merely
            # selects this bank; counting it here would disable the script
            # before any place question is asked.
            answered_place_question = answered_index >= len(BasicQuestions)
            exhausted = self.count >= len(BasicQuestions) + len(PlaceQuestions)
            if (answered_place_question and answer_yes) or exhausted:
                self.enabled = False
        elif self.category == "things":
            # The things bank is always asked to the end.
            if self.count >= len(BasicQuestions) + len(ThingsQuestions):
                self.enabled = False

    def reset(self):
        """Restore the initial state so the script starts over."""
        self.log = []
        self.count = 0
        self.enabled = True
        self.category = "basic"

Writing submission/rulebased.py


In [10]:
%%writefile submission/prompts.py

def asker_sys_prompt(category):
    """Return the system prompt for the question-asking role.

    Args:
        category: Category label interpolated into the prompt.

    Returns:
        str: The prompt text, terminated by a blank line.
    """
    sentences = (
        "You are a helpful AI assistant with expertise in playing 20 questions game.",
        "Your task is to ask questions to the user to guess the word the user is thinking of.",
        f'The keyword is of category: "{category}"',
        "Narrow down the possibilities by asking yes/no questions.",
        "Think step by step and try to ask the most informative questions.",
    )
    # One sentence per line, followed by an empty line.
    return "\n".join(sentences) + "\n\n"


def guesser_sys_prompt(category):
    """Return the system prompt for the word-guessing role.

    Args:
        category: Category label interpolated into the prompt.

    Returns:
        str: The prompt text, terminated by a blank line.
    """
    sentences = (
        "You are a helpful AI assistant with expertise in playing 20 questions game.",
        "Your task is to guess the word the user is thinking of.",
        f'The keyword is of category: "{category}"',
        "Think step by step.",
    )
    # One sentence per line, followed by an empty line.
    return "\n".join(sentences) + "\n\n"


def answerer_sys_prompt(keyword, category):
    """Return the system prompt for the question-answering role.

    Args:
        keyword: The secret word the model answers questions about.
        category: Category label of the keyword.

    Returns:
        str: The prompt text, ending with a single newline.
    """
    # The opening sentence previously read "ou are ..." (missing "Y").
    prompt = f"""You are a helpful AI assistant with expertise in playing 20 questions game.
Your task is to answer the questions of the user to help him guess the word you're thinking of.
Your answers must be 'yes' or 'no'.
The keyword is: "{keyword}", it is of category: "{category}"
Provide accurate answers to help the user to guess the keyword.
"""
    return prompt

Writing submission/prompts.py


In [11]:
%%writefile submission/main.py
# Comment out the %%writefile magic above before running this cell for a simulation.

from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
import os
import sys

from prompts import *
from rulebased import RuleBasedQuestions

# Turn off the memory-efficient and flash scaled-dot-product-attention kernels.
# NOTE(review): presumably a workaround for the target GPU - confirm.
torch.backends.cuda.enable_mem_efficient_sdp(False)
torch.backends.cuda.enable_flash_sdp(False)


# Pick the model directory: inside the agent directory when it exists,
# otherwise the notebook's working copy of the submission.
# NOTE(review): the sibling modules (prompts, rulebased) are imported above
# without any sys.path change - confirm the agent directory is importable
# where this file runs.
KAGGLE_AGENT_PATH = "/kaggle_simulations/agent/"
if os.path.exists(KAGGLE_AGENT_PATH):
    MODEL_PATH = os.path.join(KAGGLE_AGENT_PATH, "model")
else:
    MODEL_PATH = "/kaggle/working/submission/model"


# Load the saved model and tokenizer once, at import time.
# NOTE(review): trust_remote_code=True permits running Python code shipped
# with the checkpoint; confirm it is required.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_PATH,
    device_map="auto",
    trust_remote_code=True,
)
tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
# Token id of the end-of-turn marker; generate_answer() cuts its output there.
id_eot = tokenizer.convert_tokens_to_ids(["<|eot_id|>"])[0]


def generate_answer(template, max_new_tokens=15):
    """Generate a short completion for an already-formatted chat prompt.

    Args:
        template (str): Full prompt text, including all chat-format markers.
        max_new_tokens (int): Upper bound on the number of generated tokens.

    Returns:
        str: The decoded newly generated tokens, cut just before the first
        end-of-turn token (`id_eot`) when one was produced.
    """
    # Move the inputs to the device the model was placed on instead of a
    # hard-coded "cuda", so this also works when device_map="auto" chose
    # something else.
    inp_ids = tokenizer(template, return_tensors="pt").to(model.device)
    # Single prompt -> take row 0 explicitly (squeeze() would also collapse a
    # length-1 sequence dimension).
    out_ids = model.generate(**inp_ids, max_new_tokens=max_new_tokens)[0]

    # Keep only the tokens produced after the prompt.
    prompt_len = inp_ids.input_ids.shape[1]
    new_ids = out_ids[prompt_len:].tolist()
    if id_eot in new_ids:
        new_ids = new_ids[: new_ids.index(id_eot)]
    return tokenizer.decode(new_ids)


class Robot:
    """Plays the three game roles: asking, answering and guessing.

    Questions come from a RuleBasedQuestions script while it is enabled;
    afterwards the asker prompts the language model via generate_answer().
    """

    # Marker that closes every turn in the chat prompt.
    _EOT = "<|eot_id|>"

    def __init__(self):
        # To disable the rule-based agent, set
        # `self.RuleBasedAgent.enabled = False` after this line.
        self.RuleBasedAgent = RuleBasedQuestions()

    @staticmethod
    def _header(role):
        """Return the marker sequence that opens a turn spoken by `role`."""
        return f"<|start_header_id|>{role}<|end_header_id|>\n\n"

    @classmethod
    def _system_turn(cls, system_prompt):
        """Return the start of a prompt: begin marker plus a closed system turn."""
        return f"<|begin_of_text|>{cls._header('system')}{system_prompt}{cls._EOT}"

    def on(self, mode, obs):
        """Run the role named by `mode` on `obs` and return its text output.

        Answers are normalised to exactly "yes" or "no": "yes" wins when the
        model output contains it, "no" when only that is present, and "yes"
        is the fallback when neither appears.
        """
        assert mode in [
            "asking", "guessing", "answering",
        ], "mode can only take one of these values: asking, answering, guessing"
        if mode == "asking":
            return self.asker(obs)
        if mode == "guessing":
            return self.guesser(obs)
        if mode == "answering":
            lowered = self.answerer(obs).lower()
            only_no = "no" in lowered and "yes" not in lowered
            return "no" if only_no else "yes"

    def asker(self, obs):
        """Return the next question: scripted while enabled, else generated."""
        rules = self.RuleBasedAgent
        if rules.enabled:
            return rules.getQuestion()

        # The model plays the assistant asking; the other player is the user.
        pieces = [
            self._system_turn(asker_sys_prompt(rules.category)),
            self._header("assistant"),
        ]
        if len(obs.questions) >= 1:
            for question, reply in zip(obs.questions, obs.answers):
                pieces.append(f"{question}{self._EOT}{self._header('user')}")
                pieces.append(f"{reply}{self._EOT}{self._header('assistant')}")
        return generate_answer("".join(pieces))

    def guesser(self, obs):
        """Feed the latest answer to the script, then ask the model for a guess."""
        rules = self.RuleBasedAgent
        if rules.enabled:
            rules.logAnswer(obs.answers[-1])

        transcript = "".join(
            f"Question: {question}\nAnswer: {reply}\n"
            for question, reply in zip(obs.questions, obs.answers)
        )
        # The indentation before "based on" is part of the prompt text.
        guess_prompt = (
            guesser_sys_prompt(rules.category)
            + "so far, the current state of the game is as following:\n"
            + transcript
            + "\n        based on the conversation, can you guess the word, please give only the word, no verbosity around"
        )
        return generate_answer(self._system_turn(guess_prompt) + self._header("assistant"))

    def answerer(self, obs):
        """Ask the model to answer the most recent question about the keyword."""
        # Here the other player's questions are the user turns.
        pieces = [
            self._system_turn(answerer_sys_prompt(obs.keyword, obs.category)),
            self._header("user"),
            f"{obs.questions[0]}{self._EOT}",
            self._header("assistant"),
        ]
        if len(obs.answers) >= 1:
            # Each earlier answer is followed by the question that came after it.
            for question, reply in zip(obs.questions[1:], obs.answers):
                pieces.append(f"{reply}{self._EOT}{self._header('user')}")
                pieces.append(f"{question}{self._EOT}{self._header('assistant')}")
        return generate_answer("".join(pieces))


# Module-level instance used by agent(); created once when this file is loaded.
robot = Robot()


def agent(obs, cfg):
    """Entry point called once per turn.

    Args:
        obs: Observation for the turn; `obs.turnType` ("ask", "guess" or
            "answer") selects the role, and `obs` is passed on to the robot.
        cfg: Configuration object; not used here.

    Returns:
        str: The robot's output for the turn, or "yes" when the turn type is
        not recognised or the output is missing or at most one character long.
    """
    mode_by_turn = {"ask": "asking", "guess": "guessing", "answer": "answering"}
    mode = mode_by_turn.get(obs.turnType)

    # An unrecognised turn type used to leave `response` unbound and crash;
    # it now falls through to the default reply.
    response = robot.on(mode=mode, obs=obs) if mode is not None else None

    if response is None or len(response) <= 1:
        response = "yes"

    return response

Writing submission/main.py


In [12]:
# Install the compression (pigz) and progress (pv) tools used by the packaging
# cell. -y answers any confirmation prompt, which cannot be answered from a
# notebook cell; apt-get is the script-oriented front end.
!apt-get install -y pigz pv > /dev/null





In [13]:
# Archive everything under /kaggle/working/submission into submission.tar.gz,
# compressing through pigz (fast setting) and piping through pv to show throughput.
!tar --use-compress-program='pigz --fast --recursive | pv' -cf submission.tar.gz -C /kaggle/working/submission .

4.73GiB 0:01:35 [50.7MiB/s] [   <=>                                            ]
