### Prerequisites
Install HuggingFace packages and create submission directory.

In [1]:
!mkdir /kaggle/working/submission

In [2]:
%%time
import os,sys
os.system("pip install -q -U -t /kaggle/working/submission/lib accelerate transformers bitsandbytes")
os.system("pip cache purge")
sys.path.insert(0, "/kaggle/working/submission/lib")

[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
cudf 24.6.1 requires cubinlinker, which is not installed.
cudf 24.6.1 requires cupy-cuda11x>=12.0.0, which is not installed.
cudf 24.6.1 requires ptxcompiler, which is not installed.
cuml 24.6.1 requires cupy-cuda11x>=12.0.0, which is not installed.
dask-cudf 24.6.1 requires cupy-cuda11x>=12.0.0, which is not installed.
keras-cv 0.9.0 requires keras-core, which is not installed.
tensorflow-decision-forests 1.8.1 requires wurlitzer, which is not installed.
ucx-py 0.38.0 requires libucx<1.16,>=1.15.0, which is not installed.
ucxx 0.38.0 requires libucx>=1.15.0, which is not installed.
apache-beam 2.46.0 requires dill<0.3.2,>=0.3.1.1, but you have dill 0.3.8 which is incompatible.
apache-beam 2.46.0 requires numpy<1.25.0,>=1.14.3, but you have numpy 1.26.4 which is incompatible.
apache-beam 2.46.0 requires pyarr

Files removed: 242
CPU times: user 11.7 ms, sys: 4.73 ms, total: 16.5 ms
Wall time: 2min 53s


### HuggingFace Login

Add HugginFace access token to secrets. You can find it in `Add-ons -> secrets`

In [3]:
import huggingface_hub
from kaggle_secrets import UserSecretsClient

huggingface_hub.login(token=UserSecretsClient().get_secret("HF_TOKEN"))

The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: write).
Your token has been saved to /root/.cache/huggingface/token
Login successful


### Download Model via HuggingFace
In this notebook, we are using gemma-2-9b model with 4-bit quantization.

In [4]:
# pip install accelerate
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
import torch

torch.backends.cuda.enable_mem_efficient_sdp(False)
torch.backends.cuda.enable_flash_sdp(False)

model_id = "google/gemma-2-9b-it"

bnb_config = BitsAndBytesConfig(
    load_in_4bit = True,
    bnb_4bit_quanty_type = "fp4", 
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quanty = True,
)

model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config = bnb_config,
    torch_dtype = torch.float16,
    device_map = "auto",
    trust_remote_code = True,
)

tokenizer = AutoTokenizer.from_pretrained(model_id)


Unused kwargs: ['bnb_4bit_quanty_type', 'bnb_4bit_use_double_quanty']. These kwargs are not used in <class 'transformers.utils.quantization_config.BitsAndBytesConfig'>.


config.json:   0%|          | 0.00/857 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/39.1k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/4 [00:00<?, ?it/s]

model-00001-of-00004.safetensors:   0%|          | 0.00/4.90G [00:00<?, ?B/s]

model-00002-of-00004.safetensors:   0%|          | 0.00/4.95G [00:00<?, ?B/s]

model-00003-of-00004.safetensors:   0%|          | 0.00/4.96G [00:00<?, ?B/s]

model-00004-of-00004.safetensors:   0%|          | 0.00/3.67G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/173 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/40.6k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/4.24M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.5M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/636 [00:00<?, ?B/s]

### Save Model
Save the loaded model and tokenizer in the submission directory.
Remove the model and tokenizer from the memory.

In [5]:
model.save_pretrained("/kaggle/working/submission/model")
tokenizer.save_pretrained("/kaggle/working/submission/model")

('/kaggle/working/submission/model/tokenizer_config.json',
 '/kaggle/working/submission/model/special_tokens_map.json',
 '/kaggle/working/submission/model/tokenizer.model',
 '/kaggle/working/submission/model/added_tokens.json',
 '/kaggle/working/submission/model/tokenizer.json')

In [6]:
import gc, torch
del model, tokenizer
gc.collect()
torch.cuda.empty_cache()

## Agent

In [7]:
%%writefile /kaggle/working/submission/main.py
# Setup
import os
import sys

# **IMPORTANT:** Set up your system path like this to make your code work
# both in notebooks and in the simulations environment.


KAGGLE_AGENT_PATH = "/kaggle_simulations/agent/"
if os.path.exists(KAGGLE_AGENT_PATH):
    sys.path.insert(0, os.path.join(KAGGLE_AGENT_PATH, 'lib'))
else:
    sys.path.insert(0, "/kaggle/working/submission/lib")

import contextlib
import os
import sys
from pathlib import Path

from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
import torch

torch.backends.cuda.enable_mem_efficient_sdp(False)
torch.backends.cuda.enable_flash_sdp(False)

if os.path.exists(KAGGLE_AGENT_PATH):
    MODEL_PATH = os.path.join(KAGGLE_AGENT_PATH, "model")
else:
    MODEL_PATH = "/kaggle/working/submission/model"

# Prompt Formatting
import itertools
from typing import Iterable


class GemmaFormatter:
    _start_token = '<start_of_turn>'
    _end_token = '<end_of_turn>'

    def __init__(self, system_prompt: str = None, few_shot_examples: Iterable = None):
        self._system_prompt = system_prompt
        self._few_shot_examples = few_shot_examples
        self._turn_user = f"{self._start_token}user\n{{}}{self._end_token}\n"
        self._turn_model = f"{self._start_token}model\n{{}}{self._end_token}\n"
        self.reset()

    def __repr__(self):
        return self._state

    def user(self, prompt):
        self._state += self._turn_user.format(prompt)
        return self

    def model(self, prompt):
        self._state += self._turn_model.format(prompt)
        return self

    def start_user_turn(self):
        self._state += f"{self._start_token}user\n"
        return self

    def start_model_turn(self):
        self._state += f"{self._start_token}model\n"
        return self

    def end_turn(self):
        self._state += f"{self._end_token}\n"
        return self

    def reset(self):
        self._state = ""
        if self._system_prompt is not None:
            self.user(self._system_prompt)
        if self._few_shot_examples is not None:
            self.apply_turns(self._few_shot_examples, start_agent='user')
        return self

    def apply_turns(self, turns: Iterable, start_agent: str):
        formatters = [self.model, self.user] if start_agent == 'model' else [self.user, self.model]
        formatters = itertools.cycle(formatters)
        for fmt, turn in zip(formatters, turns):
            fmt(turn)
        return self


# Agent Definitions
import re


@contextlib.contextmanager
def _set_default_tensor_type(dtype: torch.dtype):
    """Set the default torch dtype to the given dtype."""
    torch.set_default_dtype(dtype)
    yield
    torch.set_default_dtype(torch.float)


class GemmaAgent:
    def __init__(self, model_path=MODEL_PATH, device='cuda:0', system_prompt=None, few_shot_examples=None):
        self._device = torch.device(device)
        self.formatter = GemmaFormatter(system_prompt=system_prompt, few_shot_examples=few_shot_examples)

        print("Initializing model")
        self.model = AutoModelForCausalLM.from_pretrained(
            model_path,
            device_map = "auto",
        )

        self.tokenizer = AutoTokenizer.from_pretrained(model_path)

    def __call__(self, obs, *args):
        self._start_session(obs)
        prompt = str(self.formatter)
        response = self._call_llm(prompt)
        response = self._parse_response(response, obs)
        print(f"{response=}")
        return response

    def _start_session(self, obs: dict):
        raise NotImplementedError

    def _call_llm(self, prompt, max_new_tokens=32, **sampler_kwargs):
        if sampler_kwargs is None:
            sampler_kwargs = {
                'temperature': 0.01,
                'top_p': 0.1,
                'top_k': 1,
        }
        input_ids = self.tokenizer(prompt, return_tensors="pt").to("cuda")
        outputs = self.model.generate(**input_ids, max_new_tokens=max_new_tokens, kwargs=sampler_kwargs)
        return self.tokenizer.decode(outputs[0])

    def _parse_keyword(self, response: str):
        match = re.search(r"(?<=\*\*)([^*]+)(?=\*\*)", response)
        if match is None:
            keyword = ''
        else:
            keyword = match.group().lower()
        return keyword

    def _parse_response(self, response: str, obs: dict):
        raise NotImplementedError


def interleave_unequal(x, y):
    return [
        item for pair in itertools.zip_longest(x, y) for item in pair if item is not None
    ]


class GemmaQuestionerAgent(GemmaAgent):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    def _start_session(self, obs):
        self.formatter.reset()
        self.formatter.user("Let's play 20 Questions. You are playing the role of the Questioner.")
        turns = interleave_unequal(obs.questions, obs.answers)
        self.formatter.apply_turns(turns, start_agent='model')
        if obs.turnType == 'ask':
            self.formatter.user("Please ask a yes-or-no question.")
        elif obs.turnType == 'guess':
            self.formatter.user("Now guess the keyword. Surround your guess with double asterisks.")
        self.formatter.start_model_turn()

    def _parse_response(self, response: str, obs: dict):
        if obs.turnType == 'ask':
            match = re.search(".+?\?", response.replace('*', ''))
            if match is None:
                question = "Is it a place?"
            else:
                question = match.group()
            return question
        elif obs.turnType == 'guess':
            guess = self._parse_keyword(response)
            return guess
        else:
            raise ValueError("Unknown turn type:", obs.turnType)


class GemmaAnswererAgent(GemmaAgent):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    def _start_session(self, obs):
        self.formatter.reset()
        self.formatter.user(f"Let's play 20 Questions. You are playing the role of the Answerer. The keyword is {obs.keyword} in the category {obs.category}.")
        turns = interleave_unequal(obs.questions, obs.answers)
        self.formatter.apply_turns(turns, start_agent='user')
        self.formatter.user(f"The question is about the keyword {obs.keyword} in the category {obs.category}. Give yes-or-no answer and surround your answer with double asterisks, like **yes** or **no**.")
        self.formatter.start_model_turn()

    def _parse_response(self, response: str, obs: dict):
        answer = self._parse_keyword(response)
        return 'yes' if 'yes' in answer else 'no'


# Agent Creation
system_prompt = "You are an AI assistant designed to play the 20 Questions game. In this game, the Answerer thinks of a keyword and responds to yes-or-no questions by the Questioner. The keyword is a specific person, place, or thing."

few_shot_examples = [
    "Let's play 20 Questions. You are playing the role of the Questioner. Please ask your first question.",
    "Is it a thing?", "**no**",
    "Is is a place?", "**yes**",
    "Is it a country?", "**yes**",
    "Does it start with f?", "**yes** Now guess the keyword.",
    "**France**", "Correct!",
]


# **IMPORTANT:** Define agent as a global so you only have to load
# the agent you need. Loading both will likely lead to OOM.
agent = None


def get_agent(name: str):
    global agent
    
    if agent is None and name == 'questioner':
        agent = GemmaQuestionerAgent(
            device='cuda:0',
            system_prompt=system_prompt,
            few_shot_examples=few_shot_examples,
        )
    elif agent is None and name == 'answerer':
        agent = GemmaAnswererAgent(
            device='cuda:0',
            system_prompt=system_prompt,
            few_shot_examples=few_shot_examples,
        )
    assert agent is not None, "Agent not initialized."

    return agent


def agent_fn(obs, cfg):
    if obs.turnType == "ask":
        response = get_agent('questioner')(obs)
    elif obs.turnType == "guess":
        response = get_agent('questioner')(obs)
    elif obs.turnType == "answer":
        response = get_agent('answerer')(obs)
    if response is None or len(response) <= 1:
        return "yes"
    else:
        return response

Writing /kaggle/working/submission/main.py


In [8]:
!apt install pigz pv > /dev/null





In [9]:
!tar --use-compress-program='pigz --fast --recursive | pv' -cf /kaggle/working/submission.tar.gz -C /kaggle/working/submission .

8.11GiB 0:03:16 [42.2MiB/s] [  <=>                                             ]


## Simulate Game

### Load test data
Download the latest keywords.py from [kaggle-environments](https://github.com/Kaggle/kaggle-environments/blob/master/kaggle_environments/envs/llm_20_questions/keywords.py) github repo

In [10]:
%%bash

wget -O keywords.py https://raw.githubusercontent.com/Kaggle/kaggle-environments/master/kaggle_environments/envs/llm_20_questions/keywords.py
mkdir -p /kaggle/working/simulation/

--2024-07-21 04:25:05--  https://raw.githubusercontent.com/Kaggle/kaggle-environments/master/kaggle_environments/envs/llm_20_questions/keywords.py
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.109.133, 185.199.108.133, 185.199.111.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.109.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 93552 (91K) [text/plain]
Saving to: 'keywords.py'

     0K .......... .......... .......... .......... .......... 54% 3.00M 0s
    50K .......... .......... .......... .......... .         100% 27.6M=0.02s

2024-07-21 04:25:05 (5.03 MB/s) - 'keywords.py' saved [93552/93552]



In [11]:
import json
import pandas as pd
import numpy as np
from keywords import KEYWORDS_JSON

def create_keyword_df(KEYWORDS_JSON):
    json_data = json.loads(KEYWORDS_JSON)

    keyword_list = []
    category_list = []
    alts_list = []

    for i in range(len(json_data)):
        for j in range(len(json_data[i]['words'])):
            keyword = json_data[i]['words'][j]['keyword']
            keyword_list.append(keyword)
            category_list.append(json_data[i]['category'])
            alts_list.append(json_data[i]['words'][j]['alts'])

    data_pd = pd.DataFrame(columns=['keyword', 'category', 'alts'])
    data_pd['keyword'] = keyword_list
    data_pd['category'] = category_list
    data_pd['alts'] = alts_list
    
    return data_pd


In [12]:
keywords = create_keyword_df(KEYWORDS_JSON)
# keywords_df.head(5)
keywords.tail(5)

Unnamed: 0,keyword,category,alts
1137,rhine,place,[]
1138,yangtze river,place,"[changjiang, yangtze]"
1139,yellow river,place,[huang he]
1140,zambezi river,place,[zambezi]
1141,yenisei river,place,[yenisei]


In [13]:
keywords["category"].unique()

array(['things', 'place'], dtype=object)

In [14]:
keywords.to_csv("/kaggle/working/simulation/keywords.csv", index=False)

### Create Agents
2 vs 2

In [15]:
%%writefile /kaggle/working/simulation/agent1.py

import pandas as pd
import numpy as np

keywords = pd.read_csv("/kaggle/working/simulation/keywords.csv").keyword.values

def agent_fn(obs, cfg):
    global keywords
    
    # DISPLAY ROUND NUMBER
    k = len( obs.questions )
    if obs.turnType == "ask":
        print()
        print("#"*25)
        print(f"### Round {k+1}")
        print("#"*25)

    # DISPLAY AGENT NAME AND JSON INPUT
    name = "Team 1 - Questioner - Agent Random"
    print(f"\n{name}\nINPUT =",obs)
    
    # GENERATE RESPONSE
    keyword = np.random.choice(keywords)
    if obs.turnType == "ask":
        response = f"Is it {keyword}?"
    else: #obs.turnType == "guess"
        response = keyword
        if obs.answers[-1] == "yes":
            response = obs.questions[-1].rsplit(" ",1)[1][:-1]
    print(f"OUTPUT = '{response}'")

    return response

Writing /kaggle/working/simulation/agent1.py


In [16]:
%%writefile /kaggle/working/simulation/agent2.py

import numpy as np

def agent_fn(obs, cfg):
    
    # DISPLAY AGENT NAME AND JSON INPUT
    name = "Team 1 - Answerer - Agent Random"
    print(f"\n{name}\nINPUT =",obs)
    
    # GENERATE RESPONSE
    response = "no"
    #response = np.random.choice(["yes","no"])
    if obs.keyword.lower() in obs.questions[-1].lower():
        response = "yes"
    print(f"OUTPUT = '{response}'")

    return response

Writing /kaggle/working/simulation/agent2.py


In [17]:
%%writefile /kaggle/working/simulation/agent3.py

import pandas as pd
import numpy as np

keywords = pd.read_csv("/kaggle/working/simulation/keywords.csv").keyword.values

def agent_fn(obs, cfg):
    global keywords
    
    # DISPLAY AGENT NAME AND JSON INPUT
    name = "Team 2 - Questioner - Agent Random"
    print(f"\n{name}\nINPUT =",obs)
    
    # GENERATE RESPONSE
    keyword = np.random.choice(keywords)
    if obs.turnType == "ask":
        response = f"Is it {keyword}?"
    else: #obs.turnType == "guess"
        response = keyword
        if obs.answers[-1] == "yes":
            response = obs.questions[-1].rsplit(" ",1)[1][:-1]
    print(f"OUTPUT = '{response}'")

    return response

Writing /kaggle/working/simulation/agent3.py


In [18]:
%%writefile /kaggle/working/simulation/agent4.py

import numpy as np

def agent_fn(obs, cfg):
    
    # DISPLAY AGENT NAME AND JSON INPUT
    name = "Team 2 - Answerer - Agent Random"
    print(f"\n{name}\nINPUT =",obs)
    
    # GENERATE RESPONSE
    response = "no"
    #response = np.random.choice(["yes","no"])
    if obs.keyword.lower() in obs.questions[-1].lower():
        response = "yes"
    print(f"OUTPUT = '{response}'")

    return response

Writing /kaggle/working/simulation/agent4.py


### Create Environment

In [19]:
!pip install -q pygame

In [None]:
GEMMA_AS_QUESTIONER = True
GEMMA_AS_ANSWERER = True

from kaggle_environments import make
env = make("llm_20_questions", debug=True)

# TEAM 1
agent1 = "/kaggle/working/simulation/agent1.py"
agent2 = "/kaggle/working/simulation/agent2.py"

# TEAM 2 - QUESTIONER
agent3 = "/kaggle/working/simulation/agent3.py"
if GEMMA_AS_QUESTIONER:
    agent3 = "/kaggle/working/submission/main.py"
    
# TEAM 2 - ANSWERER
agent4 = "/kaggle/working/simulation/agent4.py"
if GEMMA_AS_ANSWERER:
    agent4 = "/kaggle/working/submission/main.py"
    
env.reset()
log = env.run([agent1, agent2, agent3, agent4])

env.render(mode="ipython", width=600, height=500)

import gc, torch
del make, env, log
gc.collect()
torch.cuda.empty_cache()

Unused kwargs: ['_load_in_4bit', '_load_in_8bit', 'quant_method']. These kwargs are not used in <class 'transformers.utils.quantization_config.BitsAndBytesConfig'>.



#########################
### Round 1
#########################

Team 1 - Questioner - Agent Random
INPUT = {'remainingOverageTime': 300, 'step': 0, 'questions': [], 'guesses': [], 'answers': [], 'role': 'guesser', 'turnType': 'ask', 'keyword': '', 'category': ''}
OUTPUT = 'Is it Duct tape?'


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

2024-07-21 04:26:35.933292: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-07-21 04:26:35.933390: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-07-21 04:26:36.075130: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
Unused kwargs: ['_load_in_4bit', '_load_in_8bit', 'quant_method']. These kwargs are not used in <class 'transformers.utils.quantization_config.BitsAndBytesConfig'>.


Initializing model
response='Is it a thing?'

Team 1 - Answerer - Agent Random
INPUT = {'remainingOverageTime': 300, 'questions': ['Is it Duct tape?'], 'guesses': [], 'answers': [], 'role': 'answerer', 'turnType': 'answer', 'keyword': 'miami florida', 'category': 'place', 'step': 1}
OUTPUT = 'no'


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Initializing model
response='no'

Team 1 - Questioner - Agent Random
INPUT = {'remainingOverageTime': 300, 'step': 2, 'questions': ['Is it Duct tape?'], 'guesses': [], 'answers': ['no'], 'role': 'guesser', 'turnType': 'guess', 'keyword': '', 'category': ''}
OUTPUT = 'Ointment'
response='no'

#########################
### Round 2
#########################

Team 1 - Questioner - Agent Random
INPUT = {'remainingOverageTime': 300, 'step': 3, 'questions': ['Is it Duct tape?'], 'guesses': ['Ointment'], 'answers': ['no'], 'role': 'guesser', 'turnType': 'ask', 'keyword': '', 'category': ''}
OUTPUT = 'Is it wellington new zealand?'
