In [1]:
%%bash
mkdir -p /kaggle/working/submission
mkdir -p /tmp/model
pip install -q bitsandbytes accelerate
pip install -qU transformers

In [3]:
from kaggle_secrets import UserSecretsClient
secrets = UserSecretsClient()

HF_TOKEN: str | None  = None

try:
    HF_TOKEN = secrets.get_secret("HF_TOKEN")
except:
    pass

In [4]:
from huggingface_hub import snapshot_download
from pathlib import Path
import shutil

g_model_path = Path("/tmp/model")
if g_model_path.exists():
    shutil.rmtree(g_model_path)
g_model_path.mkdir(parents=True)

snapshot_download(
    repo_id="Qwen/Qwen2-7B-Instruct",
    ignore_patterns="original*",
    local_dir=g_model_path,
    token=globals().get("HF_TOKEN", None)
)

Fetching 14 files:   0%|          | 0/14 [00:00<?, ?it/s]

merges.txt:   0%|          | 0.00/1.67M [00:00<?, ?B/s]

LICENSE:   0%|          | 0.00/11.3k [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/243 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/663 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/6.55k [00:00<?, ?B/s]

.gitattributes:   0%|          | 0.00/1.52k [00:00<?, ?B/s]

model-00001-of-00004.safetensors:   0%|          | 0.00/3.95G [00:00<?, ?B/s]

model-00002-of-00004.safetensors:   0%|          | 0.00/3.86G [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/27.8k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/7.03M [00:00<?, ?B/s]

model-00003-of-00004.safetensors:   0%|          | 0.00/3.86G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/1.29k [00:00<?, ?B/s]

model-00004-of-00004.safetensors:   0%|          | 0.00/3.56G [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/2.78M [00:00<?, ?B/s]

'/tmp/model'

In [5]:
!ls -l /tmp/model

total 14885580
-rw-r--r-- 1 root root      11344 Jul 24 07:57 LICENSE
-rw-r--r-- 1 root root       6551 Jul 24 07:57 README.md
-rw-r--r-- 1 root root        663 Jul 24 07:57 config.json
-rw-r--r-- 1 root root        243 Jul 24 07:57 generation_config.json
-rw-r--r-- 1 root root    1671839 Jul 24 07:57 merges.txt
-rw-r--r-- 1 root root 3945426872 Jul 24 07:58 model-00001-of-00004.safetensors
-rw-r--r-- 1 root root 3864726352 Jul 24 07:58 model-00002-of-00004.safetensors
-rw-r--r-- 1 root root 3864726408 Jul 24 08:00 model-00003-of-00004.safetensors
-rw-r--r-- 1 root root 3556392240 Jul 24 07:58 model-00004-of-00004.safetensors
-rw-r--r-- 1 root root      27752 Jul 24 07:57 model.safetensors.index.json
-rw-r--r-- 1 root root    7028015 Jul 24 07:57 tokenizer.json
-rw-r--r-- 1 root root       1288 Jul 24 07:57 tokenizer_config.json
-rw-r--r-- 1 root root    2776833 Jul 24 07:57 vocab.json


In [6]:
# load model on memory
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
import torch

torch.backends.cuda.enable_mem_efficient_sdp(False)
torch.backends.cuda.enable_flash_sdp(False)

downloaded_model = "/tmp/model"

bnb_config = BitsAndBytesConfig(
    load_in_4bit = True,
    bnb_4bit_compute_dtype=torch.float16,
)

model = AutoModelForCausalLM.from_pretrained(
    downloaded_model,
    quantization_config = bnb_config,
    torch_dtype = torch.float16,
    device_map = "auto",
    trust_remote_code = True,
)

tokenizer = AutoTokenizer.from_pretrained(downloaded_model)

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

In [11]:
# save model in submission directory
model.save_pretrained("/kaggle/working/submission/model")
tokenizer.save_pretrained("/kaggle/working/submission/model")

('/kaggle/working/submission/model/tokenizer_config.json',
 '/kaggle/working/submission/model/special_tokens_map.json',
 '/kaggle/working/submission/model/vocab.json',
 '/kaggle/working/submission/model/merges.txt',
 '/kaggle/working/submission/model/added_tokens.json',
 '/kaggle/working/submission/model/tokenizer.json')

In [12]:
# unload model from memory
import gc, torch
del model, tokenizer
gc.collect()
torch.cuda.empty_cache()

In [17]:
from transformers import AutoTokenizer

# Qwen-2 모델의 토크나이저 로드
tokenizer = AutoTokenizer.from_pretrained(downloaded_model)

# End token을 확인
end_token = tokenizer.bos_token
end_token_id = tokenizer.eos_token_id

print(f"End token: {end_token}")
print(f"End token ID: {end_token_id}")

End token: None
End token ID: 151645


In [13]:
%%writefile submission/main.py
# comment magic command before simulation

from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
import os
import sys

torch.backends.cuda.enable_mem_efficient_sdp(False)
torch.backends.cuda.enable_flash_sdp(False)


 
KAGGLE_AGENT_PATH = "/kaggle_simulations/agent/"
if os.path.exists(KAGGLE_AGENT_PATH):
    MODEL_PATH = os.path.join(KAGGLE_AGENT_PATH, "model")
else:
    MODEL_PATH = "/kaggle/working/submission/model"

    
model = AutoModelForCausalLM.from_pretrained(
    MODEL_PATH,
    device_map = "auto",
    trust_remote_code = True,
)
tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
id_eot = tokenizer.convert_tokens_to_ids(["<|im_end|>"])[0]


def generate_answer(template):
    inp_ids = tokenizer(template, return_tensors="pt").to("cuda")
    out_ids = model.generate(**inp_ids,max_new_tokens=15).squeeze()
    start_gen = inp_ids.input_ids.shape[1]
    out_ids = out_ids[start_gen:]
    if id_eot in out_ids:
        stop = out_ids.tolist().index(id_eot)
        out = tokenizer.decode(out_ids[:stop])
    else:
        out = tokenizer.decode(out_ids)
    return out
    

class Robot:
    def __init__(self):
        pass
    
    def on(self, mode, obs):
        assert mode in ["asking", "guessing", "answering"], "mode can only take one of these values: asking, answering, guessing"
        if mode == "asking":
            #launch the asker role
            output = self.asker(obs)
        if mode == "answering":
            #launch the answerer role
            output = self.answerer(obs)
            if "yes" in output.lower():
                output = "yes"
            elif "no" in output.lower():
                output = "no"   
            if ("yes" not in output.lower() and "no" not in output.lower()):
                output = "yes"
        if mode == "guessing":
            #launch the guesser role
            output = self.asker(obs)
        return output
    
    
    def asker(self, obs):
        sys_prompt = """You are a helpful AI assistant, and your are very smart in playing 20 questions game,
        the user is going to think of a word, it can be only one of the following 2 categories:
        1. a place
        2. a thing
        So focus your area of search on these options. and give smart questions that narrows down the search space\n"""
    
        if obs.turnType =="ask":
            ask_prompt = sys_prompt + """your role is to find the word by asking him up to 20 questions, your questions to be valid must have only a 'yes' or 'no' answer.
            to help you, here's an example of how it should work assuming that the keyword is Morocco:
            examle:
            <assistant: is it a place?
            user: yes
            assistant: is it in europe?
            user: no
            assistant: is it in africa?
            user: yes
            assistant: do most people living there have dark skin?
            user: no
            assistant: is it a country name starting by m ?
            user: yes
            assistant: is it Morocco?
            user: yes>

            the user has chosen the word, ask your first question!
            please be short and not verbose, give only one question, no extra word!"""
            chat_template = f"""<s><|system|> \n\n{ask_prompt}<|im_end|>"""
            chat_template += "<|assistant|> \n\n"
            if len(obs.questions)>=1:
                for q, a in zip(obs.questions, obs.answers):
                    chat_template += f"{q}<|im_end|><|user|> \n\n"
                    chat_template += f"{a}<|im_end|><|assistant|> \n\n"
                    
        elif obs.turnType == "guess":
            conv = ""
            for q, a in zip(obs.questions, obs.answers):
                conv += f"""Question: {q}\nAnswer: {a}\n"""
            guess_prompt =  sys_prompt + f"""so far, the current state of the game is as following:\n{conv}
            based on the conversation, can you guess the word, please give only the word, no verbosity around"""
            chat_template = f"""<s><|system|> \n\n{guess_prompt}<|im_end|>"""
            chat_template += "<|assistant|> \n\n"
                
        output = generate_answer(chat_template)        
        return output
        
        
        
    def answerer(self, obs):
        sys_prompt = f"""you are a helpful AI assistant, and your are very smart in playing 20 questions game,
        the role of the user is to guess the word by asking you up to 20 questions, your answers to be valid must be a 'yes' or 'no', any other answer is invalid and you lose the game.
        Know that the user will always guess a word belonging to one of the following 2 categories:
        1. a place
        2. a thing
        so make sure you understand the user's question and you understand the keyword you're playig on.
        for now the word that the user should guess is: "{obs.keyword}", it is of category "{obs.category}",
        to help you, here's an example of how it should work assuming that the keyword is Morocco in the category "place":
        examle:
        <user: is it a place?
        assistant: yes
        user: is it in europe?
        assistant: no
        user: is it in africa?
        assistant: yes
        user: do most people living there have dark skin?
        assistant: no
        user: is it a country name starting by m ?
        assistant: yes
        user: is it Morocco?
        assistant: yes>"""
        
        chat_template = f"""<s><|system|> \n\n{sys_prompt}<|im_end|>"""
        chat_template += "<|user|> \n\n"
        chat_template += f"{obs.questions[0]}<|im_end|>"
        chat_template += "<|assistant|> \n\n"
        if len(obs.answers)>=1:
            for q, a in zip(obs.questions[1:], obs.answers):
                chat_template += f"{a}<|im_end|><|user|> \n\n"
                chat_template += f"{q}<|im_end|><|assistant|> \n\n"
        output = generate_answer(chat_template)
        return output
    
    
robot = Robot()


def agent(obs, cfg):
    
    if obs.turnType =="ask":
        response = robot.on(mode = "asking", obs = obs)
        
    elif obs.turnType =="guess":
        response = robot.on(mode = "guessing", obs = obs)
        
    elif obs.turnType =="answer":
        response = robot.on(mode = "answering", obs = obs)
        
    if response == None or len(response)<=1:
        response = "yes"
        
    return response

Overwriting submission/main.py


In [14]:

!apt install pigz pv > /dev/null





In [15]:
!tar --use-compress-program='pigz --fast --recursive | pv' -cf submission.tar.gz -C /kaggle/working/submission .


4.56GiB 0:01:31 [51.2MiB/s] [       <=>                                        ]
