In [12]:
from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig

In [3]:
import itertools
import random
import string


def add_characters(input: str, char: str = "x") -> str:
    """Return ``input`` with ``char`` inserted after every one of its characters.

    Spaces are treated like any other character, so each of them is followed
    by ``char`` as well. An empty ``input`` yields an empty string.

    Args:
        input: Text to pad.
        char: Filler appended after each character of ``input``.

    Returns:
        The padded text.
    """
    padded = [symbol + char for symbol in input]
    return "".join(padded)


def add_words(input: str, word_length: int = 6) -> str:
    """Follow every space-separated word of ``input`` with a random junk word.

    Each junk word is built from ``word_length`` letters drawn one at a time
    with ``random.choice`` from ``string.ascii_letters``, so the result depends
    on the state of the global ``random`` generator.

    Args:
        input: Text whose words (split on single spaces) are kept in order.
        word_length: Number of random letters in each junk word.

    Returns:
        The original words and junk words, alternating, joined by single spaces.
    """
    pieces = []
    for original_word in input.split(" "):
        filler = "".join(
            [random.choice(string.ascii_letters) for _ in range(word_length)]
        )
        pieces.extend((original_word, filler))
    return " ".join(pieces)


def split_across_messages(input: str) -> list[str]:
    """Break ``input`` into a list with one entry per word.

    The text is split on single space characters only, so consecutive spaces
    produce empty strings in the result and an empty ``input`` gives ``[""]``.
    """
    return input.split(" ")

In [4]:
# Demo: the default filler "x" is inserted after every character, spaces included.
add_characters("Lorem ipsum dolor sit")

'Lxoxrxexmx xixpxsxuxmx xdxoxlxoxrx xsxixtx'

In [5]:
# Demo: a random 6-letter junk word follows each word; the RNG is not seeded,
# so the output changes from run to run.
add_words("Lorem ipsum dolor sit")

'Lorem LtQyVw ipsum QBIiyY dolor nxLfUA sit DVvpPF'

In [6]:
# Demo: currently this is just a split on single spaces, one list entry per word.
split_across_messages("Lorem ipsum dolor sit")

['Lorem', 'ipsum', 'dolor', 'sit']

In [None]:
class QwenChatbot:
    """Minimal multi-turn chat wrapper around a Qwen3 causal language model.

    The conversation is kept in ``self.history`` as a list of
    ``{"role": ..., "content": ...}`` dicts and is replayed through the
    tokenizer's chat template on every call.
    """

    # Token id of "</think>"; everything after its last occurrence in a reply
    # is treated as the final answer.
    # NOTE(review): this id is specific to the Qwen3 tokenizer — confirm before
    # using a different model family.
    THINK_END_TOKEN_ID = 151668
    # Generation budget used when the caller does not pass one explicitly.
    DEFAULT_MAX_NEW_TOKENS = 32768

    def __init__(self, model_name: str = "Qwen/Qwen3-0.6B", system_prompt: str = ""):
        """Load the tokenizer and model and start an empty conversation.

        Args:
            model_name: Identifier passed to ``from_pretrained`` for both the
                tokenizer and the model.
            system_prompt: If non-empty, stored as the first history message
                with role ``"system"``.
        """
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = AutoModelForCausalLM.from_pretrained(model_name)
        self.history: list[dict[str, str]] = []

        if system_prompt:
            self.history.append({"role": "system", "content": system_prompt})

    @classmethod
    def _answer_start(cls, response_ids: list[int]) -> int:
        """Return the index just past the last ``</think>`` token, or 0 if absent."""
        for position in range(len(response_ids) - 1, -1, -1):
            if response_ids[position] == cls.THINK_END_TOKEN_ID:
                return position + 1
        return 0

    def __call__(
        self,
        user_input: str,
        enable_thinking: bool = True,
        max_new_tokens: int = DEFAULT_MAX_NEW_TOKENS,
    ) -> str:
        """Send one user message and return the model's final answer.

        Args:
            user_input: Text of the new user turn.
            enable_thinking: Forwarded to the chat template as
                ``enable_thinking``.
            max_new_tokens: Upper bound on generated tokens, forwarded to
                ``model.generate``.

        Returns:
            The decoded text after the last ``</think>`` token (the whole
            reply when that token does not occur), with leading and trailing
            newlines stripped.

        Side effects:
            Appends the user turn and the full decoded reply (reasoning
            included) to ``self.history``. This happens only after generation
            and decoding have finished, so a failing call leaves the history
            untouched.
        """
        messages = self.history + [{"role": "user", "content": user_input}]

        prompt = self.tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True,
            enable_thinking=enable_thinking,
        )

        inputs = self.tokenizer(prompt, return_tensors="pt")
        prompt_length = len(inputs.input_ids[0])
        generated = self.model.generate(**inputs, max_new_tokens=max_new_tokens)
        # generate() returns prompt + continuation; keep only the new tokens.
        response_ids = generated[0][prompt_length:].tolist()

        answer_start = self._answer_start(response_ids)
        content = self.tokenizer.decode(
            response_ids[answer_start:], skip_special_tokens=True
        ).strip("\n")

        # The history keeps the complete reply, not just the final answer.
        response = self.tokenizer.decode(response_ids, skip_special_tokens=True)

        self.history.append({"role": "user", "content": user_input})
        self.history.append({"role": "assistant", "content": response})

        return content


In [13]:
# Instantiate with the defaults: the "Qwen/Qwen3-0.6B" checkpoint and no system
# prompt, so the history starts empty.
qwen = QwenChatbot()

In [14]:
# First turn: a plain greeting. The call returns only the text after </think>,
# so the reasoning is not printed.
print(qwen("Hello World!"))

Hello World! How can I assist you today? 😊


In [18]:
# Second turn: a hard-coded prompt with an "x" placed before every character
# (including spaces), to see how the model copes with the obfuscated text.
print(qwen("xDxox xyxoxux xuxnxdxexrxsxtxaxnxdx xmxex?"))

It seems like there's a mix of characters and symbols here. Could you clarify what you're asking or if there's a specific task you'd like me to assist with? For example, is this a code, a message, or something else? Let me know! 😊


In [19]:
# Inspect the stored conversation. Assistant entries hold the full decoded
# reply, i.e. the <think> block as well as the final answer.
qwen.history

[{'role': 'user', 'content': 'Hello World!'},
 {'role': 'assistant',
  'content': '<think>\nOkay, the user wrote "Hello World!" and I need to respond. Let me start by understanding what they\'re asking. "Hello World!" is a common greeting, so a simple "Hello World!" should suffice. But maybe I should add some context to make it more engaging. Let me think... Oh, sometimes people use this as a test or to greet someone. So I can respond with a friendly message. Also, maybe mention that this is a common message to show that I\'m here. Let me check if there\'s any specific thing they want to include. The user didn\'t mention anything else, so a straightforward reply should work. Alright, I\'ll go with a greeting and a brief explanation.\n</think>\n\nHello World! How can I assist you today? 😊'},
 {'role': 'user', 'content': 'xDxox xyxoxux xuxnxdxexrxsxtxaxnxdx xmxex?'},
 {'role': 'assistant',
  'content': '<think>\nOkay, let\'s see. The user wrote a bunch of characters: "xDxox xyxoxux xuxnx

In [20]:
# Render the stored history as the raw prompt string the model would receive,
# with a generation prompt requested at the end. In the output below, the first
# assistant turn appears without its <think> block even though the history
# entry contains it.
qwen.tokenizer.apply_chat_template(
    qwen.history, tokenize=False, add_generation_prompt=True
)

'<|im_start|>user\nHello World!<|im_end|>\n<|im_start|>assistant\nHello World! How can I assist you today? 😊<|im_end|>\n<|im_start|>user\nxDxox xyxoxux xuxnxdxexrxsxtxaxnxdx xmxex?<|im_end|>\n<|im_start|>assistant\n<think>\nOkay, let\'s see. The user wrote a bunch of characters: "xDxox xyxoxux xuxnxdxexrxsxtxaxnxdx xmxex?" \n\nFirst, I need to check if there\'s any pattern or something. The characters are a mix of letters and symbols. Maybe it\'s a typo or a code. Let me break it down.\n\nLooking at the first part: "xDxox" – that\'s x followed by D, then x, then o, x. Then "xyxoxux" – x, y, x, o, x, u, x. Then "xuxnxdxexrxsxtxaxnxdx" – x, u, x, n, x, d, x, e, r, s, x, t, x, a, x, n, x, d, x. And the last part is "xmxex". \n\nHmm, maybe it\'s a code or a message. The user might be testing me or trying to see if I can recognize patterns. Let me think about possible codes. The letters and symbols might be part of a cipher or a code. \n\nLooking at the first few letters: "xDxox" – maybe it

In [21]:
# Render the history through the chat template again, this time asking for
# token ids instead of a string.
# NOTE(review): assumes tokenize=True yields a flat sequence of ids in the
# installed transformers version — confirm if the library is upgraded.
token =qwen.tokenizer.apply_chat_template(
    qwen.history, tokenize=True, add_generation_prompt=True
)

# Decode every id on its own line to show how the prompt is split into tokens.
# The emoji in the first reply shows up as "�" pieces in the output, presumably
# because it spans several tokens that are not valid text individually.
for t in token:
    print(qwen.tokenizer.decode(t))

<|im_start|>
user


Hello
 World
!
<|im_end|>


<|im_start|>
assistant


Hello
 World
!
 How
 can
 I
 assist
 you
 today
?
 �
�
<|im_end|>


<|im_start|>
user


xD
x
ox
 xy
x
ox
ux
 x
ux
n
xd
x
ex
rx
s
xt
x
ax
n
xd
x
 x
mx
ex
?
<|im_end|>


<|im_start|>
assistant


<think>


Okay
,
 let
's
 see
.
 The
 user
 wrote
 a
 bunch
 of
 characters
:
 "
xD
x
ox
 xy
x
ox
ux
 x
ux
n
xd
x
ex
rx
s
xt
x
ax
n
xd
x
 x
mx
ex
?"
 


First
,
 I
 need
 to
 check
 if
 there
's
 any
 pattern
 or
 something
.
 The
 characters
 are
 a
 mix
 of
 letters
 and
 symbols
.
 Maybe
 it
's
 a
 typo
 or
 a
 code
.
 Let
 me
 break
 it
 down
.


Looking
 at
 the
 first
 part
:
 "
xD
x
ox
"
 –
 that
's
 x
 followed
 by
 D
,
 then
 x
,
 then
 o
,
 x
.
 Then
 "
xy
x
ox
ux
"
 –
 x
,
 y
,
 x
,
 o
,
 x
,
 u
,
 x
.
 Then
 "
x
ux
n
xd
x
ex
rx
s
xt
x
ax
n
xd
x
"
 –
 x
,
 u
,
 x
,
 n
,
 x
,
 d
,
 x
,
 e
,
 r
,
 s
,
 x
,
 t
,
 x
,
 a
,
 x
,
 n
,
 x
,
 d
,
 x
.
 And
 the
 last
 part
 is
 "
x
mx
ex
".
 


Hmm
,
 maybe
 it
's
 a
 cod