In [38]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

class ShortTermMemory:
    def __init__(self, max_tokens=512):
        self.history = []
        self.max_tokens = max_tokens
        self.tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen3-8B", trust_remote_code=True)

    def add(self, user, assistant):
        self.history.append({"user": user, "assistant": assistant})
        self._trim()

    def _trim(self):
        total_tokens = 0
        trimmed_history = []
        for pair in reversed(self.history):
            user_tokens = len(self.tokenizer.encode(pair["user"]))
            assistant_tokens = len(self.tokenizer.encode(pair["assistant"]))
            if total_tokens + user_tokens + assistant_tokens > self.max_tokens:
                break
            trimmed_history.insert(0, pair)
            total_tokens += user_tokens + assistant_tokens
        self.history = trimmed_history

    def get_context(self):
        context = ""
        for pair in self.history:
            context += f"<|user|>\n{pair['user']}\n<|assistant|>\n{pair['assistant']}\n"
        return context

class Chatbot:
    def __init__(self, model_name="Qwen/Qwen1.5-0.5B-Chat", max_new_tokens=256):
        self.tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
        self.model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16, device_map="auto", trust_remote_code=True)
        self.memory = ShortTermMemory(max_tokens=512)
        self.max_new_tokens = max_new_tokens

    def chat(self, user_input):
        context = self.memory.get_context()
        full_prompt = context + f"<|user|>\n{user_input}\n<|assistant|>\n"
        input_ids = self.tokenizer.encode(full_prompt, return_tensors="pt").to(self.model.device)

        output_ids = self.model.generate(
            input_ids,
            max_new_tokens=self.max_new_tokens,
            do_sample=True,
            temperature=0.7,
            top_p=0.95
        )

        full_output = self.tokenizer.decode(output_ids[0], skip_special_tokens=True)
        assistant_reply = full_output.split("<|assistant|>")[-1].strip()
        self.memory.add(user_input, assistant_reply)
        return assistant_reply

if __name__ == "__main__":
    bot = Chatbot()
    print("🤖 與 Qwen 聊天中（輸入 'exit' 離開）")

    while True:
        user_input = input("你：")
        if user_input.lower() == "exit":
            break
        reply = bot.chat(user_input)
        print(f"Qwen：{reply}")



tokenizer_config.json:   0%|          | 0.00/9.73k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/2.78M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/1.67M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/11.4M [00:00<?, ?B/s]

🤖 與 Qwen 聊天中（輸入 'exit' 離開）
你：你好我叫Kevin
Qwen：Kevin
你：我喜歡打籃球你呢
Qwen：我也喜欢打籃球， Kevin！
你：我還喜歡喝酒你覺得喝酒好嗎
Qwen：喝酒也有它的美妙之处， Kevin。它可以刺激你的神经，帮助你放松心情，但是過度饮酒也可能对身体产生不良影响。
你：我爽就好少囉唆
Qwen：那很好， Kevin。你可以选择自己喜欢的方式来放松自己。
你：那我問你我的名字叫什麼
Qwen：你的名字叫做 Kevin


KeyboardInterrupt: Interrupted by user