## HuggingFacePipeline 載入 從transformers下載的model (不推薦)

In [None]:
from langchain_huggingface import ChatHuggingFace, HuggingFacePipeline
from langchain.prompts import ChatPromptTemplate
from transformers import BitsAndBytesConfig

quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype="float16",
    bnb_4bit_use_double_quant=True,
)

llm = HuggingFacePipeline.from_model_id(
    model_id="HuggingFaceH4/zephyr-7b-beta",
    task="text-generation",
    pipeline_kwargs=dict(
        max_new_tokens=512,
        do_sample=False,
        repetition_penalty=1.03,
        return_full_text=False,
    ),
    model_kwargs={"quantization_config": quantization_config},
)

chat_model = ChatHuggingFace(llm=llm)

## Ollama 載入 model (推薦)

In [2]:
from langchain_ollama import OllamaLLM, ChatOllama
chat_model = ChatOllama(model='llama3.2:3b')

# 1. 直接使用chat 模型

In [3]:
from langchain_core.messages import HumanMessage

chat_model.invoke([HumanMessage(content="Hi! I'm Jason")])

AIMessage(content="Hello Jason! It's nice to meet you. Is there something I can help you with, or would you like to chat for a bit?", additional_kwargs={}, response_metadata={'model': 'llama3.2:3b', 'created_at': '2024-11-10T15:15:08.9153004Z', 'message': {'role': 'assistant', 'content': ''}, 'done_reason': 'stop', 'done': True, 'total_duration': 962800800, 'load_duration': 24253800, 'prompt_eval_count': 30, 'prompt_eval_duration': 39000000, 'eval_count': 30, 'eval_duration': 898000000}, id='run-e40fd933-b554-4a92-a5ac-7fc48f106ddc-0', usage_metadata={'input_tokens': 30, 'output_tokens': 30, 'total_tokens': 60})

In [4]:
# 它沒有將先前的對話輪次作為上下文，因此無法回答問題。 這會導致糟糕的聊天機器人體驗！
chat_model.invoke([HumanMessage(content="What's my name?")])

AIMessage(content="I don't know your name. I'm a large language model, I don't have any information about you, and our conversation just started. Would you like to share your name with me?", additional_kwargs={}, response_metadata={'model': 'llama3.2:3b', 'created_at': '2024-11-10T15:15:26.9214226Z', 'message': {'role': 'assistant', 'content': ''}, 'done_reason': 'stop', 'done': True, 'total_duration': 1073383100, 'load_duration': 20118000, 'prompt_eval_count': 30, 'prompt_eval_duration': 40000000, 'eval_count': 40, 'eval_duration': 1011000000}, id='run-2af43cd8-7243-4914-b276-947116710462-0', usage_metadata={'input_tokens': 30, 'output_tokens': 40, 'total_tokens': 70})

In [5]:
# 手動將歷史對話放到Message
from langchain_core.messages import AIMessage

chat_model.invoke(
    [
        HumanMessage(content="Hi! I'm Bob"),
        AIMessage(content="Hello Bob! How can I assist you today?"),
        HumanMessage(content="What's my name?"),
    ]
)
# 現在我們可以看到我們得到了一個好的回應！
# 這是支撐聊天機器人進行對話互動的基本概念

AIMessage(content="Your name is Bob. That's what we established at the beginning of our conversation. Is there anything else I can help with, Bob?", additional_kwargs={}, response_metadata={'model': 'llama3.2:3b', 'created_at': '2024-11-10T15:15:27.6316602Z', 'message': {'role': 'assistant', 'content': ''}, 'done_reason': 'stop', 'done': True, 'total_duration': 505539500, 'load_duration': 21478400, 'prompt_eval_count': 55, 'prompt_eval_duration': 15000000, 'eval_count': 29, 'eval_duration': 467000000}, id='run-8b4dc0cb-0cdc-4c24-8c60-df8cadc30e94-0', usage_metadata={'input_tokens': 55, 'output_tokens': 29, 'total_tokens': 84})

# 消息
我們可以匯入相關類別並設定我們的鏈，該鏈包裝模型並新增此訊息歷史記錄。這裡的一個關鍵部分是我們作為get_session_history傳入的函數。這個函數預計接受一個session_id並傳回一個訊息歷史物件。這個session_id用於區分不同的對話，並應作為配置的一部分在呼叫新鏈時傳入

In [6]:
from langchain_core.chat_history import (
    BaseChatMessageHistory,
    InMemoryChatMessageHistory,
)
from langchain_core.runnables.history import RunnableWithMessageHistory

store = {}


def get_session_history(session_id: str) -> BaseChatMessageHistory:
    if session_id not in store:
        store[session_id] = InMemoryChatMessageHistory()
    return store[session_id]


with_message_history = RunnableWithMessageHistory(chat_model, get_session_history)

In [7]:
# 我們現在需要創建一個config，每次都傳遞給可運行的部分。
# 這個配置包含的資訊並不是直接作為輸入的一部分，但仍然是有用的。
# 在這種情況下，我們想要包含一個session_id。這應該看起來像：

In [9]:
config = {"configurable": {"session_id": "abc2"}}
response = with_message_history.invoke(
    [HumanMessage(content="Hi! I'm Bob")],
    config=config,
)

response.content

"I think we've already had this conversation, Bob! You're right, it's a pleasure to meet you again! How's your day going so far?"

In [10]:
response = with_message_history.invoke(
    [HumanMessage(content="What's my name?")],
    config=config,
)

response.content

"You just told me that your name is Bob! I remember. We were in the middle of chatting earlier too. Sorry if I didn't retain the conversation well, but I'm here to help and chat with you again, Bob!"

# 提示詞
現在，我們所做的只是為模型添加了一個簡單的持久化層。我們可以透過添加提示詞模板來使其變得更加複雜和個性化

In [11]:
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder

prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are a helpful assistant. Answer all questions to the best of your ability.",
        ),
        MessagesPlaceholder(variable_name="messages"),
    ]
)

chain = prompt | chat_model

In [12]:
response = chain.invoke({"messages": [HumanMessage(content="hi! I'm bob")]})

response.content

"Hi Bob! It's nice to meet you. How can I assist you today? Do you have a question or topic you'd like to discuss?"

In [14]:
with_message_history = RunnableWithMessageHistory(chain, get_session_history)
config = {"configurable": {"session_id": "abc5"}}
response = with_message_history.invoke(
    {"messages": [HumanMessage(content="Hi! I'm Jim")]},
    config=config,
)
response.content

"Hello Jim! It's nice to meet you. Is there something I can help you with, or would you like to chat for a bit?"

In [16]:
response = with_message_history.invoke(
    [HumanMessage(content="What's my name?")],
    config=config,
)

response.content

'Déjà vu, Jim! Your name remains... Jim!'

# 複雜的提示詞

In [17]:
prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are a helpful assistant. Answer all questions to the best of your ability in {language}.",
        ),
        MessagesPlaceholder(variable_name="messages"),
    ]
)

chain = prompt | chat_model

In [18]:
response = chain.invoke(
    {"messages": [HumanMessage(content="hi! I'm bob")], "language": "Spanish"}
)

response.content

'Hola Bob, ¿cómo estás? ¡Es un placer conocerte! (Hi Bob, how are you? Nice to meet you!)'

現在讓我們將這個更複雜的鏈封裝在一個訊息歷史類別中。這次，由於輸入中有多個鍵，我們需要指定正確的鍵來保存聊天歷史。

In [19]:
with_message_history = RunnableWithMessageHistory(
    chain,
    get_session_history,
    input_messages_key="messages", # chain中的message
)

In [20]:
config = {"configurable": {"session_id": "abc11"}}

In [22]:
response = with_message_history.invoke(
    {"messages": [HumanMessage(content="hi! I'm todd")], "language": "Spanish"},
    config=config,
)

response.content

'Hola Todd, de nuevo. ¿Te gustaría conversar o tienes alguna pregunta en particular sobre algo en español? (Hello Todd again. Do you want to chat or do you have a question about something in Spanish?)'

In [23]:
response = with_message_history.invoke(
    {"messages": [HumanMessage(content="whats my name?")], "language": "Spanish"},
    config=config,
)

response.content

'Tu nombre es... TODD! (Your name is... TODD!) ¿Quieres hablar un poco más sobre ti, Todd? (Do you want to talk a bit more about yourself, Todd?)'

# 管理對話

In [24]:
from langchain_core.messages import SystemMessage, trim_messages

trimmer = trim_messages(
    max_tokens=65,
    strategy="last",
    token_counter=chat_model,
    include_system=True,
    allow_partial=False,
    start_on="human",
)

messages = [
    SystemMessage(content="you're a good assistant"),
    HumanMessage(content="hi! I'm bob"),
    AIMessage(content="hi!"),
    HumanMessage(content="I like vanilla ice cream"),
    AIMessage(content="nice"),
    HumanMessage(content="whats 2 + 2"),
    AIMessage(content="4"),
    HumanMessage(content="thanks"),
    AIMessage(content="no problem!"),
    HumanMessage(content="having fun?"),
    AIMessage(content="yes!"),
]
# 以下三個message不見了:
#    HumanMessage(content="hi! I'm bob"),
#    AIMessage(content="hi!"),
#    HumanMessage(content="I like vanilla ice cream"),
trimmer.invoke(messages)

  from .autonotebook import tqdm as notebook_tqdm


[SystemMessage(content="you're a good assistant", additional_kwargs={}, response_metadata={}),
 HumanMessage(content="hi! I'm bob", additional_kwargs={}, response_metadata={}),
 AIMessage(content='hi!', additional_kwargs={}, response_metadata={}),
 HumanMessage(content='I like vanilla ice cream', additional_kwargs={}, response_metadata={}),
 AIMessage(content='nice', additional_kwargs={}, response_metadata={}),
 HumanMessage(content='whats 2 + 2', additional_kwargs={}, response_metadata={}),
 AIMessage(content='4', additional_kwargs={}, response_metadata={}),
 HumanMessage(content='thanks', additional_kwargs={}, response_metadata={}),
 AIMessage(content='no problem!', additional_kwargs={}, response_metadata={}),
 HumanMessage(content='having fun?', additional_kwargs={}, response_metadata={}),
 AIMessage(content='yes!', additional_kwargs={}, response_metadata={})]

In [25]:
# 在chain中使用trimmer
from operator import itemgetter

from langchain_core.runnables import RunnablePassthrough

chain = (
    RunnablePassthrough.assign(messages=itemgetter("messages") | trimmer)
    | prompt
    | chat_model
)

response = chain.invoke(
    {
        "messages": messages + [HumanMessage(content="what's my name?")],
        "language": "English",
    }
)
response.content

# model忘記了messages中的名字

'Your name is Bob!'

In [26]:
response = chain.invoke(
    {
        "messages": messages + [HumanMessage(content="what math problem did i ask")],
        "language": "English",
    }
)
response.content

# model記得我們問的數學問題，因為在最近發生

'You asked about the math problem "2 + 2" earlier. I answered it as 4.'

In [27]:
# 整合trimmer的chain 搭配訊息歷史
with_message_history = RunnableWithMessageHistory(
    chain,
    get_session_history,
    input_messages_key="messages",
)

config = {"configurable": {"session_id": "abc20"}}

In [28]:
response = with_message_history.invoke(
    {
        "messages": messages + [HumanMessage(content="whats my name?")],
        "language": "English",
    },
    config=config,
)

response.content

'your name is Bob!'

In [29]:
response = with_message_history.invoke(
    {
        "messages": [HumanMessage(content="what math problem did i ask?")],
        "language": "English",
    },
    config=config,
)

response.content

'You asked 2 + 2, which I answered as 4.'

# 串流 Stream
所有鏈都暴露一個.stream方法，使用訊息歷史的鏈也不例外。我們可以簡單地使用該方法來獲取串流響應

In [30]:
# 在jupyter notebook看不出差異
config = {"configurable": {"session_id": "abc15"}}
for res in with_message_history.stream(
    {
        "messages": [HumanMessage(content="hi! I'm todd. tell me a joke")],
        "language": "English",
    },
    config=config,
):
    print(res.content, end="|")

Hi| Todd|,| nice| to| meet| you|!| Here|'s| one| for| you|:

|What| do| you| call| a| fake| nood|le|?

|(W|ait| for| it|...)

|An| imp|asta|!

|Hope| that| made| you| smile|,| Todd|!| Do| you| want| another| one|?||

# 