In [None]:
import os
import warnings

import bs4
from dotenv import load_dotenv
from langchain.chains import create_history_aware_retriever, create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_chroma import Chroma
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_community.chat_models import ChatZhipuAI
from langchain_community.document_loaders import WebBaseLoader
from langchain_core.chat_history import BaseChatMessageHistory
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.runnables.history import RunnableWithMessageHistory
from langchain_text_splitters import RecursiveCharacterTextSplitter
from zhipuai import ZhipuAI

In [8]:
# Load variables from a local .env file into the process environment, then
# read the ZhipuAI key from it (the value is None when the variable is unset).
load_dotenv()
api_key = os.getenv("ZHIPU_API_KEY")

In [9]:
# Chat model shared by the question-rewriting step and the answering step.
chat = ChatZhipuAI(
    zhipuai_api_base="https://open.bigmodel.cn/api/paas/v4/chat/completions",
    zhipuai_api_key=api_key,
    model_name="glm-4",
)

In [10]:
# Fetch the source blog post; the SoupStrainer restricts parsing to elements
# carrying the post-content, post-title or post-header CSS classes.
loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs={
        "parse_only": bs4.SoupStrainer(
            class_=("post-content", "post-title", "post-header")
        )
    },
)
docs = loader.load()

In [11]:
# Chunk the loaded documents (chunk_size=1000, chunk_overlap=200) so each
# piece can be embedded and retrieved on its own.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=200,
    chunk_size=1000,
)
splits = text_splitter.split_documents(docs)

In [12]:
# Embedding adapter for the vector store.
# The LangChain vector store is handed an object exposing `embed_documents`
# and `embed_query` rather than a pre-computed list of vectors, so the ZhipuAI
# embeddings endpoint is wrapped in a small class.
class EmbeddingGenerator:
    """Produce embedding vectors through the ZhipuAI embeddings endpoint.

    Args:
        model_name: Name of the embedding model passed to the endpoint.
        client: Optional pre-built client exposing
            ``client.embeddings.create(model=..., input=...)``. When omitted,
            a ``ZhipuAI`` client is created from the notebook-level ``api_key``.
        dimension: Length of the zero vector returned when the endpoint gives
            no data. Defaults to 1024, the size the notebook originally
            hard-coded for this model.
    """

    def __init__(self, model_name, client=None, dimension=1024):
        self.model_name = model_name
        self.dimension = dimension
        self.client = client if client is not None else ZhipuAI(api_key=api_key)

    def _embed_one(self, text):
        """Embed a single string, falling back to a zero vector on empty data."""
        response = self.client.embeddings.create(model=self.model_name, input=text)
        data = getattr(response, "data", None)
        if data:
            return data[0].embedding
        # Keep the best-effort fallback, but make it visible: a zero vector
        # carries no information and would otherwise enter the index silently.
        warnings.warn(
            f"No embedding returned by model {self.model_name!r}; "
            f"using a zero vector of length {self.dimension}.",
            RuntimeWarning,
            stacklevel=3,
        )
        return [0.0] * self.dimension

    def embed_documents(self, texts):
        """Return one embedding per input text, in input order."""
        return [self._embed_one(text) for text in texts]

    def embed_query(self, query):
        """Return the embedding of a single query string."""
        return self._embed_one(query)

In [13]:
# Embedding adapter instance bound to the "embedding-2" model.
embedding_generator = EmbeddingGenerator("embedding-2")

In [14]:
# Collect the raw text of every chunk for indexing (no embedding happens here).
# `page_content` is read directly instead of iterating each Document's
# (field name, value) pairs and filtering for that field.
texts = [document.page_content for document in splits]

In [15]:
# Vector store that embeds through the adapter defined in this notebook; the
# collection is created on first use if it does not already exist.
chroma_store = Chroma(
    embedding_function=embedding_generator,
    collection_name="example_collection",
    create_collection_if_not_exists=True,
)

In [16]:
# Index the chunk texts; the return value of add_texts is kept in IDs.
IDs = chroma_store.add_texts(texts)

In [17]:
# Retrieve
# Expose the vector store through the retriever interface consumed by the
# history-aware retriever built later in the notebook; no search options are
# passed, so the library defaults apply.
retriever = chroma_store.as_retriever()

In [18]:
# System instruction for the rewriting step: turn a follow-up question that
# leans on the chat history into a standalone question, without answering it.

contextualize_q_system_prompt = (
    "Given a chat history and the latest user question "
    "which might reference context in the chat history, formulate a standalone question "
    "which can be understood without the chat history. Do NOT answer the question, "
    "just reformulate it if needed and otherwise return it as is."
)

# Rewriting prompt: system instruction, then the prior turns, then the new
# user input.
contextualize_q_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", contextualize_q_system_prompt),
        MessagesPlaceholder(variable_name="chat_history"),
        ("human", "{input}"),
    ]
)

In [19]:
# Retriever wrapper built from the chat model, the base retriever and the
# question-rewriting prompt.
history_aware_retriever = create_history_aware_retriever(
    llm=chat,
    retriever=retriever,
    prompt=contextualize_q_prompt,
)

In [20]:


# System instruction for the answering step. The sentences are joined with
# implicit string concatenation rather than backslash continuations inside a
# triple-quoted string: trailing whitespace after such a backslash silently
# turns it into a literal "\" plus a line break inside the prompt.
qa_system_prompt = (
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer the question. "
    "If you don't know the answer, just say that you don't know. "
    "Use three sentences maximum and keep the answer concise."
    "\n"
    "{context}"
)
# Answering prompt: system instruction (with the retrieved context slot),
# then the prior turns, then the new user input.
qa_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", qa_system_prompt),
        MessagesPlaceholder(variable_name="chat_history"),
        ("human", "{input}"),
    ]
)


In [21]:

# Chain that feeds the retrieved documents into qa_prompt and calls the model.
question_answer_chain = create_stuff_documents_chain(llm=chat, prompt=qa_prompt)

In [22]:
# Full RAG chain: history-aware retrieval followed by the answering chain.
rag_chain = create_retrieval_chain(
    retriever=history_aware_retriever,
    combine_docs_chain=question_answer_chain,
)

In [23]:
# In-memory registry of chat histories, keyed by session id.
store = {}

def get_session_history(session_id: str) -> BaseChatMessageHistory:
    """Return the history stored for ``session_id``, creating it on first use."""
    try:
        return store[session_id]
    except KeyError:
        # First request for this session: register an empty history.
        history = ChatMessageHistory()
        store[session_id] = history
        return history

# Wrap the RAG chain with per-session message history. The keys name where
# the user input, the prior messages and the model answer live in the chain's
# input and output dictionaries.
conversational_rag_chain = RunnableWithMessageHistory(
    rag_chain,
    get_session_history,
    output_messages_key="answer",
    history_messages_key="chat_history",
    input_messages_key="input",
)

In [24]:
# First turn of session "abc123"; only the "answer" entry of the result is kept.
first_ans = conversational_rag_chain.invoke(
    {"input": "What is Task Decomposition?"},
    config={"configurable": {"session_id": "abc123"}},
)["answer"]

print(f"first_ans:{first_ans}")

first_ans:Task decomposition is a method of breaking down complex tasks into smaller, more manageable subgoals or steps. This can be done through various techniques including simple prompting, task-specific instructions, and human inputs. One advanced approach involves using a language model to interface with an external classical planner, which plans the task using PDDL and then translates the plan back into natural language.


In [25]:
# Follow-up turn in the same session ("abc123"): "it" only makes sense with
# the stored history, which is what the question-rewriting step resolves.
second_ans = conversational_rag_chain.invoke(
    {"input": "What are common ways of doing it?"},
    config={"configurable": {"session_id": "abc123"}},
)["answer"]

# Print the variable assigned above (the previous name `secone_ans` was
# undefined and raised NameError on a fresh kernel).
print(f"second_ans:{second_ans}")

second_ans:Common ways of task decomposition include: 1) using simple prompts like "Steps for XYZ" to guide an LLM to list subgoals, 2) providing task-specific instructions tailored to the complexity of the task, and 3) incorporating human inputs to guide the decomposition process. Another approach is using Chain of Thought (CoT) or Tree of Thoughts, which involve breaking tasks into a series of steps or reasoning paths to simplify problem-solving.


In [26]:
# Clean up: delete the collection backing chroma_store. Cells that query the
# store are expected to need it rebuilt after this runs (assumption based on
# the method name; confirm against the Chroma wrapper documentation).
chroma_store.delete_collection()