In [1]:
import os
# Register the literal string 'EMPTY' as the OpenAI key for this process.
# Presumably a placeholder: the chain built below runs on a locally loaded
# model rather than the OpenAI API -- confirm nothing else reads this key.
os.environ.update({"OPENAI_API_KEY": "EMPTY"})

In [2]:
import bs4
from langchain import hub
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
# from libs.chatglm3 import ChatGLM3
from libs.qwen import Qwen

2024-01-11 21:27:54,968 - modelscope - INFO - PyTorch version 2.1.2+cu121 Found.
2024-01-11 21:27:54,972 - modelscope - INFO - Loading ast index from D:\Github\modelscope\ast_indexer
2024-01-11 21:27:55,095 - modelscope - INFO - Loading done! Current index file version is 1.10.0, with md5 2a78b227aac409f93813abd32a0eec48 and a total number of 946 components indexed


In [3]:
# Load the blog post. The SoupStrainer restricts parsing to the elements whose
# CSS class is one of "post-content", "post-title" or "post-header".
loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs=dict(
        parse_only=bs4.SoupStrainer(
            class_=("post-content", "post-title", "post-header")
        )
    ),
)
docs = loader.load()

# Split the loaded documents (chunk_size=1000, chunk_overlap=200) and index
# the resulting chunks in a Chroma vector store. HuggingFaceEmbeddings() is
# constructed without arguments, so the library's default model is used.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
splits = text_splitter.split_documents(docs)
vectorstore = Chroma.from_documents(documents=splits, embedding=HuggingFaceEmbeddings())

# Retrieval side of the chain: a retriever over the store plus the
# "rlm/rag-prompt" prompt pulled from the hub.
retriever = vectorstore.as_retriever()
prompt = hub.pull("rlm/rag-prompt")

# Alternative backend (disabled). The path is written as a raw string so the
# backslashes are not treated as escape sequences if this is re-enabled:
# MODEL_PATH = r'D:\Github\Repositories\models\huggingface\chatglm3-6b'
# llm = ChatGLM3()

# Location of the Qwen checkpoint. The original absolute path stays as the
# default, but it can be overridden per machine with the QWEN_MODEL_PATH
# environment variable instead of editing the notebook (`os` is imported in
# the first cell).
MODEL_PATH = os.environ.get(
    "QWEN_MODEL_PATH",
    'D:\\Github\\Repositories\\models\\modelscope\\Qwen-1_8B-Chat-Int4',
)
llm = Qwen()
llm.load_model(MODEL_PATH)


def format_docs(docs):
    """Concatenate the ``page_content`` of every item in ``docs``.

    Items are joined in iteration order with one blank line (two newline
    characters) between consecutive contents. An empty ``docs`` yields an
    empty string.
    """
    separator = "\n\n"
    contents = [item.page_content for item in docs]
    return separator.join(contents)


# Inputs for the prompt: "context" is the retriever output piped through
# format_docs, "question" is the chain input forwarded by RunnablePassthrough.
chain_inputs = {"context": retriever | format_docs, "question": RunnablePassthrough()}

# Pipeline: build the inputs, fill the prompt, call the model, then pass the
# model output through StrOutputParser.
rag_chain = chain_inputs | prompt | llm | StrOutputParser()

  warn_beta(
Try importing flash-attention for faster inference...


In [10]:
# Call the loaded model directly, without going through the retrieval chain.
# The prompt is Chinese for "Do you know who the Jade Emperor is?".
# NOTE(review): this cell's execution count (10) is higher than those of the
# cells that follow it (4, 5, 8), so the saved outputs were not produced in
# top-to-bottom order -- re-run from a fresh kernel before sharing.
llm.invoke('知道玉皇大帝是谁么？')

Both `max_new_tokens` (=512) and `max_length`(=8192) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


'玉皇大帝是中国神话传说中的一位神祇，被认为是道教神话中的最高统治者。他是道教尊奉的天庭之主，被尊称为“三清”，即太上老君、元始天尊和灵宝天尊，也被认为是主管四季变化和日月星辰的神仙。玉皇大帝的神话故事在道教文化中占有重要地位，并在许多民间传说和文学作品中有所记载。'

In [4]:
# Ask the RAG chain a question about the indexed blog post.
# NOTE(review): the saved output for this cell is a generic greeting rather
# than an answer, so the rendered prompt may not be reaching the model as
# intended -- check how libs.qwen handles the input it gets from the chain.
rag_chain.invoke("What is Task Decomposition?")

Both `max_new_tokens` (=512) and `max_length`(=8192) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


'Sure! How may I assist you today?'

In [5]:
# Second question for the RAG chain.
# NOTE(review): as with the previous query, the saved output is a generic
# offer of help and does not address the question -- verify the model wrapper
# before relying on these results.
rag_chain.invoke("What is Self Reflection?")

Both `max_new_tokens` (=512) and `max_length`(=8192) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


'Sure! What would you like me to assist you with?'

In [8]:
# Display the `history` attribute of the llm object for debugging.
# In the saved output it is a list whose first entry is a dict with 'role'
# and 'content' keys, the content being the rendered RAG prompt (question
# plus retrieved context).
llm.history

[{'role': 'user',
  'content': 'Human: You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don\'t know the answer, just say that you don\'t know. Use three sentences maximum and keep the answer concise.\nQuestion: What is Task Decomposition? \nContext: Tree of Thoughts (Yao et al. 2023) extends CoT by exploring multiple reasoning possibilities at each step. It first decomposes the problem into multiple thought steps and generates multiple thoughts per step, creating a tree structure. The search process can be BFS (breadth-first search) or DFS (depth-first search) with each state evaluated by a classifier (via a prompt) or majority vote.\nTask decomposition can be done (1) by LLM with simple prompting like "Steps for XYZ.\\n1.", "What are the subgoals for achieving XYZ?", (2) by using task-specific instructions; e.g. "Write a story outline." for writing a novel, or (3) with human inputs.\n\n(3) Task execution: E