# Quick & Dirty e2e pipeline

## Test NotionDB connection
ref: https://python.langchain.com/docs/integrations/document_loaders/notiondb

In [1]:
!python -V

Python 3.11.7


In [2]:
from langchain_community.document_loaders import NotionDBLoader
import tomllib

In [3]:
# API tokens; the 'notion' and 'huggingface' entries are read further down.
with open('../.tokens.toml', 'rb') as tokens_file:
    _TOKENS = tomllib.load(tokens_file)

# Notion databases, looked up by name when the loader is built.
with open('../.notion_databases.toml', 'rb') as databases_file:
    _DATABASES_NOTION = tomllib.load(databases_file)

In [4]:
_DATABASES_NOTION

{'笔记（非文学）': '7443174d151342458fb7e47acdbb0c64',
 '读书笔记（文学）': '808e50735cce4c66a151f3e8b79c8d9d',
 '写作': '8b81c07089344d8d95ddba22bafb8c12',
 '格言小集': '04d75f24b9544f3f8b220d5e7d87b7c3'}

In [5]:
# Loader for the '格言小集' Notion database.
notion_token = _TOKENS['notion']
notion_database_id = _DATABASES_NOTION['格言小集']
loader = NotionDBLoader(
    integration_token=notion_token,
    database_id=notion_database_id,
    request_timeout_sec=30,  # optional, defaults to 10
)

In [6]:
%%time
docs = loader.load()

CPU times: user 216 ms, sys: 22.3 ms, total: 238 ms
Wall time: 3.33 s


In [7]:
print(docs[:2])

[Document(page_content='人生四不捡：塔吊下边冰红茶，铁轨上边牛肉干，过山车下八宝粥，课桌后边葡萄干', metadata={'name': '笑死', 'id': '2303056b-72f1-4f9a-b377-cf6b12a0d424'}), Document(page_content='\n名为太阳的溶剂\n\n楼群之中，乌鸦堕落为一种失去预言性的可厌生物\n\n对大多数人来说，他们所抱定的观念只是为他们生存方式做辩护的使用说明书而已。\n\n颤抖的彩虹其实是光的边角余料。\n\n以前的建筑是不透明的甲壳类，现在的建筑是透光的腔肠类。\n\n射电望远镜就是宇宙射线的向日葵。\n\n电线像静脉一样生长着。\n\n绝望的尽头并非对绝望感到绝望，而是对绝望感到厌烦。相对地，希望的尽头也只是对希望的疲倦而已。与一般的见解不同，绝望和希望是生活的激发态。\n', metadata={'name': '沃滋集朔德', 'id': '340f4781-1042-4534-b286-b758b0ca09a4'})]


## Conversational RAG with memory
ref: https://python.langchain.com/docs/expression_language/cookbook/retrieval

- LLM-aided retrieval
- ReAct framework

In [8]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Split the loaded documents into overlapping chunks for embedding.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
splits = text_splitter.split_documents(docs)

In [9]:
from langchain_community.embeddings import HuggingFaceInferenceAPIEmbeddings

# Multilingual sentence-embedding model, called through the Hugging Face Inference API.
# https://huggingface.co/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2#sentence-transformersparaphrase-multilingual-minilm-l12-v2
_EMBEDDING_MODEL_NAME = "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"

embeddings = HuggingFaceInferenceAPIEmbeddings(
    api_key=_TOKENS['huggingface'],
    model_name=_EMBEDDING_MODEL_NAME,
)

In [10]:
%%time
from langchain_community.vectorstores import Chroma

# Embed the chunks with `embeddings` and build a Chroma vector store from them.
# NOTE(review): `persist_directory` presumably keeps the collection on disk, so
# re-running this cell may add the same chunks again — confirm, and clear the
# directory (or reload the existing store) if duplicate hits show up.
vectorstore = Chroma.from_documents(
    documents=splits, 
    embedding=embeddings, 
    persist_directory='../database/playground_test'
)

# TODO: use SelfQueryRetriever to allow in metadata context
# https://python.langchain.com/docs/modules/data_connection/retrievers/self_query/
# Default retriever over the vector store; it is used by the RAG chain below.
retriever = vectorstore.as_retriever()

CPU times: user 338 ms, sys: 48.2 ms, total: 386 ms
Wall time: 774 ms


In [11]:
# # this uses huggingfacehub to compute
# # free but can be very slow
# from langchain import HuggingFaceHub

# llm = HuggingFaceHub(
#     huggingfacehub_api_token=_TOKENS['huggingface'],
#     repo_id='01-ai/Yi-6B-Chat', 
#     # https://huggingface.co/transformers/v4.9.2/main_classes/model.html#transformers.generation_utils.GenerationMixin.generate
#     model_kwargs={"temperature": 0.1},
# )

In [12]:
# use local model
from langchain_community.chat_models import ChatOllama
from langchain_community.llms import Ollama

# TODO: how to change arguments (temperature, top_k, etc.)
# NOTE(review): these look settable as constructor keyword arguments, e.g.
# Ollama(model="yi", temperature=0.1, top_k=40) — confirm against the
# installed langchain_community version before relying on it.
llm = Ollama(model="yi")

# need to run `ollama serve` first before inference

In [17]:
import langchain
langchain.debug = True

In [14]:
langchain.debug = False

In [26]:
%%time 
from langchain.prompts import PromptTemplate, ChatPromptTemplate

# Smoke test: a minimal system + user prompt piped straight into the local model.
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "你是 AI 助手"),
        ("user", "{input}"),
    ]
)
chain = prompt | llm

chain.invoke({"input": "你好，你是谁？"})

[32;1m[1;3m[chain/start][0m [1m[1:chain:RunnableSequence] Entering Chain run with input:
[0m{
  "input": "你好，你是谁？"
}
[32;1m[1;3m[chain/start][0m [1m[1:chain:RunnableSequence > 2:prompt:ChatPromptTemplate] Entering Prompt run with input:
[0m{
  "input": "你好，你是谁？"
}
[36;1m[1;3m[chain/end][0m [1m[1:chain:RunnableSequence > 2:prompt:ChatPromptTemplate] [2ms] Exiting Prompt run with output:
[0m{
  "lc": 1,
  "type": "constructor",
  "id": [
    "langchain",
    "prompts",
    "chat",
    "ChatPromptValue"
  ],
  "kwargs": {
    "messages": [
      {
        "lc": 1,
        "type": "constructor",
        "id": [
          "langchain",
          "schema",
          "messages",
          "SystemMessage"
        ],
        "kwargs": {
          "content": "你是 AI 助手",
          "additional_kwargs": {}
        }
      },
      {
        "lc": 1,
        "type": "constructor",
        "id": [
          "langchain",
          "schema",
          "messages",
          "HumanMessage"


KeyboardInterrupt: 

In [20]:
from operator import itemgetter

from langchain.schema import format_document
from langchain.prompts import PromptTemplate, ChatPromptTemplate
from langchain_core.runnables import RunnableLambda, RunnablePassthrough

from langchain.memory import ConversationBufferMemory

from langchain_core.messages import AIMessage, HumanMessage, get_buffer_string
from langchain_core.output_parsers import StrOutputParser

# prompt template
# https://github.com/langchain-ai/langchain/tree/master/templates

In [21]:
# Conversation buffer for the RAG chain: the user turn is stored under
# "question", the model turn under "answer".
memory = ConversationBufferMemory(
    input_key="question",
    output_key="answer",
    return_messages=True,
)

In [22]:
# Prompt used to turn the chat history ({chat_history}) plus a follow-up
# question ({question}) into one standalone question.
# NOTE(review): the instruction text also asks the model to translate the
# conversation into Chinese, not only to rewrite the follow-up question —
# confirm this is intended.
_template = """请将以下对话翻译成中文，并将后续问题以原语言改写为一个新的独立的问题。


对话记录：
{chat_history}
后续问题：{question}
新的问题："""
CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template(_template)


# Each document is rendered as its page content only; this is the default
# `document_prompt` of `_combine_documents`.
DEFAULT_DOCUMENT_PROMPT = PromptTemplate.from_template(template="{page_content}")

def _combine_documents(
    docs, document_prompt=DEFAULT_DOCUMENT_PROMPT, document_separator="\n\n"
):
    """Render every document with ``document_prompt`` and join the results.

    Args:
        docs: Iterable of documents to render.
        document_prompt: Prompt handed to ``format_document`` for each document.
        document_separator: String inserted between consecutive rendered documents.

    Returns:
        One string containing all rendered documents, in input order.
    """
    return document_separator.join(
        format_document(doc, document_prompt) for doc in docs
    )


# Final answer prompt: the model is told to answer only from the supplied
# material ({context}) and to reply in Chinese; {question} is the question.
template = """请仅仅根据以下资料回答问题。请用中文回答：
{context}

问：{question}
"""
ANSWER_PROMPT = ChatPromptTemplate.from_template(template)

In [23]:
# Step 1: load the conversation memory.
# This adds a "chat_history" key to the input object.
loaded_memory = RunnablePassthrough.assign(
    chat_history=RunnableLambda(memory.load_memory_variables) | itemgetter("history"),
)

# Step 2: condense the chat history and the follow-up into a standalone question.
_condense_inputs = {
    "question": itemgetter("question"),
    "chat_history": lambda x: get_buffer_string(x["chat_history"]),
}
standalone_question = {
    "standalone_question": _condense_inputs
    | CONDENSE_QUESTION_PROMPT
    | llm
    | StrOutputParser(),
}

# Step 3: retrieve documents for the standalone question and carry the
# question forward under the "question" key.
retrieved_documents = {
    "docs": itemgetter("standalone_question") | retriever,
    "question": itemgetter("standalone_question"),
}

# Step 4: build the inputs of the answer prompt.
final_inputs = {
    "context": lambda x: _combine_documents(x["docs"]),
    "question": itemgetter("question"),
}

# Step 5: generate the answer and pass the retrieved documents through.
answer = {
    "answer": final_inputs | ANSWER_PROMPT | llm,
    "docs": itemgetter("docs"),
}

# Full pipeline: memory -> standalone question -> retrieval -> answer.
final_chain = loaded_memory | standalone_question | retrieved_documents | answer

In [24]:
%%time

inputs = {"question": '"勤钳工，懒车工"后面是什么？'}
result = final_chain.invoke(inputs)

# The chain only reads from `memory`; nothing writes the turn back.
# Record it explicitly so the next question sees it in chat_history.
# `llm` is the plain Ollama LLM, so result["answer"] is already a string.
memory.save_context(inputs, {"answer": result["answer"]})
result

KeyboardInterrupt: 

In [None]:
# Answer prompt with an explicit "I don't know" fallback.
# Placeholders: {context} is the reference material, {query} is the question.
# Fixes the typo "请问答" -> "请回答" in the fallback instruction.
prompt_template = """
您是一款人工智能语言模型助手。您的任务是根据以下相关资料找到问题的答案。如果你不知道答案，请回答“我不知道。”

资料：{context}

问：{query}
答：
"""