# LangChain: RAG

Colab Notebook:
https://colab.research.google.com/drive/1rPTvZM4HO--UNKHQ3CfDV7p3_zhRIX_j?usp=sharing

In [None]:
!pip install  langchain langchain-community langchainhub langchain-openai chromadb bs4

In [None]:
import bs4
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import AzureChatOpenAI, AzureOpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_core.prompts import PromptTemplate
# from langchain.prompts import ChatPromptTemplate



In [None]:
import os

from dotenv import load_dotenv, find_dotenv
# Locate the local .env file and read it. Assigning the result to `_` keeps
# the return value from being echoed as the cell output.
env_file = find_dotenv()
_ = load_dotenv(env_file)

In [None]:
# Azure OpenAI settings shared by both clients, defined once so they cannot drift apart.
AZURE_API_VERSION = "2023-12-01-preview"
# API key read from the AZURE_OPENAI_KEY environment variable.
azure_api_key = os.getenv("AZURE_OPENAI_KEY")

# Chat model used to generate the final answer (temperature 0.0).
llm = AzureChatOpenAI(
    temperature=0.0,
    openai_api_version=AZURE_API_VERSION,
    azure_deployment="gpt-35-turbo-felix",
    openai_api_key=azure_api_key,
)

# Embedding model used to vectorise the document chunks and the queries.
# It is given the same key as the chat model; without it the embeddings client
# would only work if the key were also available under another variable name.
embeddings = AzureOpenAIEmbeddings(
    azure_deployment="text-embedding-ada-002-felix",
    openai_api_version=AZURE_API_VERSION,
    openai_api_key=azure_api_key,
)

In [None]:
# Custom RAG prompt, written in Chinese. It instructs the model to:
#   - answer the final question using the content that follows, in Chinese;
#   - say it does not know rather than make up an answer;
#   - use at most three sentences and stay as concise as possible;
#   - always end the answer with "谢谢你的提问！" ("Thank you for your question!").
# The {context} and {question} placeholders are filled in when the prompt is formatted.
template = """使用以下内容回答最后的问题。请使用中文回答
如果你不知道答案，就说你不知道，不要试图编造答案。
最多使用三句话，并尽可能简明扼要。
总是在回答的最后说“谢谢你的提问！”

{context}

问题是: {question}

有用的回答:"""
# Wrap the raw string in a PromptTemplate so it can be used as a step in a chain.
custom_rag_prompt = PromptTemplate.from_template(template)

In [None]:
# Load the article. The SoupStrainer restricts HTML parsing to elements whose
# CSS class is "titleFont" or "contentFont".
ARTICLE_URL = "https://export.shobserver.com/baijiahao/html/715336.html"
content_filter = bs4.SoupStrainer(class_=("titleFont", "contentFont"))

loader = WebBaseLoader(
    web_paths=(ARTICLE_URL,),
    bs_kwargs={"parse_only": content_filter},
)
docs = loader.load()
docs


In [None]:
# Split the loaded documents into overlapping chunks
# (chunk_size=1000, chunk_overlap=64) and display the result.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=64,
)
splits = text_splitter.split_documents(docs)
splits


In [None]:
# Build a Chroma vector store from the chunks, using the embedding model defined earlier.
vectorstore = Chroma.from_documents(
    documents=splits,
    embedding=embeddings,
)

In [None]:
# Expose the vector store as a retriever limited to the 2 best-matching chunks.
# The option must be spelled `search_kwargs`; the earlier spelling
# `search_kwargus` is not a recognised option, so k=2 did not take effect.
retriever = vectorstore.as_retriever(search_kwargs={"k": 2})

In [None]:

# prompt = hub.pull("rlm/rag-prompt")


def format_docs(docs):
    """Merge the text of several documents into one string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string attribute.

    Returns:
        The ``page_content`` values joined with a blank line (two newlines)
        between consecutive documents; an empty string for an empty iterable.
    """
    contents = [document.page_content for document in docs]
    separator = "\n\n"
    return separator.join(contents)


# Assemble the RAG pipeline step by step:
#   - "context": the input goes to the retriever and the results are merged by format_docs;
#   - "question": the input is forwarded unchanged via RunnablePassthrough;
#   - the resulting mapping feeds custom_rag_prompt, then llm, then StrOutputParser.
context_chain = retriever | format_docs
prompt_inputs = {"context": context_chain, "question": RunnablePassthrough()}
rag_chain = prompt_inputs | custom_rag_prompt | llm | StrOutputParser()

In [None]:
# Question (Chinese): "Based on purser Lou Yingwen's introduction, what are the
# characteristics of the C919's cabin aisle?"
question = "结合乘务长娄颖雯的介绍，回答C919大飞机的客舱过道有什么特点？"
rag_chain.invoke(question)

In [None]:
# Question (Chinese): "What is special about the meals served on the C919?"
question = "C919飞机上的餐食有什么特点？"
rag_chain.invoke(question)

In [None]:
# Question (Chinese): "When did Lou Yingwen start working on the C919?"
question = "娄颖雯是什么时候开始在C919工作的"
rag_chain.invoke(question)

In [None]:
# Question (Chinese): "How many C919 aircraft are already in service in total?"
question = "已经投运的C919飞机共有几架？"
rag_chain.invoke(question)