# LangChain: RAG

In [25]:
!pip install  langchain langchain-community langchainhub langchain-openai chromadb bs4

Looking in indexes: https://pypi.tuna.tsinghua.edu.cn/simple
Collecting langsmith<0.1,>=0.0.83 (from langchain)
  Using cached https://pypi.tuna.tsinghua.edu.cn/packages/97/cd/1c618f89d3fcbb375c99a3ea950bffba8a01862cc0f0ab5032dfb95e8d1e/langsmith-0.0.92-py3-none-any.whl (56 kB)
INFO: pip is looking at multiple versions of langchain-core to determine which version is compatible with other requirements. This could take a while.
Collecting langchain-core<0.2,>=0.1.22 (from langchain)
  Downloading https://pypi.tuna.tsinghua.edu.cn/packages/93/fa/a671529ad3932c8e8da0ab3443ac7b9989e7fb622e78bcd7051581a93209/langchain_core-0.1.31-py3-none-any.whl (258 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m258.8/258.8 kB[0m [31m2.5 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25h  Downloading https://pypi.tuna.tsinghua.edu.cn/packages/53/b3/ae022560a8b104525b4ac1a97a557e3aa05dd0d233bb5284f7c63509742f/langchain_core-0.1.30-py3-none-any.whl (256 kB)
[2K     [90m━━━━━━━━━━━

In [27]:
import bs4
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import AzureChatOpenAI, AzureOpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_core.prompts import PromptTemplate
# from langchain.prompts import ChatPromptTemplate



In [28]:
import os

from dotenv import load_dotenv, find_dotenv
_ = load_dotenv(find_dotenv()) # read local .env file

In [29]:
llm = AzureChatOpenAI(temperature=0.0, openai_api_version="2023-12-01-preview", azure_deployment="gpt-35-turbo-felix", openai_api_key=os.getenv("AZURE_OPENAI_KEY"))
embeddings = AzureOpenAIEmbeddings(
    azure_deployment="text-embedding-ada-002-felix",
    openai_api_version="2023-12-01-preview",
)

In [39]:
template = """Use the following pieces of context to answer the question at the end.
If you don't know the answer, just say that you don't know, don't try to make up an answer.
Use three sentences maximum and keep the answer as concise as possible.
Always say "thanks for asking!" at the end of the answer.

{context}

Question: {question}

Helpful Answer:"""
custom_rag_prompt = PromptTemplate.from_template(template)

In [40]:
# Load, chunk and index the contents of the blog.
loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs=dict(
        parse_only=bs4.SoupStrainer(
            class_=("post-content", "post-title", "post-header")
        )
    ),
)
docs = loader.load()
docs


[Document(page_content='', metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'})]

In [38]:
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
splits = text_splitter.split_documents(docs)
vectorstore = Chroma.from_documents(documents=splits, embedding=embeddings)
splits


[Document(page_content='C919龙年首飞，乘务长带着自豪感迎接双向客流2月10日早上8时15分，东航MU9197航班从上海虹桥机场起飞，飞往成都天府机场。这是国产大飞机C919的龙年首飞。“今天是大年初一，是中国传统的新春佳节。 在这个喜庆祥和的节日里，我们全体机组成员向您致以最诚挚的问候……”飞机上，乘务长娄颖雯开始播报。今年是C919首次服务春运。目前已经投运的C919飞机共有四架，都属于东航，分别在京沪、沪蓉航线执飞。娄颖雯介绍，龙年首个航班，C919飞机客座率超过九成，大多是举家出行的旅客，有的带着小孩，有的带着老人。不少旅客第一次乘坐国产大飞机，激动的情绪难以掩饰。有老人感慨：“没想到有生之年可以乘坐中国自己造的大飞机。”小朋友们也十分兴奋，因为大年初一，等飞机平飞后，每个C919航班上都会举行“龙腾闹新春”活动。乘务组会给旅客发福袋、玩偶等，并为他们拍照留念。此外，龙年到了，C919飞机上的餐食也别具一格。龙造型的“小笼包”和甜品让旅客直呼用心。而红豆味的大白兔牛奶又充满上海特色，让旅客印象深刻。娄颖雯是第一批C919乘务员，2023年5月28日，东航首架C919进行商业首飞时，她就在飞机上。“以前飞的都是波音和空客，现在能飞自己国家的飞机，确实很幸运，也感到很自豪。”她还记得自己曾询问父亲的意见，父亲说：“去吧，我支持你。”这也让她充满了力量。自C919开始执飞商业航班以来，几乎每天都会有旅客问“这是首架飞机吗？”“国产大飞机有什么特色？”这时，娄颖雯就会情不自禁地介绍起来。“你可以看航班号，C919A是首架飞机，机身上有首架机的印章。C919的过道比波音、空客宽，并且客舱三个座位中间的那个座位，比两边座位宽1.5厘米，坐在中间也挺舒适。此外，飞机的氛围灯会根据不同场景切换，在迎送客时会选用彩虹灯；洗手间水温有8档调温；2.25米的过道高度也比一般飞机高……”当天，娄颖雯飞抵成都后，又跟随后续的C919航班飞回上海。在回程航班上，飞机客座率依然超过九成。“C919的客座率一直都很高，大家都希望打卡。此外，2023年以来，一个明显的趋势是，双向出行的旅游客流量很大，不像以前许多线路只有去程人多、返程人少。”娄颖雯说。新年的C919，见证了中国旅游经济的活力。栏目主编：李晔本文作者：王力题图来源：采访对象提供图片来源：采访对象提供'

In [None]:
# Retrieve and generate using the relevant snippets of the blog.
retriever = vectorstore.as_retriever()
# prompt = hub.pull("rlm/rag-prompt")


def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)


rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | custom_rag_prompt
    | llm
    | StrOutputParser()
)

In [37]:
rag_chain.invoke("结合乘务长娄颖雯的介绍，回答C919大飞机的客舱过道有什么特点？")

'很抱歉，我不知道C919大飞机客舱过道的特点。谢谢你的提问！'

In [None]:
agent("What is the 25% of 300?")

In [None]:
question = "Tom M. Mitchell is an American computer scientist \
and the Founders University Professor at Carnegie Mellon University (CMU)\
what book did he write?"
result = agent(question) 

## Python Agent

In [None]:
agent = create_python_agent(
    llm,
    tool=PythonREPLTool(),
    verbose=True
)

In [None]:
customer_list = [["Harrison", "Chase"], 
                 ["Lang", "Chain"],
                 ["Dolly", "Too"],
                 ["Elle", "Elem"], 
                 ["Geoff","Fusion"], 
                 ["Trance","Former"],
                 ["Jen","Ayai"]
                ]

In [None]:
agent.run(f"""Sort these customers by \
last name and then first name \
and print the output: {customer_list}""") 

#### View detailed outputs of the chains

In [None]:
import langchain
langchain.debug=True
agent.run(f"""Sort these customers by \
last name and then first name \
and print the output: {customer_list}""") 
langchain.debug=False

## Define your own tool

In [None]:
#!pip install DateTime

In [None]:
from langchain.agents import tool
from datetime import date

In [None]:
@tool
def time(text: str) -> str:
    """Returns todays date, use this for any \
    questions related to knowing todays date. \
    The input should always be an empty string, \
    and this function will always return todays \
    date - any date mathmatics should occur \
    outside this function."""
    return str(date.today())

In [None]:
agent= initialize_agent(
    tools + [time], 
    llm, 
    agent=AgentType.CHAT_ZERO_SHOT_REACT_DESCRIPTION,
    handle_parsing_errors=True,
    verbose = True)

**Note**: 

The agent will sometimes come to the wrong conclusion (agents are a work in progress!). 

If it does, please try running it again.

In [None]:
try:
    result = agent("whats the date today?") 
except: 
    print("exception on external access")