# 引用检索来源

本笔记本展示了如何利用 OpenAI 的函数调用（function calling）能力，从文本中提取答案及其引用来源。

In [1]:
# 导入所需的模块和函数
from langchain.chains import create_citation_fuzzy_match_chain
from langchain_openai import ChatOpenAI




In [2]:
# Question put to the citation chain.
question = "作者在大学期间做了什么？"

# Source passage the answer must be grounded in. It is assembled line by line;
# the leading and trailing newlines are part of the value.
# NOTE(review): the recorded cell outputs further down quote English text, so
# they appear to come from an English version of this question/passage —
# re-run the notebook to refresh them.
context = (
    "\n"
    "我的名字是Jason Liu，我在加拿大多伦多长大，但我出生在中国。\n"
    "我上了一所艺术高中，但在大学里我学习了计算数学和物理学。\n"
    "作为合作项目的一部分，我在包括Stitchfix、Facebook在内的许多公司工作过。\n"
    "我还在滑铁卢大学创办了数据科学俱乐部，并担任俱乐部主席两年。\n"
)

In [3]:
# Chat model used for extraction; temperature 0 is requested.
# NOTE(review): "gpt-3.5-turbo-0613" is a dated model snapshot — confirm it is
# still served by the API before re-running this notebook.
llm = ChatOpenAI(
    model="gpt-3.5-turbo-0613",
    temperature=0,
)

In [4]:
# Build the citation fuzzy-match chain on top of the chat model created above.
chain = create_citation_fuzzy_match_chain(llm)

In [5]:
# Run the chain, passing the question and the source passage as keyword inputs.
# NOTE(review): `Chain.run` is reported as deprecated in newer LangChain
# releases in favour of `invoke` — confirm against the installed version.
result = chain.run(question=question, context=context)

In [6]:
# Print the chain's result.
print(result)

question='What did the author do during college?' answer=[FactWithEvidence(fact='The author studied Computational Mathematics and physics in university.', substring_quote=['in university I studied Computational Mathematics and physics']), FactWithEvidence(fact='The author started the Data Science club at the University of Waterloo and was the president of the club for 2 years.', substring_quote=['started the Data Science club at the University of Waterloo', 'president of the club for 2 years'])]


In [7]:
def highlight(text, span, window=20):
    """Render a cited span of ``text`` with surrounding context for a terminal.

    Args:
        text: The full string that ``span`` indexes into.
        span: A ``(start, end)`` pair of character offsets into ``text``.
        window: Maximum number of context characters shown on each side of
            the span. Defaults to 20.

    Returns:
        A string of the form ``...<before>*<span>*<after>...`` where the span
        itself is wrapped in the ANSI escape codes for red text
        (``\\033[91m`` ... ``\\033[0m``).
    """
    start, end = span[0], span[1]

    # Clamp the left edge at 0. Without the clamp, a span starting fewer than
    # `window` characters into the text yields a negative slice start, which
    # Python interprets as an offset from the END of the string and the
    # leading context comes out empty or wrong.
    before = text[max(0, start - window) : start]

    # Slicing past the end of the string is already safe (it just truncates).
    after = text[end : end + window]

    return (
        "..."
        + before
        + "*"
        + "\033[91m"  # switch to red
        + text[start:end]
        + "\033[0m"  # reset colour
        + "*"
        + after
        + "..."
    )

In [8]:
# Show each extracted fact followed by the passages of `context` that back it.
for fact in result.answer:
    print(f"Statement: {fact.fact}")

    # One "Citation" line per supporting span located in the context.
    for span in fact.get_spans(context):
        citation = highlight(context, span)
        print(f"Citation: {citation}")

    # Blank line between facts.
    print()

Statement: The author studied Computational Mathematics and physics in university.
Citation: ...arts highschool but *[91min university I studied Computational Mathematics and physics[0m*. 
As part of coop I...

Statement: The author started the Data Science club at the University of Waterloo and was the president of the club for 2 years.
Citation: ...x, Facebook.
I also *[91mstarted the Data Science club at the University of Waterloo[0m* and I was the presi...
Citation: ...erloo and I was the *[91mpresident of the club for 2 years[0m*.
...

