In [None]:
from util import *
from tqdm import tqdm
import random

"""
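This code tests and evaluates the model's information-filtering ability: given an input
question and a set of known facts, observe whether the model can correctly identify and
find the corresponding facts.
1. Given the question, find the k most relevant facts.
2. Have the model filter those candidates once more.
3. Save the resulting data.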
"""

# Fetch the k knowledge entries that best match the question.
def get_k_knowledge(query, vector, k=5):
    """Return the ``k`` entries of ``vector`` most similar to ``query``.

    Args:
        query: Question text handed to the store's similarity search.
        vector: Object exposing ``similarity_search(query, k)``.
        k: Number of results to request (default 5).

    Returns:
        Whatever ``vector.similarity_search`` returns, unchanged.
    """
    return vector.similarity_search(query, k)

# Explicit import: `json` is used below and must not depend on
# `from util import *` happening to re-export it.
import json

# Load the vector store queried by get_k_knowledge
# (load_embedding is presumably supplied by the star import from util).
faiss=load_embedding()

# Load the data; for now only the "Additional" portion of OpenBookQA is used.

# Every record of dev_complete.jsonl, test_complete.jsonl and
# train_complete.jsonl is collected; the `fact1` field of each record is the
# intended knowledge.
dev_file = 'data/openbookqa/Additional/dev_complete.jsonl'
test_file = 'data/openbookqa/Additional/test_complete.jsonl'
train_file = 'data/openbookqa/Additional/train_complete.jsonl'
json_files = [dev_file, test_file, train_file]
main_knowledge=[]
for file in json_files:
    with open(file, 'r', encoding='utf-8') as f:
        for line in f:
            # JSONL files may contain blank lines; json.loads would raise
            # on them, so skip them instead of aborting the whole load.
            if not line.strip():
                continue
            data = json.loads(line)
            main_knowledge.append(data)
count=0  # not used in this cell; kept in case later cells rely on it
# Shuffle with a dedicated, seeded generator so that Restart & Run All yields
# the same record order every time without touching the global random state.
SHUFFLE_SEED = 42
random.Random(SHUFFLE_SEED).shuffle(main_knowledge)
print(len(main_knowledge))

In [2]:
"""
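Our proposed approach to knowledge-based question answering has the following steps:
1. Given the question, retrieve potentially relevant facts from the knowledge base.
2. Use the model to filter those candidates down to the supporting facts that are actually relevant to the question.
3. Answer the question using the supporting facts, producing three outputs: the answer, an explanation, and the supporting facts.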
"""

# Retrieve the candidate facts for the example question.
query = (
    "Frilled sharks and angler fish live far beneath the surface of the ocean, "
    "which is why they are known as"
)
docs = get_k_knowledge(query, vector=faiss, k=10)

In [3]:
# Display the documents retrieved for the example question.
docs

[Document(page_content='Examples of deep sea animals are angler fish and frilled sharks', metadata={'编号': 340}),
 Document(page_content='deep sea animals live deep in the ocean', metadata={'编号': 5747}),
 Document(page_content='Lophiiformes include anglerfish', metadata={'编号': 524}),
 Document(page_content='squids live deep in the ocean', metadata={'编号': 4281}),
 Document(page_content='sharks have gills', metadata={'编号': 4112}),
 Document(page_content='clams live at the bottom of the ocean', metadata={'编号': 5694}),
 Document(page_content='flounder are aquatic animals', metadata={'编号': 2446}),
 Document(page_content='angler fish use lights to attract prey', metadata={'编号': 1483}),
 Document(page_content='sharks are carnivores', metadata={'编号': 4110}),
 Document(page_content='a fish lives in water', metadata={'编号': 5304})]

In [13]:
"""
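Chain the following two steps:
1. Given the question and the candidate facts, find the facts it depends on.
2. Given the question and those supporting facts, find the answer.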
"""
from langchain import PromptTemplate

# Prompt asking the model to pick the most relevant fact for a question.
# Built line by line so that the trailing space after "{question}" (part of
# the rendered prompt) is visible rather than hidden at the end of a line.
template = (
    'Please select the most relevant knowledge from the following based on the question. If it does not exist, please return "none". If it does, please provide the corresponding knowledge.\n'
    "Question: {question} \n"
    "Supported facts: {fact_list}"
)

# Wrap the raw text so both placeholders are filled in by name.
prompt_template = PromptTemplate(
    template=template,
    input_variables=["question", "fact_list"],
)

In [14]:
from langchain.llms import OpenAI
# Selecting the relevant fact is an extraction task whose results must be
# comparable between runs and between models, so decode with temperature 0
# rather than the highly random 0.9 setting.
llm = OpenAI(temperature=0)

In [15]:
# Sanity-check how the template renders, using toy inputs.
print(prompt_template.format(question="colorful socks",fact_list="1 socks"))

Please select the most relevant knowledge from the following based on the question. If it does not exist, please return "none". If it does, please provide the corresponding knowledge.
Question: colorful socks 
Supported facts: 1 socks


In [16]:
from langchain.chains import LLMChain
# Combine the LLM and the prompt template into one chain object.
chain = LLMChain(llm=llm, prompt=prompt_template)

In [18]:
# Smoke test of the chain with placeholder inputs ("e" is not a real question
# or fact list), so the returned text carries no meaning.
chain.run({
    "question":"e",
    "fact_list":"e"
})
# NOTE(review): an earlier note here recorded '\n\nSocktastic!' as the result,
# which does not match the output stored with this cell — presumably left over
# from a previous prompt or run.

'(1), a(2), e(3), c(4)\n\ne(1)'

In [30]:
# Render the retrieved documents as numbered fact lines.
def get_fact_and_number(docs):
    """Format documents as one ``number-<id>:<content>`` line each.

    Args:
        docs: Iterable of objects with a ``metadata`` mapping that holds the
            "编号" key and a ``page_content`` attribute.

    Returns:
        One string in which every document contributes a newline-terminated
        line; the empty string when ``docs`` is empty.
    """
    return "".join(
        "number-" + str(entry.metadata["编号"]) + ":" + entry.page_content + "\n"
        for entry in docs
    )

In [31]:
# Build the numbered fact list from the documents retrieved for the example question.
fact_list=get_fact_and_number(docs)

In [32]:
# Display the formatted fact list.
fact_list

'number-340:Examples of deep sea animals are angler fish and frilled sharks\nnumber-5747:deep sea animals live deep in the ocean\nnumber-524:Lophiiformes include anglerfish\nnumber-4281:squids live deep in the ocean\nnumber-4112:sharks have gills\nnumber-5694:clams live at the bottom of the ocean\nnumber-2446:flounder are aquatic animals\nnumber-1483:angler fish use lights to attract prey\nnumber-4110:sharks are carnivores\nnumber-5304:a fish lives in water\n'

In [33]:
# `fact_list` was built from the documents retrieved for `query`, so reuse
# that exact question instead of a second copy of the literal; the prompt can
# then never pair one question with facts retrieved for another.
ques=query
print(prompt_template.format(question=ques,fact_list=fact_list))

Please select the most relevant knowledge from the following based on the question. If it does not exist, please return "none". If it does, please provide the corresponding knowledge.
Question: Frilled sharks and angler fish live far beneath the surface of the ocean, which is why they are known as 
Supported facts: number-340:Examples of deep sea animals are angler fish and frilled sharks
number-5747:deep sea animals live deep in the ocean
number-524:Lophiiformes include anglerfish
number-4281:squids live deep in the ocean
number-4112:sharks have gills
number-5694:clams live at the bottom of the ocean
number-2446:flounder are aquatic animals
number-1483:angler fish use lights to attract prey
number-4110:sharks are carnivores
number-5304:a fish lives in water



In [37]:
# Ask the model to select the most relevant fact for the example question
# from the numbered candidate list.
chain.run({
    "question":ques,
    "fact_list":fact_list
})

'\nnumber-1483:angler fish use lights to attract prey'

In [None]:
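# 1. Performance of different models on this task 2. Information-retrieval ability of different embedding models
# 1. Question-answering accuracy, relevance of the retrieved information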