In [None]:
import numpy as np
from pymilvus import (
    connections,
    utility,
    Collection,
    CollectionSchema,
    FieldSchema,
    DataType,
)

# Open the "default" connection to the Milvus server.
# host/port are blank placeholders here and must be filled in before running.
connections.connect("default", host="", port="")

# List every collection available on the server.
collection_names = utility.list_collections()
print(collection_names)

# Accumulates the `content` field of every hit, across all collections.
search_res_list = []

for collection_name in collection_names:
    print(f"Collection: {collection_name}")

    # A collection has to be loaded before it can be searched.
    collection = Collection(collection_name)
    collection.load()

    # Search settings: inner-product metric, nprobe of 20.
    search_params = dict(metric_type="IP", params=dict(nprobe=20))
    topk = 16384  # NOTE(review): presumably the largest limit Milvus accepts; confirm
    dim = 768  # m3e-base produces 768-dimensional vectors (per the original comment)

    # A single random query vector is used as the probe.
    # NOTE(review): together with the large limit this looks like a way to dump
    # stored entries rather than a semantic query; confirm intent.
    query_vector = np.random.rand(1, dim).tolist()

    results = collection.search(
        data=query_vector,
        anns_field="index_vector",
        param=search_params,
        limit=topk,
        output_fields=["content", "metadata"],
    )
    print(results)

    # `results` holds one hit list per query vector; collect and echo each hit's content.
    for hits in results:
        for hit in hits:
            entity = hit.to_dict()['entity']
            content = entity['content']
            search_res_list.append(content)
            print(content)


In [None]:
#根据搜索结果构建知识图谱
#不要随意运行本部分
from py2neo import Graph, Node, Relationship
from py2neo import NodeMatcher, RelationshipMatcher
import spacy
from tqdm import tqdm
import re

# Build the knowledge graph in Neo4j from the Milvus search results.
# WARNING: this cell writes to the database. Relationships are added with
# graph.create(), so running the cell again adds another copy of every
# "include" edge. Do not re-run it casually.
graph=Graph('http://localhost:7474')

# Chinese spaCy pipeline used for named-entity recognition.
nlp = spacy.load("zh_core_web_md")

# Sentence delimiters: full-width 。？！（）； and carriage return.
patterns = r'。|？|！|（|）|；|\r'

for search_text in search_res_list:
    # A record whose content field is missing or empty has nothing to index
    # (and re.split would raise on None).
    if not search_text:
        continue

    lines = re.split(patterns,search_text)
    for line in tqdm(lines):
        # Skip empty / whitespace-only fragments BEFORE touching the graph.
        # The split regularly yields "" (e.g. after a trailing delimiter), and
        # merging those would create a meaningless TEXT node with empty text.
        if not line.strip():
            continue

        # One TEXT node per sentence fragment, merged on its `text` property so
        # the same sentence is not stored twice.
        line_node=Node("TEXT",text=line)
        graph.merge(line_node,"TEXT","text")

        doc=nlp(line)

        # (label, text) pairs already linked to this sentence; prevents duplicate
        # "include" edges when an entity is mentioned more than once in a line.
        linked_entities=set()
        for ent in doc.ents:
            ent_key=(ent.label_,ent.text)
            if ent_key in linked_entities:
                continue
            linked_entities.add(ent_key)

            # Entity node is labelled with the NER label and merged on `text`.
            ent_node=Node(ent.label_,text=ent.text)
            graph.merge(ent_node,ent.label_,"text")

            # (TEXT)-[:include]->(entity)
            relation=Relationship(line_node,"include",ent_node)
            graph.create(relation)



In [None]:
#query实体获取
import spacy
# Extract the named entities mentioned in the user's query.
nlp = spacy.load("zh_core_web_md")

# NOTE: `user_query` is not defined in this cell; it must already exist in the
# kernel (a str holding the user's question) before this cell is run.
text=[user_query]

# Unique entity strings, in order of first appearance.
entity_list=[]
for onetext in text:
    doc=nlp(onetext)

    for ent in doc.ents:
        # Compare on the entity *text*. `ent` is a spaCy Span while the list holds
        # plain strings, so testing `ent not in entity_list` never detects a
        # duplicate and repeated mentions would be appended again.
        if ent.text not in entity_list:
            entity_list.append(ent.text)

print(entity_list)

In [None]:
#搜索全局知识图谱
from py2neo import Graph, Node, Relationship
from py2neo import NodeMatcher, RelationshipMatcher
import spacy
from tqdm import tqdm
import re

# Search the global knowledge graph for sentences that mention the query entities.
# Relies on `nlp` and `entity_list` produced by the query-entity cell.
graph=Graph('http://222.29.98.160:7475')
node_matcher = NodeMatcher(graph)
relationship_matcher = RelationshipMatcher(graph)


# Unique sentence texts linked to any query entity, in discovery order.
text_list = []

for entity in entity_list:
    # Re-run NER on the entity string to recover its label, which is the node
    # label used when the graph was built.
    doc=nlp(entity)
    for ent in doc.ents:
        print (ent.text, ent.label_)
        query_ent_node = node_matcher.match(ent.label_,text=ent.text).first()

        # No such entity in the graph: skip it. Without this guard the match
        # below would receive (None, None), where None means "any node", and
        # would therefore return every relationship in the database.
        if query_ent_node is None:
            continue

        # Relationships that END at the entity node, i.e. (sentence)-[*]->(entity).
        relationships = list(relationship_matcher.match((None,query_ent_node), r_type=None))
        for relationship in relationships:
            node_text=relationship.start_node["text"]
            # A start node without a `text` property yields None; ignore it so the
            # concatenation below cannot fail.
            if node_text and node_text not in text_list:
                print(node_text)
                text_list.append(node_text)

# Concatenate all retrieved sentences into a single context string.
text_chunk="".join(text_list)

print(text_list)

In [None]:
#手动增加实体
#搜索临时知识图谱
from py2neo import Graph, Node, Relationship
from py2neo import NodeMatcher, RelationshipMatcher
import spacy
from tqdm import tqdm
import re

# Look up manually supplied entities in the temporary knowledge graph.
# Relies on `text_list` produced by the global-graph search cell.
graph=Graph('http://222.29.98.73:7474')
node_matcher = NodeMatcher(graph)
relationship_matcher = RelationshipMatcher(graph)


# Fill in entity strings to search for by hand.
new_entity=[]

# Parameterised Cypher: the entity is passed as $ent instead of being formatted
# into the query string, so quotes or other special characters in the entity
# cannot break the statement or inject Cypher.
cypher_ = "MATCH (a) WHERE a.text CONTAINS $ent RETURN a"

for entity in new_entity:
    # .data() returns a list of dicts, one per matching row (key "a" -> node).
    df = graph.run(cypher_, {"ent": entity}).data()
    print(df)
    for i in df:
        print(i['a']['text'])
        # NOTE(review): matches are only printed; they are not added to
        # `text_list`, so they never reach `text_chunk`. Confirm whether they
        # were meant to be appended here.

# Rebuild the context string from the (unchanged) text_list.
text_chunk="".join(text_list)

print(text_list)

In [None]:
#利用大语言模型回答
import openai

# Endpoint and key for the OpenAI-compatible API (blank placeholders; fill in
# before running).
openai.api_base = ''
openai.api_key = ''

# Prompt layout: the question on the first line, then the retrieved reference text.
# Relies on `user_query` and `text_chunk` defined by earlier cells.
augment_query = "".join(["问题：", user_query, "\n", "相关资料：", text_chunk])

# Single-turn conversation containing only the augmented prompt.
history = [{"role": "user", "content": augment_query}]

# The model name is a blank placeholder; a low temperature is requested.
response = openai.ChatCompletion.create(
    model='',
    messages=history,
    temperature=0.2,
)

# Text of the first returned choice.
answer = response.choices[0].message.content

print(answer)
