import sys
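# Make the local "llm-prompts" directory importable so the helper modules
# below (llm_clnt_factory_api and rag_utils) resolve.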
sys.path.insert(0, "llm-prompts")
import os
from anthropic import Anthropic
from llm_clnt_factory_api import ClientFactory, get_commpletion
from rag_utils import print_matches, extract_matches
from pinecone import Pinecone, PodSpec
from sentence_transformers import SentenceTransformer
from dotenv import load_dotenv, find_dotenv
#
# Example code to run a similarity search against the Pinecone index
# built from the indexed PDF document.
#
if __name__ == "__main__":
    os.environ["TOKENIZERS_PARALLELISM"] = "false"
    TOP_K = 5
    # Set up the Pinecone environment. Use the .env file to load the Pinecone API key
    # and the environment name, which is "gcp-starter" in this case, for the GCP
    # starter environment. A free community edition of Pinecone is also available.
    _ = load_dotenv(find_dotenv())
    pinecone_api_key = os.getenv("PINECONE_API_KEY")
    if pinecone_api_key is None:
        raise ValueError("Please set the PINECONE_API_KEY environment variable")
    pc = Pinecone(
        api_key=pinecone_api_key,
        environment="gcp-starter",
        spec=PodSpec(environment="gcp-starter")
    )
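    # Note: with the v3+ Pinecone client, only the API key is strictly required
    # here; the environment/PodSpec are normally supplied when an index is created.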
    # Our starter Pinecone index
    index_name = "starter-index"
    # Connect or get a handle to the index
    pindex = pc.Index(index_name)
    # Create our model instance for encoding the query
    model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
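    # all-MiniLM-L6-v2 emits 384-dimensional embeddings; the index is assumed
    # to have been created with dimension=384 using the same model.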
    print('-' * 50)
    # Start a semantic search
    print("Running a semantic search...")
    query = "What are the key takeaways for AI in 2023 from the HAI_AI Index?"
    print(f"Query: {query}")
    query_embedding = model.encode(query).tolist()
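    # .tolist() converts the numpy embedding into the plain Python list that
    # Pinecone's query API expects for the vector argument.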
    results = pindex.query(vector=query_embedding, top_k=TOP_K,
                           include_values=False,
                           include_metadata=True)
    print(f"Top {TOP_K} results for the query:")
    print_matches(results)
    print('-' * 50)
    # Extract the context from the results for our LLM query
    context = "".join(extract_matches(results))
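    # extract_matches() is assumed to return the matched text chunks from the
    # results' metadata; joined together they form the grounding context.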
    # Construct our query plus the matches returned
    # from the vector db for the LLM model to finalize
    # the response
    anthropic_api_key = os.getenv("ANTHROPIC_API_KEY")
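    # Added guard (an assumption, mirroring the PINECONE_API_KEY check above):
    # fail fast if the Anthropic key is missing.
    if anthropic_api_key is None:
        raise ValueError("Please set the ANTHROPIC_API_KEY environment variable")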
    MODEL = os.getenv("MODEL")
    print(f"Using MODEL={MODEL}; base=Anthropic")
    # Create an Anthropic client instance using our
    # client factory method
    client_factory = ClientFactory()
    client_type = "anthropic"
    client_factory.register_client(client_type, Anthropic)
    client_kwargs = {"api_key": anthropic_api_key}
    # Create the client
    client = client_factory.create_client(client_type, **client_kwargs)
    # Create the system and user prompts for the LLM model
    system_content = """You are a master of all knowledge, and a helpful sage.
    You must summarize content given to you by drawing from your vast
    knowledge about history, literature, science, social science, philosophy, religion,
    economics, sports, etc. Do not make up any responses. Only provide information that
    is true and verifiable, and use the given context to provide the response.
    """
    user_content = f"""What are the key takeaways for AI in 2023 from the HAI_AI Index Report_2023,
    given the following context: {context}. Only provide information that is true and verifiable,
    and use the given context to provide the response.
    """
    # Get the response
    BOLD_BEGIN = "\033[1m"
    BOLD_END = "\033[0m"
    response = get_commpletion(client, MODEL, system_content, user_content)
    # Strip any stray code fences from the model output
    response = response.replace("```", "")
    print(f"\n{BOLD_BEGIN}Prompt:{BOLD_END} {user_content}")
    print(f"\n{BOLD_BEGIN}Answer:{BOLD_END} {response}")