Point 7: Use verified retrieved documents to prompt the LLM

This notebook demonstrates an advanced RAG technique, child-parent retrieval with LlamaIndex's `RecursiveRetriever`, using the Mistral LLM.

In [None]:
!pip install pinecone-client
!pip install sentence-transformers
!pip install llama-index --use-deprecated=legacy-resolver
!pip install langchain
!pip install replicate

In [None]:
from llama_index.response.notebook_utils import display_source_node
from llama_index.retrievers import RecursiveRetriever
from llama_index.query_engine import RetrieverQueryEngine
from llama_index import VectorStoreIndex, ServiceContext
from llama_index.llms import OpenAI
import json

In [None]:
import os
from getpass import getpass

# Credentials must never be committed in a notebook. Use the key already present
# in the environment, and only prompt (without echoing) when it is missing.
# NOTE: any key that was previously stored in this cell must be treated as
# compromised and rotated.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass("OpenAI API key: ")

In [None]:
import os
from getpass import getpass

import pinecone

# Read the Pinecone key from the environment instead of hardcoding it; fall back
# to a hidden prompt when it is not set.
# NOTE: any key that was previously stored in this cell must be treated as
# compromised and rotated.
api_key = os.environ.get("PINECONE_API_KEY") or getpass("Pinecone API key: ")
pinecone.init(api_key=api_key, environment="gcp-starter")

  from tqdm.autonotebook import tqdm


In [None]:
# Name of the existing Pinecone index that holds the document embeddings.
PINECONE_INDEX_NAME = "langchain-rag"

# The description is not displayed (it is not the cell's last expression); the
# call presumably serves as a sanity check that the index exists -- confirm.
pinecone.describe_index(PINECONE_INDEX_NAME)

# Handle used by the vector store below.
pinecone_index = pinecone.Index(PINECONE_INDEX_NAME)

In [None]:
from llama_index import VectorStoreIndex, ServiceContext
from llama_index.vector_stores import PineconeVectorStore

# Expose the existing Pinecone index to LlamaIndex as a vector store,
# with the sparse-vector option switched on.
vector_store = PineconeVectorStore(pinecone_index=pinecone_index, add_sparse_vector=True)

In [None]:
import os
from getpass import getpass

# Read the Replicate token from the environment instead of hardcoding it; prompt
# (without echoing) only when it is missing.
# NOTE: any token that was previously stored in this cell must be treated as
# compromised and rotated.
if not os.environ.get("REPLICATE_API_TOKEN"):
    os.environ["REPLICATE_API_TOKEN"] = getpass("Replicate API token: ")

In [None]:
from llama_index.llms import Replicate

# Replicate model reference ("owner/name:version") for Mistral 7B Instruct v0.1.
MISTRAL_MODEL = "mistralai/mistral-7b-instruct-v0.1:83b6a56e7c828e667f21fd596c338fd4f0039b46bcfa18d973e8e70e455fda70"

# LLM handle passed to the service context further down.
mistral = Replicate(model=MISTRAL_MODEL)

In [None]:
# Build the first-stage retriever on top of the Pinecone-backed vector store.
from langchain.embeddings.huggingface import HuggingFaceEmbeddings

# Sentence-transformers model used to embed queries and chunks.
embed_model = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-mpnet-base-v2"
)

# Bundle the embedding model and the Mistral LLM so every index / query engine
# created with this context shares the same configuration.
service_context = ServiceContext.from_defaults(
    llm=mistral,
    embed_model=embed_model,
)

# Wrap the already-populated vector store; nothing is re-ingested here.
index = VectorStoreIndex.from_vector_store(
    vector_store=vector_store,
    service_context=service_context,
)

# Fetch the top 5 most relevant chunks.
retriever = index.as_retriever(similarity_top_k=5)

In [None]:
# Drugs to ask about. Every prompt is generated from the same template so the
# wording (including punctuation) is identical across questions.
drug_names = [
    "doxycycline",
    "spironolactone",
    "minocycline",
    "Accutane",
    "clindamycin",
    "Aldactone",
    "tretinoin",
    "isotretinoin",
    "Bactrim",
    "Retin-A",
]

questions = [f"What are the side effects of {drug}?" for drug in drug_names]


In [None]:
from llama_index import Document
from llama_index.node_parser import SimpleNodeParser
from llama_index.schema import IndexNode
from llama_index.node_parser import SentenceSplitter

# Answer every question with a two-stage pipeline:
#   1. retrieve the top chunks from the Pinecone-backed index,
#   2. re-chunk them into parent nodes plus smaller child nodes, and query them
#      through a RecursiveRetriever so that a matching child resolves to its parent.
# The answers are collected, as strings, in `responses`.

# The parsers do not depend on the question, so build them once.
node_parser = SentenceSplitter(chunk_size=1024, chunk_overlap=20)

# NOTE(review): 250 and 256 are nearly identical chunk sizes -- confirm 250 is intended.
sub_chunk_sizes = [250, 256, 512]
sub_node_parsers = [
  SimpleNodeParser.from_defaults(chunk_size=c) for c in sub_chunk_sizes
]

responses = []
for question in questions:
  # Stage 1: pull the most relevant chunks and merge them into one document.
  nodes = retriever.retrieve(question)
  doc_text = "\n\n".join([d.get_content() for d in nodes])
  docs = [Document(text=doc_text)]

  # Split the merged text into parent nodes and give them stable, readable ids.
  # The nodes must NOT be re-parsed after this point: a second
  # get_nodes_from_documents() call would replace them with fresh nodes and
  # silently discard the ids assigned here.
  base_nodes = node_parser.get_nodes_from_documents(docs)
  for idx, node in enumerate(base_nodes):
    node.id_ = f"node-{idx}"

  # Baseline (parent chunks only), kept for side-by-side inspection; it does not
  # contribute to `responses`.
  base_index = VectorStoreIndex(base_nodes, service_context=service_context)
  base_retriever = base_index.as_retriever(similarity_top_k=2)
  retrievals = base_retriever.retrieve(question)
  query_engine_base = RetrieverQueryEngine.from_args(
    base_retriever, service_context=service_context
  )

  # Stage 2: for every parent node, create child nodes at each sub-chunk size.
  # Each child is an IndexNode that points back to its parent's id.
  all_nodes = []
  for base_node in base_nodes:
    for n in sub_node_parsers:
      sub_nodes = n.get_nodes_from_documents([base_node])
      sub_inodes = [
        IndexNode.from_text_node(sn, base_node.node_id) for sn in sub_nodes
      ]
      all_nodes.extend(sub_inodes)

    # Also add the parent itself so it can be matched directly.
    original_node = IndexNode.from_text_node(base_node, base_node.node_id)
    all_nodes.append(original_node)

  # Lookup table passed to the recursive retriever as `node_dict`.
  all_nodes_dict = {n.node_id: n for n in all_nodes}

  vector_index_chunk = VectorStoreIndex(
    all_nodes, service_context=service_context
  )
  vector_retriever_chunk = vector_index_chunk.as_retriever(similarity_top_k=2)

  retriever_chunk = RecursiveRetriever(
    "vector",
    retriever_dict={"vector": vector_retriever_chunk},
    node_dict=all_nodes_dict,
    verbose=True,
  )

  # Pass the service context explicitly, as the baseline engine does. Without it
  # the engine is built with the library's default settings rather than the
  # Mistral LLM configured for this notebook.
  query_engine = RetrieverQueryEngine.from_args(
    retriever_chunk, service_context=service_context
  )

  response = query_engine.query(question)
  responses.append(str(response))


[1;3;34mRetrieving with query id None: What are the side effects of doxycycline?
[0m[1;3;38;5;200mRetrieved node with id, entering: ceb9e608-bd70-441a-9d81-33d6c874640c
[0m[1;3;34mRetrieving with query id ceb9e608-bd70-441a-9d81-33d6c874640c: What are the side effects of doxycycline?
[0m[1;3;34mRetrieving with query id None: What are the side effects of spironolactone?
[0m[1;3;38;5;200mRetrieved node with id, entering: d94975eb-c0a9-41fa-bb9f-2dd36abcdc96
[0m[1;3;34mRetrieving with query id d94975eb-c0a9-41fa-bb9f-2dd36abcdc96: What are the side effects of spironolactone?
[0m[1;3;34mRetrieving with query id None: What are the side effects of minocycline?
[0m[1;3;38;5;200mRetrieved node with id, entering: d22268d4-1c7a-43ac-a5e5-6c57f5e1ecf2
[0m[1;3;34mRetrieving with query id d22268d4-1c7a-43ac-a5e5-6c57f5e1ecf2: What are the side effects of minocycline?
[0m[1;3;34mRetrieving with query id None: What are the side effects of Accutane?
[0m[1;3;38;5;200mRetrieved node

In [None]:
# Print every answer with a 1-based label.
# The counter is deliberately not called `index`: at notebook scope that would
# overwrite the `index` object (the VectorStoreIndex built in the retriever
# cell), breaking any cell that uses it afterwards.
for response_number, response_text in enumerate(responses, start=1):
    print(f"Response {response_number}: {response_text}\n")

Response 1: The side effects of doxycycline may include nausea and vomiting, upset stomach, loss of appetite, mild diarrhea, skin rash or itching, darkened skin color, vaginal itching or discharge. In rare cases, serious side effects may occur, such as severe stomach pain, diarrhea that is watery or bloody, throat irritation, trouble swallowing, chest pain, irregular heart rhythm, feeling short of breath, little or no urination, low white blood cell counts, severe headaches, ringing in the ears, dizziness, nausea, vision problems, pain behind the eyes, loss of appetite, upper stomach pain, tiredness, nausea or vomiting, fast heart rate, dark urine, jaundice.

Response 2: The side effects of spironolactone may include breast swelling or tenderness, drowsiness, dizziness, lack of energy, leg cramps, weakness, feeling like you might pass out, severe pain in your upper stomach spreading to your back, nausea and vomiting, electrolyte imbalance, and high or low potassium levels.

Response 3: