TODO: hay que subir todo esto a GitHub.

# Ingesting base documents

In [11]:
# https://medium.com/@shikhararyan/how-to-create-a-vector-database-using-your-data-from-your-files-01794986ccfe
# https://github.com/nlmatics/llmsherpa#layoutpdfreader
# https://www.llamaindex.ai/blog/mastering-pdfs-extracting-sections-headings-paragraphs-and-tables-with-cutting-edge-parser-faea18870125
# https://docs.aws.amazon.com/serverless-application-model/latest/developerguide/install-docker.html
# https://medium.com/@nanceeezhong/implementing-retrieval-augmented-generation-rag-for-pdf-file-with-llmsherpa-lancedb-and-openai-dfd1e879e915
# https://llamahub.ai/l/readers/llama-index-readers-smart-pdf-loader?from=all

# FOR CLEANING: https://docs.pinecone.io/integrations/llamaindex

# RUN PDF PARSING: docker run -p 80:5001 ghcr.io/nlmatics/nlm-ingestor:latest


### Global imports

In [12]:
from typing import Optional

In [13]:
%run Firebase.ipynb
%run Bucket.ipynb

### Environment variables

In [14]:
import os
from dotenv import load_dotenv

# Load environment variables from .env file
load_dotenv()

# OPENAI
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

# os.environ only accepts str values: assigning None raises TypeError, so the
# key is re-exported only when it is actually defined.
if OPENAI_API_KEY is not None:
  os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY
else:
  print("Warning: OPENAI_API_KEY is not set; OpenAI calls will fail.")

# AWS
AWS_ACCESS_KEY = os.getenv("AWS_ACCESS_KEY")
AWS_SECRET_ACCESS_KEY = os.getenv("AWS_SECRET_ACCESS_KEY")
AWS_REGION = os.getenv("AWS_REGION")
AWS_S3_OUTPUT_BUCKET = os.getenv("AWS_S3_OUTPUT_BUCKET")
AWS_S3_INPUT_BUCKET = os.getenv("AWS_S3_INPUT_BUCKET")

# FIRESTORE
CREDENTIALS_JSON_NAME = os.getenv("CREDENTIALS_JSON_NAME")
FIRESTORE_COLLECTION = os.getenv("FIRESTORE_COLLECTION")
FIRESTORE_PROJECT = os.getenv("FIRESTORE_PROJECT")
FIRESTORE_DATABASE = os.getenv("FIRESTORE_DATABASE")

# HUGGINGFACE
HF_TOKEN = os.getenv("HF_TOKEN")

# PINECONE
PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")

# RAGs

## Base RAG

In [15]:
# Registry of RAG instances used for evaluation comparison.
# It is a dict (not a list): each RAG is stored under its `name` attribute.
rags = {}

In [None]:
from trulens.apps.custom import instrument

class RAG:
  """
    Sets up a RAG with a Pinecone index and an OpenAI LLM model with llama-index.
    This is the BASE version.

    Attributes populated by ``_setup`` and the ``set_*`` helpers:
      name, index_name: identifier and local persist directory of the index.
      pc_index, vector_store, storage_context: Pinecone handle and its
        llama-index wrappers.
      embed_model, llm_model: models also registered in llama-index ``Settings``.
      prompt: question-answering prompt template.
      retriever: retriever used by ``retrieve``.
      query_engine: engine used by ``generate_completion``.
  """

  def __str__(self):
    return self.name

  def __init__(self, full_refresh:bool = False, similarity_top_k:int = 4):
    """
    Args:
    full_refresh (bool): Whether to build the index from 0.
    similarity_top_k (int): Number of similar documents to retrieve from the index.
    """
    self.name = "BaseRag"
    self.index_name = f"indexes/{self.name}_index"

    self._setup(full_refresh, similarity_top_k)

  def _setup(
    self,
    full_refresh:bool = False,
    similarity_top_k:int = 4
    ) -> None:
    """
    Sets up the RAG step-by-step.

    With ``full_refresh=False`` the index is loaded from ``self.index_name``;
    callers in this notebook rely on that path raising when nothing has been
    persisted yet. With ``full_refresh=True`` the documents are downloaded,
    indexed into Pinecone and the index metadata is persisted locally.

    Args:
    full_refresh (bool): Whether to build the index from 0.
    similarity_top_k (int): Number of similar documents to retrieve from the index.
    """

    from llama_index.core import (
      VectorStoreIndex,
      StorageContext,
      load_index_from_storage
    )
    from llama_index.vector_stores.pinecone import PineconeVectorStore
    self.log_divider = "\n" + "*"*100 + "\n"

    if not full_refresh:
      print("Not building index from 0...")
      self.set_pinecone_index()
      self.vector_store = PineconeVectorStore(pinecone_index=self.pc_index)
      # Kept on self so get_pinecone_record and subclasses can use it on
      # both the load path and the build path.
      self.storage_context = StorageContext.from_defaults(
          persist_dir=self.index_name,
          vector_store=self.vector_store
      )
      # The models must also be configured when loading: otherwise queries
      # would be embedded with whatever llama-index Settings currently hold,
      # which may not match the 1024-dimension index.
      self.set_up_embedding_model()
      self.set_up_llm_model()
      index = load_index_from_storage(self.storage_context)
    else:
      print("Building index from 0❗️")

      input_documents = self.set_input_documents()
      self.set_pinecone_index()
      self.vector_store = PineconeVectorStore(pinecone_index=self.pc_index)
      self.storage_context = StorageContext.from_defaults(
        vector_store=self.vector_store
      )
      self.set_up_embedding_model()
      self.set_up_llm_model()

      print("\nFinished setting up dependencies. 🎉")

      print("\nBuilding index...")

      index = VectorStoreIndex.from_documents(
          input_documents,
          embed_model=self.embed_model,
          storage_context=self.storage_context,
          text_key="text",
          show_progress=True
        )
      index.storage_context.persist(self.index_name)

    print("\nFinished building index 🚀\n")

    # Same retriever for both paths, honouring the requested top-k.
    self.retriever = VectorStoreIndex.from_vector_store(
      self.vector_store,
      embed_model=self.embed_model
    ).as_retriever(
      similarity_top_k=similarity_top_k
    )

    self.set_up_prompt()
    self.query_engine = index.as_query_engine(
        text_qa_template=self.prompt,
        similarity_top_k=similarity_top_k
    )

    print(self.pc_index.describe_index_stats())
    self.display_prompt_dict()

  @instrument
  def retrieve(self, query:str, only_text:bool = False) -> list:
    """
    Retrieves documents from the vector store given a query.

    Args:
    query (str): The query to retrieve documents for.
    only_text (bool): Whether to return only the text of the documents.

    Returns:
    list: List of dictionaries with the text, score, and source of the
      document (or list of plain texts when ``only_text`` is True).
      An empty list when nothing was retrieved or the retrieval failed.
    """

    try:
      print("Retrieving documents from vector store...")
      results = self.retriever.retrieve(query)
    except Exception as e:
      print("Error retrieving documents:", e)
      results = []

    if not results:
      # Always return a list so callers can safely take len() of the result.
      return []

    print(f"Retrieved {len(results)} documents.")
    if only_text:
      print("Returning only text...")
      return [doc.node.text for doc in results]

    return [
        {
            "text": doc.node.text,
            "score": doc.score,
            "source": doc.metadata
        } for doc in results
      ]

  @instrument
  def query(self, query:str) -> str:
    """
    Queries the RAG with a given question.

    Args:
    query (str): The question to query the RAG with.

    Returns:
    str: The answer to the question.
    """
    context_str = self.retrieve(query, only_text=True)
    completion = self.generate_completion(query, context_str)
    return completion


  @instrument
  def generate_completion(self, query: str, context_str: list) -> str:
    """
    Generate answer from context.

    ``context_str`` is only used to decide whether there is anything to
    answer from; the query engine performs its own retrieval.

    Args:
    query (str): The question to answer.
    context_str (list): Texts retrieved for the question.

    Returns:
    The query engine result, or a fallback message when there is no context
    or the generation fails.
    NOTE(review): the success path returns the query-engine result object
    as-is rather than a plain str — confirm what callers expect.
    """
    fallback = "Sorry, I couldn't find an answer to your question."
    if not context_str:
        return fallback

    try:
      return self.query_engine.query(query)
    except Exception as e:
      print("Error generating completion:", e)
      return fallback


  def set_up_prompt(self) -> None:
    """
    Sets up the prompt for the RAG.
    """
    from llama_index.core import PromptTemplate
    qa_template_text = (
        "You will receive a question in Spanish.\n"
        "The question is based in the given context, since it contains the theory on which the question must be answered.\n"
        "Context information is below.\n"
        "---------------------\n"
        "{context_str}\n"
        "---------------------\n"
        "Given the context information and not prior knowledge, "
        "answer the query in Spanish.\n"
        "Query: {query_str}\n"
        "Answer: "
    )
    self.prompt = PromptTemplate(qa_template_text)


  def display_prompt_dict(self) -> None:
    """
    Displays the prompt dictionary for the RAG.
    """
    from IPython.display import Markdown, display
    prompts_dict = self.query_engine.get_prompts()
    for k, p in prompts_dict.items():
        text_md = f"**Prompt Key**: {k}<br>" f"**Text:** <br>"
        display(Markdown(text_md))
        print(p.get_template())
        display(Markdown("<br><br>"))


  def set_up_llm_model(self, chunk_size:Optional[int] = None) -> None:
    """
    Sets up the LLM model for the RAG.

    Args:
    chunk_size (int): The chunk size to use for the LLM model input.
    """
    from llama_index.llms.openai import OpenAI
    from llama_index.core import Settings

    # os.environ rejects None, so only export the key when it is defined.
    if OPENAI_API_KEY is not None:
      os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY
    open_ai_model = "gpt-4o"

    print("Initializing llm model...")
    llm_model = OpenAI(
        model=open_ai_model,
        temperature=0.1,
        max_tokens=1000,
        logprobs=False,
        default_headers={}
    )

    if chunk_size:
      Settings.chunk_size = chunk_size

    Settings.llm = llm_model
    self.llm_model = llm_model
    print("Finished setting up llm model:", llm_model)
    print(self.log_divider)


  def set_input_documents(self) -> list:
    """
    Sets up the input documents for the RAG.
    These documents will be downloaded from firebase and used to build the context.
    The documents are llama-index document objects, not .pdf files or such.

    Returns:
    list: List of llama-index documents. Only the first 5 downloaded
      documents are returned.
    """
    pdf_files_titles = [
      "Tipos de aprendizaje.pdf",
      "Algoritmo de Bayes Ingenuo.pdf",
      "Conceptos de evaluación.pdf"
    ]
    files_documents = []
    firebase = Firebase()

    for title in pdf_files_titles:
      print(f"\nGetting all documents for {title} from Firebase")
      # Try to grab a sample of X documents from each file.
      llama_index_documents = firebase.get_all_documents(
          limit=50,
          document_title=title
        )
      print(f"Downloaded {len(llama_index_documents)} documents for {title} from Firebase")
      files_documents += llama_index_documents

    print(f"Downloaded {len(files_documents)} documents from Firebase")
    print(self.log_divider)
    # NOTE(review): only the first 5 documents are indexed; this looks like a
    # deliberate cap to keep builds small — confirm before changing.
    return files_documents[:5]

  def set_up_embedding_model(self) -> None:
    """
    Sets up the embedding model for the RAG used to encode the documents.
    """
    from llama_index.embeddings.huggingface import HuggingFaceEmbedding
    from llama_index.core import Settings
    import os

    # To force the model to use the HuggingFace model and token
    os.environ.pop("OPENAI_API_KEY", None)

    try:
      if HF_TOKEN is not None:
        os.environ["HUGGINGFACEHUB_API_TOKEN"] = HF_TOKEN

      print("Initializing embedding model...")
      embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-m3")

      Settings.embed_model = embed_model
      self.embed_model = embed_model
    finally:
      # Restore the OpenAI key even if loading the embedding model failed,
      # so a failed attempt does not leave the process without it.
      if OPENAI_API_KEY is not None:
        os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY

    print("Finished setting up embedding model:", embed_model)
    print(self.log_divider)


  def set_pinecone_index(self) -> None:
    """
      Set up the context used by the RAG with Pinecone.

      Tries to create the "quickstart" index and, whether or not the creation
      succeeded, opens it and stores the handle in ``self.pc_index``.
    """
    from pinecone import Pinecone, ServerlessSpec
    print("Initializing pinecone...")

    pc = Pinecone(api_key=PINECONE_API_KEY)

    pc_index_name = "quickstart"

    try:
      pc.create_index(
          name=pc_index_name,
          dimension=1024,
          metric="euclidean",
          spec=ServerlessSpec(cloud="aws", region="us-east-1"),
      )
    except Exception as e:
      # Best effort: creation presumably fails when the index already exists.
      # The error is printed and the index is opened below either way.
      print(e)

    pc_index = pc.Index(pc_index_name)
    print("Finished setting up pinecone index:", pc_index.describe_index_stats())
    self.pc_index = pc_index
    print(self.log_divider)


  def get_pinecone_record(self, document_id:str) -> dict:
    """
    Get a record from Pinecone by document ID.

    Args:
    document_id (str): The document ID to retrieve from Pinecone.

    Returns:
    dict: The record from Pinecone.

    Raises:
    IndexError: If no record matches the given document ID.
    """
    # The lookup is driven by the metadata filter; the all-zero vector is a
    # placeholder with the same dimension (1024) the index is created with.
    response = self.pc_index.query(
      filter={"doc_id": {"$eq": document_id}},
      vector=[0] * 1024,
      top_k=1,
      include_metadata=True
    )
    matches = response["matches"]
    if not matches:
      raise IndexError(f"No Pinecone record found for doc_id={document_id!r}")
    return matches[0]


  def get_evaluation_questions(self) -> str:
    """
    Downloads the evaluation questions from S3.

    Returns:
    str: The local path to the evaluation questions file.
    """
    bucket = Bucket(AWS_S3_OUTPUT_BUCKET)
    eval_questions_path = bucket.download_object(
      "evaluation_questions.txt",
      "evaluation",
      return_file_path=True
    )
    print("Evaluation questions have been downloaded in the following path:", eval_questions_path)
    return eval_questions_path

# Alternative kept for reference: load the persisted index instead of
# rebuilding it.
# try:
#   rag = RAG()
# except:
#   print("No index persisted.")

# Build the baseline RAG from scratch and register it under its name.
rag = RAG(full_refresh=True)
rags[rag.name] = rag

# Smoke test: run one query and report any failure instead of raising.
try: 
  rag.query("¿Qué es el aprendizaje supervisado?")
except Exception as e:
  print("RAG created incorrectly: ", e)


## Diferentes técnicas de retrieval de contexto

### Using Sentence Window Retrieval

In [None]:
class SentenceRetrievalRag(RAG):
  """
    Child class of RAG that sets up a Sentence Retrieval RAG with Pinecone.
    By applying a node parser, the RAG can retrieve sentences instead of
    full documents when building the context or any given query.
    Source: https://docs.llamaindex.ai/en/stable/examples/node_postprocessor/MetadataReplacementDemo/
  """

  def __init__(self, full_refresh:bool = False, similarity_top_k:int = 4):
    """
    Args:
    full_refresh (bool): Whether to build the index from 0.
    similarity_top_k (int): Number of similar documents to retrieve from the index.
    """
    self.name = "SentenceRetrievalRag"
    self.index_name = f"indexes/{self.name}_index"

    self._setup(full_refresh, similarity_top_k)


  def set_node_parser(
      self,
      window_size:int = 4,
      window_metadata_key:str = "window"
  ) -> None:
    """
    Sets up the node parser for the RAG.

    Args:
    window_size (int): The window size to use for the node parser.
    window_metadata_key (str): The metadata key to use for the window.
    """
    from llama_index.core.node_parser import SentenceWindowNodeParser
    from llama_index.core.postprocessor import MetadataReplacementPostProcessor
    from llama_index.core import Settings

    print("Initializing node parser...")
    node_parser = SentenceWindowNodeParser.from_defaults(
        window_size=window_size,
        window_metadata_key=window_metadata_key,
        original_text_metadata_key="text",
    )
    self.node_parser = node_parser
    # NOTE(review): this also changes the parser in the global llama-index
    # Settings, so it may affect indexes built afterwards — confirm intended.
    Settings.node_parser = node_parser

    # Sentence Window postprocessors
    print("Initializing postprocessors for nodes...")
    self.post_processors = [
        # Must read the same metadata key the parser writes the window to.
        MetadataReplacementPostProcessor(target_metadata_key=window_metadata_key),
    ]
    print("Finished setting up node parser:", node_parser)
    print(self.log_divider)


  def _setup(
    self,
    full_refresh:bool = False,
    similarity_top_k:int = 4
    ) -> None:
    """
    Sets up the RAG step-by-step.

    Args:
    full_refresh (bool): Whether to build the index from 0.
    similarity_top_k (int): Number of similar documents to retrieve from the index.
    """
    from llama_index.vector_stores.pinecone import PineconeVectorStore
    from llama_index.core import (
      VectorStoreIndex,
      StorageContext,
      load_index_from_storage
    )
    self.log_divider = "\n" + "*"*100 + "\n"

    if not full_refresh:
      print("Not building index from 0...")
      self.set_pinecone_index()
      self.set_node_parser()
      self.vector_store = PineconeVectorStore(pinecone_index=self.pc_index)
      self.storage_context = StorageContext.from_defaults(
          persist_dir=self.index_name,
          vector_store=self.vector_store
      )
      # Configure the models on the load path too, so queries are embedded
      # with the same model the index was built with.
      self.set_up_embedding_model()
      self.set_up_llm_model()
      print("Loading index from storage...")
      index = load_index_from_storage(self.storage_context)
    else:
      print("Building index from 0❗️")

      input_documents = self.set_input_documents()[:5]
      self.set_pinecone_index()
      self.set_node_parser()
      self.vector_store = PineconeVectorStore(pinecone_index=self.pc_index)
      self.storage_context = StorageContext.from_defaults(vector_store=self.vector_store)
      self.set_up_embedding_model()
      self.set_up_llm_model()

      print("\nFinished setting up dependencies. 🎉")

      print("\nBuilding index...")
      # from_documents runs the documents through the sentence-window parser.
      # Calling VectorStoreIndex(documents) directly would treat each document
      # as an already-parsed node and skip the parser entirely.
      index = VectorStoreIndex.from_documents(
          input_documents,
          storage_context=self.storage_context,
          embed_model=self.embed_model,
          transformations=[self.node_parser],
          text_key="text",
          show_progress=True
        )
      index.storage_context.persist(self.index_name)

    print("\nFinished building index 🚀\n")

    self.retriever = VectorStoreIndex.from_vector_store(
      self.vector_store,
      embed_model=self.embed_model
    ).as_retriever(
      similarity_top_k=similarity_top_k
    )

    self.set_up_prompt()
    self.query_engine = index.as_query_engine(
        text_qa_template=self.prompt,
        node_postprocessors=self.post_processors,
        similarity_top_k=similarity_top_k
    )

    print(self.pc_index.describe_index_stats())
    self.display_prompt_dict()



# Reuse the persisted index when possible; otherwise rebuild it from scratch.
try:
  rag_2 = SentenceRetrievalRag()
except Exception as e:
  # `except Exception` (not a bare except) lets kernel interrupts propagate,
  # and the cause is printed because the failure may not be a missing index.
  print("No index persisted.")
  print("Reason:", e)
  rag_2 = SentenceRetrievalRag(full_refresh=True)
rags[rag_2.name] = rag_2


### Using automerging index

In [None]:
class AutoMergingRetrievalRag(RAG):
  """
    Child class of RAG that sets up an auto-merging retrieval RAG with Pinecone.
    Documents are split into a hierarchy of chunks; only the leaf chunks are
    embedded, and at query time retrieved leaves are merged into their parent
    chunk by an AutoMergingRetriever.
  """

  def __init__(self, full_refresh:bool = False, similarity_top_k:int = 4):
    """
    Args:
    full_refresh (bool): Whether to build the index from 0.
    similarity_top_k (int): Number of similar documents to retrieve from the index.
    """
    self.name = "AutomergingRetrievalRag"
    self.index_name = f"indexes/{self.name}_index"

    self._setup(full_refresh, similarity_top_k)


  def set_node_parser(self) -> None:
    """
    Sets up the hierarchical node parser for the RAG.
    """
    from llama_index.core import Settings
    from llama_index.core.node_parser import HierarchicalNodeParser

    print("Initializing node parser...")
    node_parser = HierarchicalNodeParser.from_defaults(
        chunk_sizes=[2048, 512, 128]
    )
    self.node_parser = node_parser
    # NOTE(review): this also changes the parser in the global llama-index
    # Settings, so it may affect indexes built afterwards — confirm intended.
    Settings.node_parser = node_parser

    print("Finished setting up node parser:", node_parser)
    print(self.log_divider)


  def _setup(
    self,
    full_refresh:bool = False,
    similarity_top_k:int = 4
  ) -> None:
    """
    Sets up the RAG step-by-step.

    Args:
    full_refresh (bool): Whether to build the index from 0.
    similarity_top_k (int): Number of similar documents to retrieve from the index.
    """
    from llama_index.core import (
      VectorStoreIndex,
      StorageContext,
      load_index_from_storage,
    )
    from llama_index.core.node_parser import get_leaf_nodes
    from llama_index.core.query_engine import RetrieverQueryEngine
    from llama_index.core.retrievers import AutoMergingRetriever
    from llama_index.vector_stores.pinecone import PineconeVectorStore

    self.log_divider = "\n" + "*"*100 + "\n"

    if not full_refresh:
      print("Not building index from 0...")
      self.set_pinecone_index()
      self.set_node_parser()
      self.vector_store = PineconeVectorStore(pinecone_index=self.pc_index)
      self.storage_context = StorageContext.from_defaults(
          persist_dir=self.index_name,
          vector_store=self.vector_store
      )
      # Configure the models on the load path too, so queries are embedded
      # with the same model the index was built with.
      self.set_up_embedding_model()
      self.set_up_llm_model()
      print("Loading index from storage...")
      index = load_index_from_storage(self.storage_context)
    else:
      print("Building index from 0❗️")

      input_documents = self.set_input_documents()[:5]
      self.set_pinecone_index()
      self.set_node_parser()
      self.vector_store = PineconeVectorStore(pinecone_index=self.pc_index)
      self.storage_context = StorageContext.from_defaults(vector_store=self.vector_store)
      self.set_up_embedding_model()
      self.set_up_llm_model()

      print("\nFinished setting up dependencies. 🎉")

      print("\nBuilding index...")
      # Parse the documents into the chunk hierarchy. Every node goes into the
      # docstore so parents can be looked up when merging, but only the leaf
      # nodes are embedded into the vector store.
      nodes = self.node_parser.get_nodes_from_documents(input_documents)
      leaf_nodes = get_leaf_nodes(nodes)
      self.storage_context.docstore.add_documents(nodes)

      index = VectorStoreIndex(
          leaf_nodes,
          storage_context=self.storage_context,
          embed_model=self.embed_model,
          text_key="text",
          show_progress=True
        )
      # Persisting also saves the docstore holding the parent nodes.
      index.storage_context.persist(self.index_name)

    print("\nFinished building index 🚀\n")

    # Wrap the plain vector retriever so retrieved leaves get merged into
    # their parents (looked up in the docstore).
    self.retriever = AutoMergingRetriever(
      index.as_retriever(similarity_top_k=similarity_top_k),
      self.storage_context
    )

    self.set_up_prompt()
    self.query_engine = RetrieverQueryEngine.from_args(
        self.retriever,
        text_qa_template=self.prompt
    )

    print(self.pc_index.describe_index_stats())
    self.display_prompt_dict()



# Reuse the persisted index when possible; otherwise rebuild it from scratch.
try:
  rag_3 = AutoMergingRetrievalRag()
except Exception as e:
  # `except Exception` (not a bare except) lets kernel interrupts propagate,
  # and the cause is printed because the failure may not be a missing index.
  print("No index persisted.")
  print("Reason:", e)
  rag_3 = AutoMergingRetrievalRag(full_refresh=True)

rags[rag_3.name] = rag_3


# Re-rank results

In [None]:
class RankRag(RAG):
  """
    Child class of RAG that re-ranks the retrieved nodes with a
    SentenceTransformerRerank post-processor ("BAAI/bge-reranker-base")
    before the answer is generated.
  """

  def __str__(self):
    return self.name

  def __init__(self, full_refresh:bool = False, rerank_top_n:int = 4):
    """
    Args:
    full_refresh (bool): Whether to build the index from 0.
    rerank_top_n (int): Number of nodes kept after re-ranking.
    """
    self.name = "RankRag" + f"_top{rerank_top_n}n"
    print("Initializing :", self.name)
    self.index_name = f"indexes/{self.name}_index"

    # Forward rerank_top_n: otherwise it would only affect the name while the
    # re-ranker silently used the default of _setup.
    self._setup(full_refresh, rerank_top_n=rerank_top_n)


  def _get_post_processors(self, rerank_top_n:int = 4) -> list:
    """
    Builds the node post-processors applied by the query engine.

    Args:
    rerank_top_n (int): Number of nodes kept after re-ranking.

    Returns:
    list: A single-element list with the re-ranker.
    """
    from llama_index.core.postprocessor import SentenceTransformerRerank

    return [
        SentenceTransformerRerank(
            top_n=rerank_top_n,
            model="BAAI/bge-reranker-base"
        )
    ]



  def _setup(
    self,
    full_refresh:bool = False,
    similarity_top_k:int = 4,
    rerank_top_n:int = 4
    ) -> None:
    """
    Sets up the RAG step-by-step.

    Args:
    full_refresh (bool): Whether to build the index from 0.
    similarity_top_k (int): Number of similar documents to retrieve from the index.
    rerank_top_n (int): Number of nodes kept after re-ranking.
    """

    from llama_index.core import (
      VectorStoreIndex,
      StorageContext,
      load_index_from_storage
    )
    from llama_index.vector_stores.pinecone import PineconeVectorStore
    self.log_divider = "\n" + "*"*100 + "\n"

    if not full_refresh:
      print("Not building index from 0...")
      self.set_pinecone_index()
      self.vector_store = PineconeVectorStore(pinecone_index=self.pc_index)
      self.storage_context = StorageContext.from_defaults(
          persist_dir=self.index_name,
          vector_store=self.vector_store
      )
      # Configure the models on the load path too, so queries are embedded
      # with the same model the index was built with.
      self.set_up_embedding_model()
      self.set_up_llm_model()
      index = load_index_from_storage(self.storage_context)
    else:
      print("Building index from 0❗️")

      input_documents = self.set_input_documents()
      self.set_pinecone_index()
      self.vector_store = PineconeVectorStore(pinecone_index=self.pc_index)
      self.storage_context = StorageContext.from_defaults(
        vector_store=self.vector_store
      )
      self.set_up_embedding_model()
      self.set_up_llm_model()

      print("\nFinished setting up dependencies. 🎉")

      print("\nBuilding index...")

      index = VectorStoreIndex.from_documents(
          input_documents,
          embed_model=self.embed_model,
          storage_context=self.storage_context,
          text_key="text",
          show_progress=True
        )
      index.storage_context.persist(self.index_name)

    print("\nFinished building index 🚀\n")

    self.retriever = VectorStoreIndex.from_vector_store(
      self.vector_store,
      embed_model=self.embed_model
    ).as_retriever(
      similarity_top_k=similarity_top_k
    )

    self.set_up_prompt()
    self.query_engine = index.as_query_engine(
        text_qa_template=self.prompt,
        node_postprocessors=self._get_post_processors(rerank_top_n),
        # Retrieve at least as many candidates as the re-ranker keeps,
        # otherwise re-ranking could not select rerank_top_n nodes.
        similarity_top_k=max(similarity_top_k, rerank_top_n)
    )

    print(self.pc_index.describe_index_stats())
    self.display_prompt_dict()


# Build the re-ranking RAG from scratch and register it under its name
# ("RankRag_top3n"). This rebinds `rag`; the previous instance stays
# reachable through the `rags` registry.
rag = RankRag(rerank_top_n = 3, full_refresh=True)
rags[rag.name] = rag

# Query Rewrite

## Apply MultiQueriesRetriever

In [None]:
class RAGWithMultiQueries(RAG):
  """
    Child class of RAG whose query engine is wrapped in a MultiStepQueryEngine,
    which decomposes the question into sub-questions with a
    StepDecomposeQueryTransform before answering.
  """

  def __str__(self):
    return self.name

  def __init__(self, full_refresh:bool = False):
    """
    Args:
    full_refresh (bool): Whether to build the index from 0.
    """
    self.name = "RAGWithMultiQueries"
    self.index_name = f"indexes/{self.name}_index"

    self._setup(full_refresh)

  def _get_multi_step_query_engine(self, query_engine, steps_n:int = 3):
    """
    Wraps a query engine in a multi-step query engine.

    Args:
    query_engine: The base query engine that answers each sub-question.
    steps_n (int): Maximum number of decomposition steps.

    Returns:
    The MultiStepQueryEngine wrapping ``query_engine``.
    """
    from llama_index.core.query_engine import MultiStepQueryEngine
    from llama_index.core.indices.query.query_transform.base import StepDecomposeQueryTransform
    from llama_index.llms.openai import OpenAI

    gpt = OpenAI(
        temperature=0,
        model="gpt-4o-mini",
        logprobs=False,
        default_headers={}
    )
    step_decompose_transform = StepDecomposeQueryTransform(llm=gpt)
    print("Setting up multi-step query engine...")
    return MultiStepQueryEngine(
      query_engine=query_engine,
      query_transform=step_decompose_transform,
      # steps_n used to be accepted but never forwarded to the engine.
      num_steps=steps_n,
      index_summary="Eres usado para responder preguntas en base al material o texto de unos archivos académicos en pdf",
    )


  def _setup(
    self,
    full_refresh:bool = False,
    similarity_top_k:int = 4
    ) -> None:
    """
    Sets up the RAG step-by-step.

    Args:
    full_refresh (bool): Whether to build the index from 0.
    similarity_top_k (int): Number of similar documents to retrieve from the index.
    """

    from llama_index.core import (
      VectorStoreIndex,
      StorageContext,
      load_index_from_storage
    )
    from llama_index.vector_stores.pinecone import PineconeVectorStore
    self.log_divider = "\n" + "*"*100 + "\n"

    if not full_refresh:
      print("Not building index from 0...")
      self.set_pinecone_index()
      self.vector_store = PineconeVectorStore(pinecone_index=self.pc_index)
      self.storage_context = StorageContext.from_defaults(
          persist_dir=self.index_name,
          vector_store=self.vector_store
      )
      # Configure the models on the load path too, so queries are embedded
      # with the same model the index was built with.
      self.set_up_embedding_model()
      self.set_up_llm_model()
      index = load_index_from_storage(self.storage_context)
    else:
      print("Building index from 0❗️")

      input_documents = self.set_input_documents()
      self.set_pinecone_index()
      self.vector_store = PineconeVectorStore(pinecone_index=self.pc_index)
      self.storage_context = StorageContext.from_defaults(
        vector_store=self.vector_store
      )
      self.set_up_embedding_model()
      self.set_up_llm_model()

      print("\nFinished setting up dependencies. 🎉")

      print("\nBuilding index...")

      index = VectorStoreIndex.from_documents(
          input_documents,
          embed_model=self.embed_model,
          storage_context=self.storage_context,
          text_key="text",
          show_progress=True
        )
      index.storage_context.persist(self.index_name)

    print("\nFinished building index 🚀\n")

    self.retriever = VectorStoreIndex.from_vector_store(
      self.vector_store,
      embed_model=self.embed_model
    ).as_retriever(
      similarity_top_k=similarity_top_k
    )

    self.set_up_prompt()
    query_engine = index.as_query_engine(
        text_qa_template=self.prompt,
        similarity_top_k=similarity_top_k
    )
    self.query_engine = self._get_multi_step_query_engine(query_engine)

    print(self.pc_index.describe_index_stats())
    self.display_prompt_dict()

# Reuse the persisted index when possible; otherwise rebuild it from scratch.
try:
  rag_with_multi_queries = RAGWithMultiQueries()
except Exception as e:
  # `except Exception` (not a bare except) lets kernel interrupts propagate,
  # and the cause is printed because the failure may not be a missing index.
  print("No index persisted.")
  print("Reason:", e)
  rag_with_multi_queries = RAGWithMultiQueries(full_refresh=True)

rags[rag_with_multi_queries.name] = rag_with_multi_queries

# Finetune Embedding

In [None]:
class FinetunedRAG(RAG):
  """
  RAG with finetuned embedding.

  Identical to the base RAG except that the embedding model is a pickled,
  fine-tuned model (downloaded from S3 when it is not available locally).
  """
  def __init__(self, full_refresh:bool = False, similarity_top_k:int = 4):
    """
    Args:
    full_refresh (bool): Whether to build the index from 0.
    similarity_top_k (int): Number of similar documents to retrieve from the index.
    """
    self.name = "FinetunedRAG"
    self.index_name = f"indexes/{self.name}_index"
    self._setup(full_refresh, similarity_top_k)

  def set_up_embedding_model(self) -> None:
    """
    Sets up the embedding model for the RAG used to encode the documents.
    """
    from llama_index.core import Settings
    import os
    import pickle

    model_path = "models/finetuned_model.pkl"

    # To force the model to use the HuggingFace model and token
    os.environ.pop("OPENAI_API_KEY", None)

    try:
      # os.environ rejects None, so only export the token when it is defined.
      if HF_TOKEN is not None:
        os.environ["HUGGINGFACEHUB_API_TOKEN"] = HF_TOKEN

      print("Initializing embedding model...")

      if not os.path.exists(model_path):
        # presumably download_object stores the file as models/finetuned_model.pkl
        # — verify against Bucket.download_object.
        bucket = Bucket(AWS_S3_OUTPUT_BUCKET)
        bucket.download_object("finetuned_model.pkl", "models")

      # SECURITY: pickle.load executes arbitrary code contained in the file.
      # Only load artifacts produced by this project from a trusted bucket.
      with open(model_path, 'rb') as f:
        embed_model = pickle.load(f)

      Settings.embed_model = embed_model
      self.embed_model = embed_model
    finally:
      # Restore the OpenAI key even if the download or unpickling failed,
      # so a failed attempt does not leave the process without it.
      if OPENAI_API_KEY is not None:
        os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY

    print("Finished setting up embedding model:", embed_model)
    print(self.log_divider)

# Build the fine-tuned-embedding RAG from scratch and register it under its
# name. This rebinds `rag`; earlier instances stay reachable through `rags`.
rag = FinetunedRAG(full_refresh=True)
rags[rag.name] = rag

# Evaluation

In [None]:
# Collect every registered RAG instance for evaluation.
# The list is printed through its elements' repr, not through RAG.__str__.
rags_for_eval = list(rags.values())
print("RAGs to evaluate: ", rags_for_eval)