In [None]:
# Install dependencies
!pip install llama-index openai

In [None]:
# Make the OpenAI API key available through the OPENAI_API_KEY environment variable.
# On Colab the key is read from the notebook secret named 'openai_api_key';
# anywhere else the key is expected to be exported in the environment already.
import os

try:
  from google.colab import userdata
except ImportError:
  userdata = None  # google.colab is unavailable, i.e. not running on Colab

if userdata is not None:
  open_ai_key = userdata.get('openai_api_key')
else:
  open_ai_key = os.environ.get("OPENAI_API_KEY")

# Fail here with an actionable message instead of later, inside the first API call.
if not open_ai_key:
  raise RuntimeError(
      "No OpenAI API key found. On Colab, add a secret named 'openai_api_key'; "
      "otherwise export OPENAI_API_KEY before starting the notebook."
  )

os.environ["OPENAI_API_KEY"] = open_ai_key

In [None]:
# Mount Google Drive when running on Colab so the documents and the saved index can
# be read from it. Also, adjust the database and saved index paths below to point to
# the correct folder. Outside Colab there is nothing to mount and this cell does nothing.
try:
  from google.colab import drive
except ImportError:
  drive = None  # google.colab is unavailable, i.e. not running on Colab

if drive is not None:
  drive.mount('/content/drive')

In [None]:
# Folders holding the source documents (database_path) and the persisted index
# (index_path). Later cells read both names, so they must be defined here.
# Adjust the values below to point to your own folders.
import sys

# The Colab kernel has google.colab loaded; a local Jupyter kernel does not.
on_colab = "google.colab" in sys.modules

if on_colab:
  database_path = "/content/drive/MyDrive/research_papers_rag/documents"
  index_path = "/content/drive/MyDrive/research_papers_rag/index"
else:
  database_path = "/home/akshat.pandya/research_papers_q_n_a/example_database"
  index_path = "/home/akshat.pandya/research_papers_q_n_a/example_index"

In [None]:
def create_query_engine(llm, index, response_mode='tree_summarize'):
  """Build a query engine on top of an index.

  Args:
    llm: Language model object forwarded to the engine as `llm`.
    index: Object exposing `as_query_engine(...)`, e.g. the loaded vector index.
    response_mode: Response synthesis mode forwarded to `as_query_engine`.
      Defaults to 'tree_summarize', the mode this notebook has always used.

  Returns:
    Whatever `index.as_query_engine(...)` returns.
  """
  return index.as_query_engine(llm=llm, response_mode=response_mode)

In [None]:
def query_database(query, query_engine):
  """Run `query` through `query_engine` and hand back the engine's response.

  Args:
    query: The question to ask.
    query_engine: Object exposing a `query(...)` method.

  Returns:
    The value returned by `query_engine.query(query)`, unchanged.
  """
  return query_engine.query(query)

In [None]:
# Load the documents, build a vector index over them and store a local copy of the index.
# Indexing re-reads and re-embeds the whole document folder, so it is skipped when a
# persisted index already exists. Set FORCE_REINDEX = True to rebuild it, for example
# after adding documents to the folder.
import os

from llama_index.core import VectorStoreIndex, SimpleDirectoryReader

FORCE_REINDEX = False

# persist() writes docstore.json into persist_dir, so its presence marks a saved index.
index_is_persisted = os.path.isfile(os.path.join(index_path, "docstore.json"))

if FORCE_REINDEX or not index_is_persisted:
  documents = SimpleDirectoryReader(database_path).load_data()
  index = VectorStoreIndex.from_documents(documents)
  index.storage_context.persist(persist_dir=index_path)

In [None]:
# Read the persisted index back from disk.
from llama_index.core import StorageContext, load_index_from_storage

# Point a storage context at the directory the index was saved to ...
storage_context = StorageContext.from_defaults(persist_dir=index_path)

# ... and reconstruct the index object from it.
index = load_index_from_storage(storage_context=storage_context)

In [None]:
from llama_index.llms.openai import OpenAI

# Define the LLM to use for querying; it is handed to create_query_engine below.
# No API key is passed here — presumably the client picks up the OPENAI_API_KEY
# environment variable set earlier in the notebook (confirm against the library docs).
llm = OpenAI(model="gpt-4o")

In [None]:
# Build the query engine that answers questions over `index` using `llm`.
query_engine = create_query_engine(llm, index)

In [None]:
# Ask about one specific paper, referred to by name.
print(query_database("What is the paper Auto-RAG about?", query_engine))

In [None]:
# Ask a question that spans the whole collection rather than a single paper.
print(query_database("List the titles of all the papers that implement a new RAG technique. Ignore the papers in the references section.", query_engine))

In [None]:
# Probe how the engine handles a topic the database may not cover: the prompt asks
# for an explicit "I don't know" in that case.
print(query_database("What are the latest advancements in bioengineering? If the database doesn't have any information about this, please say I don't know.", query_engine))