### Installing the required libraries

In [None]:
pip install --quiet --upgrade llama-index llama-index-vector-stores-mongodb llama-index-embeddings-openai pymongo

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/1.2 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.4/1.2 MB[0m [31m11.7 MB/s[0m eta [36m0:00:01[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m1.2/1.2 MB[0m [31m22.2 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.2/1.2 MB[0m [31m12.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m307.7/307.7 kB[0m [31m15.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m15.5/15.5 MB[0m [31m61.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.2/1.2 MB[0m [31m42.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m169.4/169.4 kB[0m [31m8.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━

### Library Imports

In [None]:
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex, StorageContext
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core.retrievers import VectorIndexRetriever
from llama_index.core.settings import Settings
from llama_index.core.vector_stores import MetadataFilter, MetadataFilters, ExactMatchFilter, FilterOperator
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.llms.openai import OpenAI
from llama_index.vector_stores.mongodb import MongoDBAtlasVectorSearch
import getpass, os, pymongo, pprint

### Setting up the OpenAI API key and the MongoDB Atlas connection string

In [None]:
# Ask for both secrets interactively instead of hardcoding them in the notebook.
# The OpenAI key is exported through the environment; the Atlas connection string
# is kept in a variable for the MongoDB client created later.
os.environ["OPENAI_API_KEY"] = getpass.getpass(prompt="OpenAI API Key:")
ATLAS_CONNECTION_STRING = getpass.getpass(prompt="MongoDB Atlas SRV Connection String:")

OpenAI API Key:··········
MongoDB Atlas SRV Connection String:··········


### LlamaIndex-based LLM and embedding settings

In [None]:
# Global LlamaIndex defaults used by every index / query engine created below.
Settings.llm = OpenAI()  # library-default OpenAI chat model
# NOTE: the Atlas index definition later in this notebook declares numDimensions 1536,
# which has to agree with the output size of the embedding model chosen here.
Settings.embed_model = OpenAIEmbedding(
    model="text-embedding-ada-002",
)
# Chunking parameters applied when documents are split into nodes.
Settings.chunk_size = 256
Settings.chunk_overlap = 20

### Data Loading

In [None]:
# Load the WaitGPT paper from the local data/ folder (path is relative to the notebook's working directory).
sample_data = SimpleDirectoryReader(
    input_files=["data/waitgpt.pdf"],
).load_data()

### Vector Store Creation

In [None]:
# Connect to the Atlas cluster with the connection string entered earlier.
mongodb_client = pymongo.MongoClient(ATLAS_CONNECTION_STRING)

# Vector store backed by the llamaindex_db.test collection.
# NOTE(review): "vector_index" presumably has to match the name given to the
# search index created in the Atlas UI -- confirm when creating the index.
atlas_vector_store = MongoDBAtlasVectorSearch(
    mongodb_client,
    db_name="llamaindex_db",
    collection_name="test",
    index_name="vector_index",
)

# Storage context that routes index writes/reads to the Atlas vector store.
vector_store_context = StorageContext.from_defaults(
    vector_store=atlas_vector_store,
)



### Storing data as Vector Embeddings

In [None]:
# Split the loaded documents into nodes, embed them, and store them through the
# Atlas-backed storage context; progress bars are shown for both stages.
# NOTE(review): re-running this cell presumably inserts the same chunks into the
# collection again -- confirm (or clear the collection) before re-executing.
vector_store_index = VectorStoreIndex.from_documents(
    sample_data,
    storage_context=vector_store_context,
    show_progress=True,
)

Parsing nodes:   0%|          | 0/14 [00:00<?, ?it/s]

Generating embeddings:   0%|          | 0/110 [00:00<?, ?it/s]

### Set up the Atlas Vector Search index in the Atlas web UI using the following JSON definition



In [None]:
# Index definition to paste into Atlas (copy only the JSON object below, not this comment):
# a 1536-dimension cosine vector field on "embedding", plus a filter field on
# "metadata.page_label" so queries can be restricted by page label.
{
   "fields": [
      {
         "type": "vector",
         "path": "embedding",
         "numDimensions": 1536,
         "similarity": "cosine"
      },
      {
         "type": "filter",
         "path": "metadata.page_label"
      }
   ]
}

{'fields': [{'type': 'vector',
   'path': 'embedding',
   'numDimensions': 1536,
   'similarity': 'cosine'},
  {'type': 'filter', 'path': 'metadata.page_label'}]}

### RAG

In [None]:
# Retriever that returns the 2 most similar nodes from the Atlas-backed index.
vector_store_retriever = VectorIndexRetriever(
    index=vector_store_index,
    similarity_top_k=2,
)

# Query engine: retrieve context with the retriever, then answer with the configured LLM.
query_engine = RetrieverQueryEngine(retriever=vector_store_retriever)
response = query_engine.query('What is WaitGPT?')

# Show the generated answer followed by the nodes it was grounded on.
print(response)
print("\nSource documents: ")
pprint.pprint(response.source_nodes)

WaitGPT is a prototype developed to facilitate monitoring and steering of data analysis performed by LLMs. It aims to enhance error detection and increase overall confidence in the results through its usability and effectiveness, as demonstrated in a user study. WaitGPT translates stream-based code into a visualization of key data operations, allowing for granular interactions to empower users in monitoring and steering data analysis conducted by LLM agents.

Source documents: 
[NodeWithScore(node=TextNode(id_='a1d10be9-fd5e-41e2-a25f-8171f77c7087', embedding=None, metadata={'page_label': '1', 'file_name': 'waitgpt.pdf', 'file_path': 'data/waitgpt.pdf', 'file_type': 'application/pdf', 'file_size': 4796737, 'creation_date': '2024-08-16', 'last_modified_date': '2024-08-16'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', '

### RAG with metadata filtering

In [None]:
# Restrict retrieval to nodes whose page label is exactly "12"
# (the value is a string, matching how page_label appears in the node metadata).
page_filter = ExactMatchFilter(key="metadata.page_label", value="12")
metadata_filters = MetadataFilters(filters=[page_filter])

# Same pipeline as the unfiltered query, but with the metadata filter applied
# and up to 5 nodes retrieved.
vector_store_retriever = VectorIndexRetriever(
    index=vector_store_index,
    filters=metadata_filters,
    similarity_top_k=5,
)
query_engine = RetrieverQueryEngine(retriever=vector_store_retriever)
response = query_engine.query('What is WaitGPT?')

# Show the generated answer followed by the nodes it was grounded on.
print(response)
print("\nSource documents: ")
pprint.pprint(response.source_nodes)

WaitGPT is a novel interface design that transforms LLM-generated code into an accessible, interactive representation to address the reliability issues and user challenges in LLM-powered data analysis tools.

Source documents: 
[NodeWithScore(node=TextNode(id_='640da0ef-c300-4876-af75-bf2e5c90af43', embedding=None, metadata={'page_label': '12', 'file_name': 'waitgpt.pdf', 'file_path': 'data/waitgpt.pdf', 'file_type': 'application/pdf', 'file_size': 4796737, 'creation_date': '2024-08-16', 'last_modified_date': '2024-08-16'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='e23667b6-3de5-4001-8624-5873c3e44715', node_type=<ObjectType.DOCUMENT: '4'>, metadata={'page_label': '12', 'file_name': 'waitgpt.pdf', 'fi

### Similarity Search

In [None]:
# Plain similarity search: fetch the 2 closest nodes without generating an LLM answer.
retriever = vector_store_index.as_retriever(
    similarity_top_k=2,
)
nodes = retriever.retrieve(
    "How does WaitGPT addresses reliability issues and user challenge in LLM apps?"
)

# Print each retrieved node (ID, text preview and similarity score).
for node in nodes:
    print(node)

Node ID: 640da0ef-c300-4876-af75-bf2e5c90af43
Text: By translating stream-based code into a growing visualization of
the key data operations and affording granular interactions, WaitGPT
empowers users to monitor and steer data analysis performed by LLM
agents. A user study (N=12) covering basic data analysis tasks
demonstrated that WaitGPT could enhance error detection rate and
improve overall co...
Score:  0.926

Node ID: a1d10be9-fd5e-41e2-a25f-8171f77c7087
Text: Publication rights licensed to ACM. This is the author’s version
of the work. It is posted here for your personal use. Not for
redistribution. The definitive Version of Record was published in The
37th Annual ACM Symposium on User Interface Software and Technology
(UIST ’24), October 13–16, 2024, Pittsburgh, PA, USA ,
https://doi.org/10.1145/365...
Score:  0.926



### Implementation of ChatEngine for Back-and-Forth Conversation

In [None]:
# Chat engine over the same index, created in "condense_question" mode with streaming enabled.
chat_engine = vector_store_index.as_chat_engine(
    chat_mode="condense_question",
    streaming=True,
)

# First turn of the conversation; the answer is printed as it streams in.
response_stream = chat_engine.stream_chat(
    "What are the benefits of WaitGPT"
)
response_stream.print_response_stream()

WaitGPT facilitates monitoring and steering of data analysis performed by LLMs, enabling users to enhance error detection and increase their overall confidence in the results. Additionally, WaitGPT empowers users to monitor and steer data analysis performed by LLM agents, enhancing error detection rate and improving overall confidence in the results.

In [None]:
# Follow-up turn on the same chat_engine instance created in the previous cell.
response_stream = chat_engine.stream_chat(
    "How is WaitGPT able to enhance LLMs?"
)
response_stream.print_response_stream()

WaitGPT facilitates monitoring and steering of data analysis performed by LLMs by translating stream-based code into a growing visualization of key data operations, allowing for granular interactions. This approach empowers users to actively monitor and steer the data analysis process, leading to enhanced error detection rates and increased overall confidence in the results.