<img src="https://cdn-assets-cloud.frontify.com/local/frontify/eyJwYXRoIjoiXC9wdWJsaWNcL3VwbG9hZFwvc2NyZWVuc1wvMTk3OTA0XC80M2ZmNTdhYjc4OTdlZjUzY2IzMWUwNGU0MTVjZTY2NC0xNTYyMTAzMDk0LnBuZyJ9:frontify:7CTV2DtJsWvlctEUEyFK36JoXsZuVtHssMaDED6O5z0" width='150' />

# VECTOR SEARCH - RETRIEVAL AUGMENTED GENERATION

In [None]:
import ipywidgets as widgets
import os

# Credentials are read from the environment (never hard-coded) and wrapped in
# password widgets, because every later cell reads them through `.value`.
# A plain `str` has no `.value` attribute, so binding the raw environment
# strings to these names would raise AttributeError on first use.
# A missing environment variable still fails fast here with KeyError.
mongodb_uri_widget = widgets.Password(
    value=os.environ['MONGODB_URI'],
    description='MongoDB URI'
)
openai_api_key_widget = widgets.Password(
    value=os.environ['OPENAI_API_KEY'],
    description='OpenAI key'
)

# Retrieval Augmented Generation
### Using MongoDB Atlas, OpenAI and LangChain

In [None]:
from IPython.display import IFrame

# Source document for the demo; the same URI is handed to the PDF loader later.
PDF_URI = 'https://webassets.mongodb.com/MongoDB_Best_Practices_Guide.pdf'

# Last expression of the cell: renders the PDF inline as the cell output.
IFrame(src=PDF_URI, width=1280, height=500)

# Get connection to MongoDB Atlas

In [None]:
from pymongo import MongoClient
import os

# Target namespace for the demo content
mongo_db_name, mongo_coll_name = 'rag_demo', 'content'
mongo_db_and_coll_path = '{}.{}'.format(mongo_db_name, mongo_coll_name)

# Open the connection and resolve the collection handle step by step
mongo_client = MongoClient(mongodb_uri_widget.value)
mongo_db = mongo_client[mongo_db_name]
mongo_coll = mongo_db[mongo_coll_name]

# An empty filter counts every document; the formatted string is the cell output
doc_count = mongo_coll.count_documents({})
'{} document count is {:,}'.format(mongo_db_and_coll_path, doc_count)

In [None]:
# Run before the demo: an empty filter matches (and deletes) every document
# in the collection. The returned result object is shown as the cell output.
mongo_coll.delete_many(filter={})

# Select embeddings/transformer model

In [None]:
from langchain.embeddings import OpenAIEmbeddings

# Settings for the OpenAI embedding model used to vectorise chunks and queries
embedding_settings = {
    'model': 'text-embedding-ada-002',
    'openai_api_key': openai_api_key_widget.value,
}
embeddings_model = OpenAIEmbeddings(**embedding_settings)

# Informational note for the audience; this value is not enforced by the code
print('Max token length is 8,191')

# Split PDF into chunks

In [None]:
from langchain.document_loaders import PyPDFLoader

# Fetch the PDF from PDF_URI and split it into chunks using the loader's defaults
loader = PyPDFLoader(PDF_URI)
chunked_docs = loader.load_and_split()

# Cell output: how many chunks the split produced
chunk_total = len(chunked_docs)
'PDF has resulted in {:,} chunks'.format(chunk_total)

In [None]:
# Whitespace-delimited word count of each chunk; the largest one is reported.
# Note this counts words, not model tokens.
chunk_word_counts = [len(doc.page_content.split()) for doc in chunked_docs]
biggest_chunk_length = max(chunk_word_counts)
'The biggest chunk contains {:,} words'.format(biggest_chunk_length)

# Create vectors and add to MongoDB Atlas

In [None]:
from langchain.vectorstores import MongoDBAtlasVectorSearch

# Embed every chunk with the selected model and store the results in the
# MongoDB collection opened earlier.
vector_db = MongoDBAtlasVectorSearch.from_documents(
    documents=chunked_docs,
    embedding=embeddings_model,
    collection=mongo_coll,
)

In [None]:
# Count the collection again (empty filter = all documents) so the total can be
# compared with the number of chunks; the formatted string is the cell output.
doc_count = mongo_coll.count_documents(filter={})
'MongoDB document count in {} is {:,}'.format(mongo_db_and_coll_path, doc_count)

# Create MongoDB Atlas vector search index

In [None]:
from pymongo.errors import OperationFailure
import inspect

# Search index definition: dynamic mappings for all fields, plus an explicit
# kNN vector mapping on the 'embedding' field (1536 dimensions, cosine
# similarity). The dimension count has to match the size of the vectors the
# embedding model produces.
mongo_index_def = dict(
    name='rag_demo_index',
    definition=dict(
        mappings=dict(
            dynamic=True,
            fields=dict(
                embedding=dict(
                    type='knnVector',
                    dimensions=1536,
                    similarity='cosine',
                ),
            ),
        ),
    ),
)

# Ask Atlas to build the search index. A failure (for example when the index
# already exists) is reported instead of aborting the notebook run.
try:
    mongo_coll.create_search_index(mongo_index_def)
    print('Search index is building')
except OperationFailure as e:
    # `details` holds the server's error document and may be None or lack
    # 'codeName'; fall back to the exception text so that this handler can
    # never raise and hide the original failure.
    error_details = e.details or {}
    print(error_details.get('codeName', e))

# Create a LangChain handle for the vector search index

In [None]:
# Rebind `vector_db` to a handle that names the search index explicitly, so
# retrieval goes through 'rag_demo_index' on the "<db>.<collection>" namespace.
vector_db = MongoDBAtlasVectorSearch.from_connection_string(
    connection_string=mongodb_uri_widget.value,
    namespace=mongo_db_and_coll_path,
    embedding=embeddings_model,
    index_name='rag_demo_index',
)

# Set up the question function

In [None]:
from langchain.chains import ConversationalRetrievalChain
from langchain.schema.document import Document
from langchain.chat_models import ChatOpenAI

# Chat model that answers the questions; temperature 0.0 is the lowest
# sampling-randomness setting.
llm_settings = {
    'model_name': 'gpt-4-1106-preview',
    'temperature': 0.0,
    'openai_api_key': openai_api_key_widget.value,
}
llm_model = ChatOpenAI(**llm_settings)

# Retrieval chain: the vector store supplies context chunks, the LLM answers.
# Source documents are returned so the chunks that were used can be listed.
pdf_retriever = vector_db.as_retriever()
pdf_qa = ConversationalRetrievalChain.from_llm(
    llm=llm_model,
    retriever=pdf_retriever,
    return_source_documents=True,
)

def ask_question(question):
    """Ask the retrieval chain a question and print the answer and its sources.

    The question is sent to `pdf_qa` with an empty chat history, so each call
    is independent of earlier ones. The answer is printed first, followed by
    one line per source chunk showing its `_id` and `page` metadata.

    Args:
        question: The natural-language question to ask.

    Returns:
        None. All output is printed.
    """
    result = pdf_qa({'question': question, 'chat_history': []})
    print("Answer:{}\n".format(result.get('answer')))
    print('Chunks from Atlas Vector Search used for context:')

    # Default to an empty list so a result without 'source_documents' prints no
    # chunk lines instead of raising TypeError when iterating over None.
    for chunk in result.get('source_documents') or []:
        # Named chunk_id / chunk_page to avoid shadowing the builtin `id`.
        chunk_id = chunk.metadata['_id']
        chunk_page = chunk.metadata['page']
        print('ObjectId({}) | page {:,}'.format(chunk_id, chunk_page))

In [None]:
# To use a different LLM, change the above llm_model definition

# For GPT 3.5:
# from langchain.llms import OpenAI
#
# llm_model = OpenAI(
#     model_name='text-davinci-003',
#     temperature=0.0,
#     openai_api_key=openai_api_key_widget.value
# )

# For GPT 3.5 Turbo:
# from langchain.chat_models import ChatOpenAI
#
# llm_model = ChatOpenAI(
#     model_name='gpt-3.5-turbo',
#     temperature=0.0,
#     openai_api_key=openai_api_key_widget.value
# )

# For GPT 4:
# from langchain.chat_models import ChatOpenAI
#
# llm_model = ChatOpenAI(
#     model_name='gpt-4',
#     temperature=0.0,
#     openai_api_key=openai_api_key_widget.value
# )

# For GPT 4 Turbo Preview (limited to 100 reqs per day):
# from langchain.chat_models import ChatOpenAI
#
# llm_model = ChatOpenAI(
#     model_name='gpt-4-1106-preview',
#     temperature=0.0,
#     openai_api_key=openai_api_key_widget.value
# )

# Start asking questions

In [None]:
ask_question("How do I choose an instance size for MongoDB?")

In [None]:
ask_question("When should I use Compass?")

In [None]:
ask_question("How should I optimize query performance?")

In [None]:
# Use this cell to show that the majority of time spent waiting is due to the LLM, not Atlas Vector Search

import time

# Embed the query up front so only the database call is inside the timed span
search_vector = embeddings_model.embed_query("How should I optimize query performance?")

# Stage 1: vector search against the demo index.
# Stage 2: keep only the identifying fields of each hit.
vector_search_stage = {
    "$vectorSearch": {
        "index": "rag_demo_index",
        "path": "embedding",
        "queryVector": search_vector,
        "numCandidates": 100,
        "limit": 4,
    }
}
projection_stage = {"$project": {"_id": 1, "page": 1}}

before_time = time.perf_counter()
cursor = mongo_coll.aggregate([vector_search_stage, projection_stage])
elapsed_seconds = time.perf_counter() - before_time

# Report the elapsed time truncated to whole milliseconds
vector_search_ms = int(elapsed_seconds * 1_000)
print('Atlas Vector Search roundtrip took {} ms'.format(vector_search_ms))

# Cell output: the matched documents themselves
list(cursor)