In [18]:
import bs4
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import OllamaEmbeddings
import ollama
import chromadb

# from llama_index.llms import Ollama
# from llama_index import VectorStoreIndex, ServiceContext, download_loader
# from llama_index.storage.storage_context import StorageContext
# from llama_index.vector_stores.chroma import ChromaVectorStore

In [3]:
# Function to load, split, and retrieve documents
def load_and_retrieve_docs(url):
    loader = WebBaseLoader(
        web_paths=(url,),
        bs_kwargs=dict()
    )
    docs = loader.load()
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    splits = text_splitter.split_documents(docs)
    embeddings = OllamaEmbeddings(model="mistral")
    vectorstore = Chroma.from_documents(documents=splits, embedding=embeddings)
    return vectorstore.as_retriever()

# Function to format documents
def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)

# Function that defines the RAG chain
def rag_chain(url, question):
    retriever = load_and_retrieve_docs(url)
    retrieved_docs = retriever.invoke(question)
    formatted_context = format_docs(retrieved_docs)
    formatted_prompt = f"Question: {question}\n\nContext: {formatted_context}"
    response = ollama.chat(model='mistral', messages=[{'role': 'user', 'content': formatted_prompt}])
    return response['message']['content']

In [4]:
url = 'https://aws.amazon.com/what-is/retrieval-augmented-generation/'
loader = WebBaseLoader(
    web_paths=(url,),
    bs_kwargs=dict()
)
docs = loader.load()
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
splits = text_splitter.split_documents(docs)

In [5]:
splits[0].dict()['page_content']

'What is RAG? - Retrieval-Augmented Generation Explained - AWS\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n Skip to main content\n\n\n\n\n\nClick here to return to Amazon Web Services homepage\n\n\n\nContact Us\n Support\xa0 \nEnglish\xa0\nMy Account\xa0\n\n\n\n\n Sign In\n\n\n  Create an AWS Account \n\n\n\n\n\n\n\n\n\nProducts\nSolutions\nPricing\nDocumentation\nLearn\nPartner Network\nAWS Marketplace\nCustomer Enablement\nEvents\nExplore More \n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n Close \n\n\n\nعربي\nBahasa Indonesia\nDeutsch\nEnglish\nEspañol\nFrançais\nItaliano\nPortuguês\n\n\n\n\nTiếng Việt\nTürkçe\nΡусский\nไทย\n日本語\n한국어\n中文 (简体)\n中文 (繁體)\n\n\n\n\n\n Close \n\nMy Profile\nSign out of AWS Builder ID\nAWS Management Console\nAccount Settings\nBilling & Cost Management\nSecurity Credentials\nAWS Personal Health Dashboard\n\n\n\n Close \n\nSupport Center\nExpert Help\nKnowledge Center\nAWS Support Overview\nA

In [6]:
splits[1].dict()['page_content']

'Support Center\nExpert Help\nKnowledge Center\nAWS Support Overview\nAWS re:Post\n\n\n\n\n\n\n\n\n\n\n\n\nClick here to return to Amazon Web Services homepage\n\n\n\n\n\n\n\n  Get Started for Free \n\n\n  Contact Us \n\n\n\n\n\n\n\n\n\n\n\n\n Products \n Solutions \n Pricing \n Introduction to AWS \n Getting Started \n Documentation \n Training and Certification \n Developer Center \n Customer Success \n Partner Network \n AWS Marketplace \n Support \n AWS re:Post \n Log into Console \n Download the Mobile App \n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nWhat is Cloud Computing?\nCloud Computing Concepts Hub\nGenerative AI\nMachine Learning & AI\n\n\nWhat Is RAG?\n\n\nCreate an AWS Account\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n             Explore Free Machine Learning Offers \n           \n\n             Build, deploy, and run machine learning applications in the cloud for free \n            \n\n\n\n\n\n\n\n\n\n\n\n             Check out Machine Learning Services \n           \n\n     

In [7]:
len(splits[0].dict()['page_content'])-200

745

In [8]:
splits[0].dict()['page_content'][745:]

'\n\n\n\n Close \n\nSupport Center\nExpert Help\nKnowledge Center\nAWS Support Overview\nAWS re:Post\n\n\n\n\n\n\n\n\n\n\n\n\nClick here to return to Amazon Web Services homepage\n\n\n\n\n\n\n\n  Get Started for Free \n\n\n  Contact Us'

In [9]:
splits[1].dict()['page_content'][0:200]

'Support Center\nExpert Help\nKnowledge Center\nAWS Support Overview\nAWS re:Post\n\n\n\n\n\n\n\n\n\n\n\n\nClick here to return to Amazon Web Services homepage\n\n\n\n\n\n\n\n  Get Started for Free \n\n\n  Contact Us \n\n\n\n\n\n\n\n\n\n\n\n'

In [10]:
embeddings = OllamaEmbeddings(model="mistral")

In [35]:
%%time
db = Chroma.from_documents(documents=splits, embedding=embeddings, persist_directory="./chroma_db")

CPU times: total: 156 ms
Wall time: 49.2 s


In [43]:
from langchain.docstore.document import Document

new_doc = Document(
    page_content="Uncle Chub is a cool guy",
    metadata={
        "source": "JointBoi",
        "page": 1
    }
)
new_docs = [new_doc]
new_docs_id = db.add_documents(
    new_docs,
    ids=["5555555555"]
)

In [44]:
%%time
db.persist()

CPU times: total: 0 ns
Wall time: 0 ns


In [40]:
dir(Chroma)

['_Chroma__query_collection',
 '_LANGCHAIN_DEFAULT_COLLECTION_NAME',
 '__abstractmethods__',
 '__annotations__',
 '__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__len__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__slots__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_abc_impl',
 '_asimilarity_search_with_relevance_scores',
 '_cosine_relevance_score_fn',
 '_euclidean_relevance_score_fn',
 '_get_retriever_tags',
 '_max_inner_product_relevance_score_fn',
 '_select_relevance_score_fn',
 '_similarity_search_with_relevance_scores',
 'aadd_documents',
 'aadd_texts',
 'add_documents',
 'add_images',
 'add_texts',
 'adelete',
 'afrom_documents',
 'afrom_texts',
 'amax_marginal_relevance_search',
 'amax_marginal_relevance_search_by_vector'

In [36]:
query = 'how can I use rag?'

In [37]:
%%time
res = db.similarity_search(query)
print(res[0].page_content)

How does Retrieval-Augmented Generation work?
CPU times: total: 0 ns
Wall time: 2.41 s


In [48]:
type(db)

langchain_community.vectorstores.chroma.Chroma

In [45]:
%%time
db2 = Chroma(persist_directory="./chroma_db", embedding_function=embeddings)

CPU times: total: 0 ns
Wall time: 3.7 ms


In [47]:
%%time
query = "Uncle Chub?"
print(db2.as_retriever().invoke(query)[0].page_content)

What is the difference between Retrieval-Augmented Generation and semantic search?
CPU times: total: 0 ns
Wall time: 2.36 s


In [24]:
res[0].dict()

{'page_content': 'How does Retrieval-Augmented Generation work?',
 'metadata': {'description': 'What is Retrieval-Augmented Generation how and why businesses use Retrieval-Augmented Generation, and how to use Retrieval-Augmented Generation with AWS.',
  'language': 'en-US',
  'source': 'https://aws.amazon.com/what-is/retrieval-augmented-generation/',
  'title': 'What is RAG? - Retrieval-Augmented Generation Explained - AWS'},
 'type': 'Document'}

In [22]:
db.as_retriever().invoke(query)

[Document(page_content='How does Retrieval-Augmented Generation work?', metadata={'description': 'What is Retrieval-Augmented Generation how and why businesses use Retrieval-Augmented Generation, and how to use Retrieval-Augmented Generation with AWS.', 'language': 'en-US', 'source': 'https://aws.amazon.com/what-is/retrieval-augmented-generation/', 'title': 'What is RAG? - Retrieval-Augmented Generation Explained - AWS'}),
 Document(page_content='What is the difference between Retrieval-Augmented Generation and semantic search?', metadata={'description': 'What is Retrieval-Augmented Generation how and why businesses use Retrieval-Augmented Generation, and how to use Retrieval-Augmented Generation with AWS.', 'language': 'en-US', 'source': 'https://aws.amazon.com/what-is/retrieval-augmented-generation/', 'title': 'What is RAG? - Retrieval-Augmented Generation Explained - AWS'}),
 Document(page_content='You can think of the Large Language Model as an over-enthusiastic new employee who re

In [11]:
vectorstore = Chroma.from_documents(documents=splits, embedding=embeddings)

In [20]:
query = 'what is rag?'
res = vectorstore.similarity_search(query)
print(res[0].page_content)

What is the difference between Retrieval-Augmented Generation and semantic search?


In [14]:
question = 'what is rag?'
# vectorstore.as_retriever().invoke(question)

In [15]:
retriever = load_and_retrieve_docs(url)

In [17]:
question = 'How can I use rag'
retrieved_docs = retriever.invoke(question)
formatted_context = format_docs(retrieved_docs)
formatted_context

'You can think of the Large Language Model as an over-enthusiastic new employee who refuses to stay informed with current events but will always answer every question with absolute confidence. Unfortunately, such an attitude can negatively impact user trust and is not something you want your chatbots to emulate!\nRAG is one approach to solving some of these challenges. It redirects the LLM to retrieve relevant information from authoritative, pre-determined knowledge sources. Organizations have greater control over the generated text output, and users gain insights into how the LLM generates the response.\n\n\n\n\n\n\n\nWhat are the benefits of Retrieval-Augmented Generation?\n\nYou can think of the Large Language Model as an over-enthusiastic new employee who refuses to stay informed with current events but will always answer every question with absolute confidence. Unfortunately, such an attitude can negatively impact user trust and is not something you want your chatbots to emulate!\

In [26]:
retrieved_docs[0]

Document(page_content='You can think of the Large Language Model as an over-enthusiastic new employee who refuses to stay informed with current events but will always answer every question with absolute confidence. Unfortunately, such an attitude can negatively impact user trust and is not something you want your chatbots to emulate!\nRAG is one approach to solving some of these challenges. It redirects the LLM to retrieve relevant information from authoritative, pre-determined knowledge sources. Organizations have greater control over the generated text output, and users gain insights into how the LLM generates the response.\n\n\n\n\n\n\n\nWhat are the benefits of Retrieval-Augmented Generation?', metadata={'description': 'What is Retrieval-Augmented Generation how and why businesses use Retrieval-Augmented Generation, and how to use Retrieval-Augmented Generation with AWS.', 'language': 'en-US', 'source': 'https://aws.amazon.com/what-is/retrieval-augmented-generation/', 'title': 'Wha

In [20]:
chroma_client = chromadb.Client()
collection = chroma_client.create_collection(name="my_collection")

In [21]:
collection.add(
    documents=["Here is a book", "A book is here", "Here is a fucking book", "Here is a book!!"],
    metadatas=[
        {"source": "polite book", "page":1},
        {"source": "sophisicated book", "page":7},
        {"source": "bad mouth", "page":9},
        {"source": "cult movit", "script": 10},
    ],
    ids=["id1", "id2", "id3", "id4"]
)

C:\Users\doublebank\.cache\chroma\onnx_models\all-MiniLM-L6-v2\onnx.tar.gz: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 79.3M/79.3M [00:10<00:00, 8.23MiB/s]


In [22]:
query_texts = ["Where is my fucking book?"]

results = collection.query(
    query_texts=query_texts,
    n_results=2,
    include=[
        "distances",
        "metadatas",
        # "embeddings",
        "documents"
    ]
)

results

{'ids': [['id3', 'id2']],
 'distances': [[0.6216685771942139, 0.6718968749046326]],
 'metadatas': [[{'page': 9, 'source': 'bad mouth'},
   {'page': 7, 'source': 'sophisicated book'}]],
 'embeddings': None,
 'documents': [['Here is a fucking book', 'A book is here']],
 'uris': None,
 'data': None}

In [23]:
def text_with_source(results):
    idx = results['distances'][0].index(min(results['distances'][0]))
    return {
        'ids': results['ids'][0][idx],
        'distances': results['distances'][0][idx],
        'metadatas': results['metadatas'][0][idx],
        'documents': results['documents'][0][idx]
    }

In [24]:
text_with_source(results)

{'ids': 'id3',
 'distances': 0.6216685771942139,
 'metadatas': {'page': 9, 'source': 'bad mouth'},
 'documents': 'Here is a fucking book'}

# What do we need to do  
- Understand how text extraction work: url, pdf  
- Understand how langchain works  
- Understand how to store data into vector database  
- Understand how to retrieve similar vectors from vector database  
- Understand how to generate the prompt  
- Understand how to fetch the prompt  

In [47]:
url = 'https://aws.amazon.com/what-is/retrieval-augmented-generation/'
loader = WebBaseLoader(
    web_paths=(url,),
    bs_kwargs=dict()
)
docs = loader.load()

In [48]:
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
splits = text_splitter.split_documents(docs)

In [49]:
embeddings = OllamaEmbeddings(model="mistral")
vectorstore = Chroma.from_documents(documents=splits, embedding=embeddings)

In [51]:
dir(vectorstore)

['_Chroma__query_collection',
 '_LANGCHAIN_DEFAULT_COLLECTION_NAME',
 '__abstractmethods__',
 '__annotations__',
 '__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__len__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__slots__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_abc_impl',
 '_asimilarity_search_with_relevance_scores',
 '_client',
 '_client_settings',
 '_collection',
 '_cosine_relevance_score_fn',
 '_embedding_function',
 '_euclidean_relevance_score_fn',
 '_get_retriever_tags',
 '_max_inner_product_relevance_score_fn',
 '_persist_directory',
 '_select_relevance_score_fn',
 '_similarity_search_with_relevance_scores',
 'aadd_documents',
 'aadd_texts',
 'add_documents',
 'add_images',
 'add_texts',
 'adelete',
 'afrom_documents'

In [53]:
retriever = vectorstore.as_retriever()

In [65]:
question = "How can I use rag on AWS"

In [66]:
retrieved_docs = retriever.invoke(question)

In [67]:
len(retrieved_docs)

4

In [68]:
retrieved_docs[0].dict()

{'page_content': 'Retrieve up to 100 semantically-relevant passages of up to 200 token words each, ordered by relevance.\nUse pre-built connectors to popular data technologies like Amazon Simple Storage Service, SharePoint, Confluence, and other websites.\nSupport a wide range of document formats such as HTML, Word, PowerPoint, PDF, Excel, and text files.\nFilter responses based on those documents that the end-user permissions allow.\n\nAmazon also offers options for organizations who want to build more custom generative AI solutions. Amazon SageMaker JumpStart is a ML hub with FMs, built-in algorithms, and prebuilt ML solutions that you can deploy with just a few clicks. You can speed up RAG implementation by referring to existing SageMaker notebooks and code examples.\nGet started with Retrieval-Augmented Generation on AWS by creating a free account today\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n Next Steps on AWS',
 'metadata': {'description': 'What is Retrieval-Augmented Generation how and why

In [69]:
retrieved_docs[1].dict()

{'page_content': 'Retrieve up to 100 semantically-relevant passages of up to 200 token words each, ordered by relevance.\nUse pre-built connectors to popular data technologies like Amazon Simple Storage Service, SharePoint, Confluence, and other websites.\nSupport a wide range of document formats such as HTML, Word, PowerPoint, PDF, Excel, and text files.\nFilter responses based on those documents that the end-user permissions allow.\n\nAmazon also offers options for organizations who want to build more custom generative AI solutions. Amazon SageMaker JumpStart is a ML hub with FMs, built-in algorithms, and prebuilt ML solutions that you can deploy with just a few clicks. You can speed up RAG implementation by referring to existing SageMaker notebooks and code examples.\nGet started with Retrieval-Augmented Generation on AWS by creating a free account today\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n Next Steps on AWS',
 'metadata': {'description': 'What is Retrieval-Augmented Generation how and why

In [70]:
retrieved_docs[2].dict()

{'page_content': 'Retrieve up to 100 semantically-relevant passages of up to 200 token words each, ordered by relevance.\nUse pre-built connectors to popular data technologies like Amazon Simple Storage Service, SharePoint, Confluence, and other websites.\nSupport a wide range of document formats such as HTML, Word, PowerPoint, PDF, Excel, and text files.\nFilter responses based on those documents that the end-user permissions allow.\n\nAmazon also offers options for organizations who want to build more custom generative AI solutions. Amazon SageMaker JumpStart is a ML hub with FMs, built-in algorithms, and prebuilt ML solutions that you can deploy with just a few clicks. You can speed up RAG implementation by referring to existing SageMaker notebooks and code examples.\nGet started with Retrieval-Augmented Generation on AWS by creating a free account today\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n Next Steps on AWS',
 'metadata': {'description': 'What is Retrieval-Augmented Generation how and why

In [71]:
retrieved_docs[3].dict()

{'page_content': 'Retrieve relevant information\nThe next step is to perform a relevancy search. The user query is converted to a vector representation and matched with the vector databases. For example, consider a smart chatbot that can answer human resource questions for an organization. If an employee searches, "How much annual leave do I have?" the system will retrieve annual leave policy documents alongside the individual employee\'s past leave record. These specific documents will be returned because they are highly-relevant to what the employee has input. The relevancy was calculated and established using mathematical vector calculations and representations.\nAugment the LLM prompt\nNext, the RAG model augments the user input (or prompts) by adding the relevant retrieved data in context. This step uses prompt engineering techniques to communicate effectively with the LLM. The augmented prompt allows the large language models to generate an accurate answer to user queries.\nUpdat

In [72]:
formatted_context = format_docs(retrieved_docs)

In [73]:
len(formatted_context)

3632

In [74]:
formatted_context

'Retrieve up to 100 semantically-relevant passages of up to 200 token words each, ordered by relevance.\nUse pre-built connectors to popular data technologies like Amazon Simple Storage Service, SharePoint, Confluence, and other websites.\nSupport a wide range of document formats such as HTML, Word, PowerPoint, PDF, Excel, and text files.\nFilter responses based on those documents that the end-user permissions allow.\n\nAmazon also offers options for organizations who want to build more custom generative AI solutions. Amazon SageMaker JumpStart is a ML hub with FMs, built-in algorithms, and prebuilt ML solutions that you can deploy with just a few clicks. You can speed up RAG implementation by referring to existing SageMaker notebooks and code examples.\nGet started with Retrieval-Augmented Generation on AWS by creating a free account today\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n Next Steps on AWS\n\nRetrieve up to 100 semantically-relevant passages of up to 200 token words each, ordered by rele

In [75]:
formatted_prompt = f"Question: {question}\n\nContext: {formatted_context}"

In [81]:
print(formatted_prompt.replace("\n\n\n\n\n", '\n'))

Question: How can I use rag on AWS

Context: Retrieve up to 100 semantically-relevant passages of up to 200 token words each, ordered by relevance.
Use pre-built connectors to popular data technologies like Amazon Simple Storage Service, SharePoint, Confluence, and other websites.
Support a wide range of document formats such as HTML, Word, PowerPoint, PDF, Excel, and text files.
Filter responses based on those documents that the end-user permissions allow.

Amazon also offers options for organizations who want to build more custom generative AI solutions. Amazon SageMaker JumpStart is a ML hub with FMs, built-in algorithms, and prebuilt ML solutions that you can deploy with just a few clicks. You can speed up RAG implementation by referring to existing SageMaker notebooks and code examples.
Get started with Retrieval-Augmented Generation on AWS by creating a free account today


 Next Steps on AWS

Retrieve up to 100 semantically-relevant passages of up to 200 token words each, ordere

In [77]:
print(formatted_prompt)

Question: How can I use rag on AWS

Context: Retrieve up to 100 semantically-relevant passages of up to 200 token words each, ordered by relevance.
Use pre-built connectors to popular data technologies like Amazon Simple Storage Service, SharePoint, Confluence, and other websites.
Support a wide range of document formats such as HTML, Word, PowerPoint, PDF, Excel, and text files.
Filter responses based on those documents that the end-user permissions allow.

Amazon also offers options for organizations who want to build more custom generative AI solutions. Amazon SageMaker JumpStart is a ML hub with FMs, built-in algorithms, and prebuilt ML solutions that you can deploy with just a few clicks. You can speed up RAG implementation by referring to existing SageMaker notebooks and code examples.
Get started with Retrieval-Augmented Generation on AWS by creating a free account today














 Next Steps on AWS

Retrieve up to 100 semantically-relevant passages of up to 200 token words 

In [None]:
retrieved_docs = retriever.invoke(question)
formatted_context = format_docs(retrieved_docs)
formatted_prompt = f"Question: {question}\n\nContext: {formatted_context}"
response = ollama.chat(model='mistral', messages=[{'role': 'user', 'content': formatted_prompt}])

In [90]:
response = ollama.chat(model='mistral', messages=[{'role': 'user', 'content': formatted_prompt.replace("\n\n\n\n\n", '\n')}])

In [91]:
print(response['message']['content'])

 To use Retrieval-Augmented Generation (RAG) on AWS, follow these next steps:

1. **Set up your environment:** Get started by creating a free Amazon Web Services (AWS) account if you don't already have one. You can sign up at https://aws.amazon.com. After signing up, familiarize yourself with the AWS Management Console and choose the services you'll need for RAG implementation - Amazon SageMaker, Amazon Simple Storage Service (S3), and other relevant services.

2. **Store your data:** Use Amazon S3 to store your documents in a centralized location that can be accessed by RAG models. Make sure your data is indexed using Amazon SageMaker's search service, such as Amazon Elasticsearch or Amazon OpenSearch Service, for efficient retrieval.

3. **Retrieve relevant information:** Perform relevancy searches based on user queries by converting the query into a vector representation and matching it with your vector databases. Use pre-built connectors to access data from popular technologies lik

In [92]:
response = ollama.chat(model='mistral', messages=[{'role': 'user', 'content': formatted_prompt}])

In [93]:
print(response['message']['content'])

 To use RAG (Retrieval-Augmented Generation) on AWS, follow these steps:

1. Create an AWS account: If you don't have one already, sign up for a free AWS account at https://aws.amazon.com/.
2. Set up necessary services: Install and configure the Amazon SageMaker notebook instance with the required dependencies, such as RAG library and data connectors (SharePoint, Confluence, etc.). You can use Amazon SageMaker JumpStart to get started quickly.
3. Prepare your data: Store your document datasets in AWS services like Amazon Simple Storage Service (S3), SharePoint, or Confluence, which RAG can connect to directly. Ensure proper access controls are in place for the documents.
4. Perform relevancy search: Use RAG model to convert user queries into vector representations and search for relevant passages from your document datasets using pre-built connectors and filtering based on end-user permissions. You may use existing SageMaker notebooks or code examples as a starting point.
5. Augment th

In [94]:
response = ollama.chat(model='mistral', messages=[{'role': 'user', 'content': formatted_prompt.replace("\n\n\n\n\n", '\n')}])
print(response['message']['content'])

 It seems like you are describing how to use Retrieval-Augmented Generation (RAG) on Amazon Web Services (AWS). Here's a more detailed explanation of the next steps after retrieving semantically-relevant passages using RAG on AWS:

1. Relevancy search: The user query is converted into a vector representation and matched with the vector databases. This process involves calculating the relevance between the user query and the documents in the database using mathematical vector calculations and representations. The documents that have the highest relevance scores are retrieved and considered semantically relevant to the user query.
2. Augment the LLM prompt: In this step, the RAG model augments the user input (or prompts) by adding the relevant retrieved data in context. This is done using prompt engineering techniques to effectively communicate with the Large Language Model (LLM). The augmented prompt allows the LLM to generate an accurate answer to user queries based on the relevant inf

In [95]:
response = ollama.chat(model='mistral', messages=[{'role': 'user', 'content': formatted_prompt}])
print(response['message']['content'])

 It seems that you are describing how to use Retrieval-Augmented Generation (RAG) on Amazon Web Services (AWS), specifically for information retrieval and augmenting language model prompts with relevant data. Here's a step-by-step guide based on the provided context:

1. Set up an AWS account: If you don't have one already, sign up for a free AWS account to get started. You can create an account at https://aws.amazon.com/.

2. Use RAG connectors: RAG supports pre-built connectors to popular data technologies such as Amazon Simple Storage Service (S3), SharePoint, Confluence, and other websites. Configure these connectors to access your data sources.

3. Upload documents: Upload the required document formats (HTML, Word, PowerPoint, PDF, Excel, and text files) into AWS S3 or other supported storage services.

4. Retrieve semantically-relevant passages: Use RAG to search and retrieve up to 100 semantically-relevant passages of up to 200 token words each, ordered by relevance. Filter resp

In [19]:
chroma_client = chromadb.Client()
collection = chroma_client.create_collection(name="my_collection")