### Install Libraries

In [14]:
# One-time environment setup. These install commands are left commented out;
# uncomment and run the ones that are missing from the kernel's environment.
# NOTE(review): inside a notebook `%pip install ...` is generally preferred over
# `!pip install ...` because it targets the running kernel's environment.
#!pip install pypdf
#!pip install chromadb
#!pip install langchain-chroma
#!pip install langchain-community
#!pip install sentence-transformers
# The import cell also uses `langchain_text_splitters`; install it explicitly
# if it is not already present as a transitive dependency:
#!pip install langchain-text-splitters

### Import Libraries

In [2]:
from langchain_chroma import Chroma
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.embeddings.sentence_transformer import (SentenceTransformerEmbeddings,)
from langchain_text_splitters import RecursiveCharacterTextSplitter

### Load the Document

In [3]:
# Source PDF, resolved relative to the notebook's working directory.
pdf_path = "rag_data.pdf"

# Build the loader first, then read the PDF into LangChain documents.
loader = PyPDFLoader(file_path=pdf_path)
documents = loader.load()

### Split into chunks

In [5]:
# Splitter configuration: chunks of size 1000 with an overlap of 50 between
# consecutive chunks, so text cut at a boundary still appears in both neighbours.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=50,
    chunk_size=1000,
)

# Break the loaded documents into the chunks that will be embedded and indexed.
docs = text_splitter.split_documents(documents=documents)

### Create Embeddings

In [15]:
# Sentence-transformer embedding model that is handed to the Chroma store below.
embedding_function = SentenceTransformerEmbeddings(
    model_name="all-MiniLM-L6-v2",
)

### Load into Chroma

In [8]:
# Embed every chunk with `embedding_function` and index the vectors in Chroma.
db = Chroma.from_documents(
    documents=docs,
    embedding=embedding_function,
)

### Query the Database

In [10]:
query = "explain the background for LLMs?"
docs = db.similarity_search(query)

### Get Relevant Chunks

In [16]:
print(docs[0].page_content)

### Congratulations, you completed Module 2!