# Retrieval: Vectorstore-Backed Retriever

In [1]:
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_chroma import Chroma
from langchain_core.documents import Document

In [2]:
# Embedding model handed to the vector store as its embedding function.
# NOTE(review): presumably this must be the same model that was used when the
# persisted store was built -- confirm against the notebook that created it.
embedding = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# Open the Chroma store kept on disk under ./intro-to-ds-lectures
# (path is relative to the notebook's working directory).
vectorstore = Chroma(
    embedding_function=embedding,
    persist_directory="./intro-to-ds-lectures",
)




In [3]:
# Sanity check: how many documents the loaded store currently holds.
len(vectorstore.get()["documents"])

42

In [4]:
# Wrap the vector store in a retriever that uses MMR search and returns
# k=3 documents per query. Per the LangChain documentation, lambda_mult lies
# in [0, 1]: 1 means minimum diversity, 0 means maximum diversity, so 0.7
# leans towards relevance while still penalising near-duplicates.
retriever = vectorstore.as_retriever(
    search_type="mmr",
    search_kwargs={"k": 3, "lambda_mult": 0.7},
)

In [5]:
# Display the retriever's repr to confirm the configured search_type and search_kwargs.
retriever

VectorStoreRetriever(tags=['Chroma', 'HuggingFaceEmbeddings'], vectorstore=<langchain_chroma.vectorstores.Chroma object at 0x000001F821D52420>, search_type='mmr', search_kwargs={'k': 3, 'lambda_mult': 0.7})

In [6]:
# Natural-language query that is passed to the retriever in the next cell.
question = "What software do data scientists use?"

In [7]:
# Run the query through the retriever; the result is a list of Document objects.
# NOTE(review): the "Update/Delete of nonexisting embedding ID" lines recorded
# under this cell appear to be log messages from the vector store rather than
# errors (documents are still returned) -- confirm the persisted store is healthy.
retrieved_docs = retriever.invoke(question)

Update of nonexisting embedding ID: 55409552-1943-4892-949a-3b475ff9c840
Delete of nonexisting embedding ID: 55409552-1943-4892-949a-3b475ff9c840


In [8]:
# Inspect the raw Document objects (id, metadata, page_content) returned by the retriever.
retrieved_docs

[Document(id='0de11e39-e04e-4af8-907d-0dc21c098c76', metadata={'Course Title': 'Introduction to Data and Data Science', 'Lecture Title': 'Programming Languages & Software Employed in Data Science - All the Tools You Need'}, page_content='As you can see from the infographic, R, and Python are the two most popular tools across all columns. Their biggest advantage is that they can manipulate data and are integrated within multiple data and data science software platforms. They are not just suitable for mathematical and statistical computations. In other words, R, and Python are adaptable. They can solve a wide variety of business and data-related problems from beginning to the end'),
 Document(id='10a128e1-bc79-44f8-b692-a9c27668d5a4', metadata={'Course Title': 'Introduction to Data and Data Science', 'Lecture Title': 'Programming Languages & Software Employed in Data Science - All the Tools You Need'}, page_content='Great! We hope we gave you a good idea about the level of applicability 

In [9]:
# Print each retrieved document's text followed by the lecture it came from.
for doc in retrieved_docs:
    content = doc.page_content
    lecture_title = doc.metadata['Lecture Title']
    print(f"Page Content: {content}\n----------\nLecture Title:{lecture_title}\n")

Page Content: As you can see from the infographic, R, and Python are the two most popular tools across all columns. Their biggest advantage is that they can manipulate data and are integrated within multiple data and data science software platforms. They are not just suitable for mathematical and statistical computations. In other words, R, and Python are adaptable. They can solve a wide variety of business and data-related problems from beginning to the end
----------
Lecture Title:Programming Languages & Software Employed in Data Science - All the Tools You Need

Page Content: Great! We hope we gave you a good idea about the level of applicability of the most frequently used programming and software tools in the field of data science. Thank you for watching!
----------
Lecture Title:Programming Languages & Software Employed in Data Science - All the Tools You Need

Page Content: Among the many applications we have plotted, we can say there is an increasing amount of software designed