In [1]:
import os
from dotenv import load_dotenv
load_dotenv()


True

In [3]:
from langchain_groq import ChatGroq
groq_api_key = os.getenv("GROQ_API_KEY")
os.environ["HF_TOKEN"] = os.getenv("HF_TOKEN")

llm = ChatGroq(model="Llama3-8b-8192",groq_api_key=groq_api_key)


In [4]:
!pip install langchain_huggingface



In [16]:
from langchain_core.documents import Document

documents = [
    Document(
        page_content="Dogs are animals",
        metadata = {"source" : "mamma-pets-doc"}
    ),
    Document(
        page_content="Cats are independent pets",
        metadata = {"source":"mammal-pets-doc"}
    ),
    Document(
        page_content="GoldFish are popular pets for beginners",
        metadata = {"source":"mammal-pets-doc"}
    )
]

documents

[Document(metadata={'source': 'mamma-pets-doc'}, page_content='Dogs are animals'),
 Document(metadata={'source': 'mammal-pets-doc'}, page_content='Cats are independent pets'),
 Document(metadata={'source': 'mammal-pets-doc'}, page_content='GoldFish are popular pets for beginners')]

In [17]:
from langchain_huggingface import HuggingFaceEmbeddings

embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-V2")
embeddings



HuggingFaceEmbeddings(client=SentenceTransformer(
  (0): Transformer({'max_seq_length': 256, 'do_lower_case': False}) with Transformer model: BertModel 
  (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
  (2): Normalize()
), model_name='all-MiniLM-L6-V2', cache_folder=None, model_kwargs={}, encode_kwargs={}, multi_process=False, show_progress=False)

In [8]:
!pip install langchain_chroma

Collecting langchain_chroma
  Using cached langchain_chroma-0.1.3-py3-none-any.whl.metadata (1.5 kB)
Collecting chromadb!=0.5.4,!=0.5.5,<0.6.0,>=0.4.0 (from langchain_chroma)
  Using cached chromadb-0.5.3-py3-none-any.whl.metadata (6.8 kB)
Collecting fastapi<1,>=0.95.2 (from langchain_chroma)
  Downloading fastapi-0.114.0-py3-none-any.whl.metadata (27 kB)
Collecting build>=1.0.3 (from chromadb!=0.5.4,!=0.5.5,<0.6.0,>=0.4.0->langchain_chroma)
  Downloading build-1.2.2-py3-none-any.whl.metadata (6.2 kB)
Collecting chroma-hnswlib==0.7.3 (from chromadb!=0.5.4,!=0.5.5,<0.6.0,>=0.4.0->langchain_chroma)
  Downloading chroma-hnswlib-0.7.3.tar.gz (31 kB)
  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h  Preparing metadata (pyproject.toml) ... [?25ldone
[?25hCollecting uvicorn>=0.18.3 (from uvicorn[standard]>=0.18.3->chromadb!=0.5.4,!=0.5.5,<0.6.0,>=0.4.0->langchain_chroma)
  Using cached uvicorn-0.30.6-py3-none-any.whl.metadata (

In [19]:
from langchain_chroma import Chroma

vector_store = Chroma.from_documents(documents,embedding=embeddings)
vector_store

<langchain_chroma.vectorstores.Chroma at 0x74b0fcb60e00>

In [20]:
vector_store.similarity_search("cat")

Number of requested results 4 is greater than number of elements in index 3, updating n_results = 3


[Document(metadata={'source': 'mammal-pets-doc'}, page_content='Cats are independent pets'),
 Document(metadata={'source': 'mamma-pets-doc'}, page_content='Dogs are animals'),
 Document(metadata={'source': 'mammal-pets-doc'}, page_content='GoldFish are popular pets for beginners')]

In [22]:
# async query
await vector_store.asimilarity_search("cat")

Number of requested results 4 is greater than number of elements in index 3, updating n_results = 3


[Document(metadata={'source': 'mammal-pets-doc'}, page_content='Cats are independent pets'),
 Document(metadata={'source': 'mamma-pets-doc'}, page_content='Dogs are animals'),
 Document(metadata={'source': 'mammal-pets-doc'}, page_content='GoldFish are popular pets for beginners')]

In [23]:
vector_store.similarity_search_with_score("cat")

Number of requested results 4 is greater than number of elements in index 3, updating n_results = 3


[(Document(metadata={'source': 'mammal-pets-doc'}, page_content='Cats are independent pets'),
  0.8409361839294434),
 (Document(metadata={'source': 'mamma-pets-doc'}, page_content='Dogs are animals'),
  1.2248554229736328),
 (Document(metadata={'source': 'mammal-pets-doc'}, page_content='GoldFish are popular pets for beginners'),
  1.6656098365783691)]

### Retrievers

Langchain vectorStore objects do not subclass Runnable, and so cannot immediately be integrated into LangChain Expression Language chains

Langchain retrievers are runnables, so they implement a standard set of methods (e.g. Synchronous and Asynchronous invoke and batch operations) and are designed to be incorporated in LCEL chains

We can create a simple version of this ourselves, without subclassing Retriever. If we choose what method we wish to use to retrieve documents, we can create a runnable easily. Below we will build one around the similarity_search method:


In [24]:
from typing import List

from langchain_core.documents import Document
from langchain_core.runnables import RunnableLambda

retriever = RunnableLambda(vector_store.similarity_search).bind(k = 1)
retriever.batch(["cat","dog"])

[[Document(metadata={'source': 'mammal-pets-doc'}, page_content='Cats are independent pets')],
 [Document(metadata={'source': 'mamma-pets-doc'}, page_content='Dogs are animals')]]

In [26]:
retriever = vector_store.as_retriever(
    search_type="similarity",
    search_kwargs = {"k" : 1}
)

retriever.batch(["cat","dog"])

[[Document(metadata={'source': 'mammal-pets-doc'}, page_content='Cats are independent pets')],
 [Document(metadata={'source': 'mamma-pets-doc'}, page_content='Dogs are animals')]]

In [30]:
### RAG (Retrieval-Augmented Generation)

from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough

message = """
    Answer the following question using the provided context only
    {question}

    Context:
    {context}
"""

prompt_template = ChatPromptTemplate.from_messages([("human",message)])

rag_chain = {"context":retriever,"question":RunnablePassthrough()}|prompt_template|llm

response = rag_chain.invoke("Tell me about dogs")
response.content

'According to the provided context, dogs are animals.'