In [23]:
"""
- https://python.langchain.com/v0.2/docs/tutorials/retrievers/
- https://www.youtube.com/watch?v=21i_BK8fEKA
"""
import os
import dotenv
# Load environment variables from the local file 'envls'. The call is the
# cell's last expression, so its return value is what the cell displays.
dotenv.load_dotenv('envls')

# NOTE(review): the OpenAI-backed cells below presumably pick up their API key
# from the environment loaded here (conventionally OPENAI_API_KEY, not
# OPEN_API_KEY) - confirm. Avoid printing the key: cell outputs are saved
# with the notebook.

True

In [4]:
from langchain_core.documents import Document

# Sample corpus as (page text, source tag) pairs; each pair becomes one
# Document whose metadata carries the tag under the "source" key.
pet_facts = (
    ("Dogs are great companions, known for their loyalty and friendliness.", "mammal-pets-doc"),
    ("Cats are independent pets that often enjoy their own space.", "mammal-pets-doc"),
    ("Goldfish are popular pets for beginners, requiring relatively simple care.", "fish-pets-doc"),
    ("Parrots are intelligent birds capable of mimicking human speech.", "bird-pets-doc"),
    ("Rabbits are social animals that need plenty of space to hop around.", "mammal-pets-doc"),
)

documents = [
    Document(page_content=fact, metadata={"source": origin})
    for fact, origin in pet_facts
]

# Bare last expression so the notebook renders the list.
documents

[Document(page_content='Dogs are great companions, known for their loyalty and friendliness.', metadata={'source': 'mammal-pets-doc'}),
 Document(page_content='Cats are independent pets that often enjoy their own space.', metadata={'source': 'mammal-pets-doc'}),
 Document(page_content='Goldfish are popular pets for beginners, requiring relatively simple care.', metadata={'source': 'fish-pets-doc'}),
 Document(page_content='Parrots are intelligent birds capable of mimicking human speech.', metadata={'source': 'bird-pets-doc'}),
 Document(page_content='Rabbits are social animals that need plenty of space to hop around.', metadata={'source': 'mammal-pets-doc'})]

In [3]:
import sqlite3
# Display the version of the SQLite library that the sqlite3 module is using.
# NOTE(review): presumably checked because the Chroma vector store created in
# a later cell needs a sufficiently recent SQLite - confirm the minimum version.
sqlite3.sqlite_version

'3.46.0'

In [5]:
from langchain_openai import OpenAIEmbeddings
from langchain_chroma import Chroma

# Give every document a stable id derived from its position in `documents`.
# Without explicit ids the store assigns fresh random ones on every call, so
# re-running this cell in a live kernel can add a second copy of each document
# to the same in-process collection and skew the similarity results below.
# With fixed ids a re-run overwrites the same records instead.
document_ids = [f"pet-doc-{position}" for position in range(len(documents))]

# Embed the documents with OpenAI embeddings and index them in Chroma.
vector_store = Chroma.from_documents(
    documents,
    embedding=OpenAIEmbeddings(),
    ids=document_ids,
)

In [6]:
# Single-word query: the result is a list of (Document, score) pairs.
vector_store.similarity_search_with_score(
    query="Cats",
)

[(Document(page_content='Cats are independent pets that often enjoy their own space.', metadata={'source': 'mammal-pets-doc'}),
  0.28796547651290894),
 (Document(page_content='Dogs are great companions, known for their loyalty and friendliness.', metadata={'source': 'mammal-pets-doc'}),
  0.3845662474632263),
 (Document(page_content='Goldfish are popular pets for beginners, requiring relatively simple care.', metadata={'source': 'fish-pets-doc'}),
  0.42294859886169434),
 (Document(page_content='Parrots are intelligent birds capable of mimicking human speech.', metadata={'source': 'bird-pets-doc'}),
  0.4255674481391907)]

In [7]:
# Query with the exact text of a stored document. In the recorded output the
# matching document scores close to zero while the others score higher, so
# the score behaves like a distance (lower means closer).
vector_store.similarity_search_with_score(
    query="Cats are independent pets that often enjoy their own space.",
)

[(Document(page_content='Cats are independent pets that often enjoy their own space.', metadata={'source': 'mammal-pets-doc'}),
  2.600444986455841e-06),
 (Document(page_content='Rabbits are social animals that need plenty of space to hop around.', metadata={'source': 'mammal-pets-doc'}),
  0.3240063488483429),
 (Document(page_content='Dogs are great companions, known for their loyalty and friendliness.', metadata={'source': 'mammal-pets-doc'}),
  0.38071876764297485),
 (Document(page_content='Goldfish are popular pets for beginners, requiring relatively simple care.', metadata={'source': 'fish-pets-doc'}),
  0.39473962783813477)]

In [8]:
# Embed the question ourselves, then search with the raw vector rather than
# with text; only the two nearest documents are requested.
query_vector = OpenAIEmbeddings().embed_query(text="what about cat?")
vector_store.similarity_search_by_vector(
    embedding=query_vector,
    k=2,
)

[Document(page_content='Cats are independent pets that often enjoy their own space.', metadata={'source': 'mammal-pets-doc'}),
 Document(page_content='Dogs are great companions, known for their loyalty and friendliness.', metadata={'source': 'mammal-pets-doc'})]

In [9]:
# Wrap the vector store as a retriever that returns the top two matches,
# then try it with a plain-text question.
ret = vector_store.as_retriever(
    search_kwargs=dict(k=2),
)
ret.invoke("what about cats?")


[Document(page_content='Cats are independent pets that often enjoy their own space.', metadata={'source': 'mammal-pets-doc'}),
 Document(page_content='Dogs are great companions, known for their loyalty and friendliness.', metadata={'source': 'mammal-pets-doc'})]

In [15]:
from langchain_openai import ChatOpenAI
from langchain_core.runnables import RunnablePassthrough
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser

# Text of the single user message; {question} and {context} are filled in by
# the chain's input mapping below.
rag_template = """
     Answer this question using the provided context only.
     
     {question}
     
     Context:
     {context}
     """
prompt = ChatPromptTemplate.from_messages([("user", rag_template)])

model = ChatOpenAI(model="gpt-4o")

# The input string goes to the retriever (its results become "context") and
# is passed through unchanged as "question"; the prompt, the chat model and
# the string output parser then run in sequence.
chain = (
    {"context": ret, "question": RunnablePassthrough()}
    | prompt
    | model
    | StrOutputParser()
)

chain.invoke("What about cats?")

'Cats are independent pets that often enjoy their own space.'