Install packages

In [None]:
pip install -q chromadb
pip install -q langchain
pip install -q langchain-community
pip install -q langchain-chroma
pip install -q langchain-openai

Import packages

In [1]:
import getpass
import json
import os
import sys

from langchain.schema.runnable import RunnablePassthrough
from langchain.vectorstores import Chroma
from langchain_core.documents import Document
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_core.runnables import RunnableParallel
from langchain_openai import ChatOpenAI, OpenAIEmbeddings

# Never paste a real key into the notebook. Reuse OPENAI_API_KEY when the
# environment already provides one; otherwise prompt for it without echoing
# the input, so the key is not saved in the notebook source or its outputs.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass.getpass("OpenAI API key: ")

Initialize the model

In [2]:
# Chat model used to generate answers; the name is kept in one place so it is easy to swap.
MODEL_NAME = "gpt-4o-mini"
model = ChatOpenAI(model=MODEL_NAME)

Let's make up some example documents to test the model on

In [None]:
# One short made-up profile per friend: hair, pets, and a hobby.
friend_profiles = [
    "John has black hair. He owns 3 cats, and likes to play pickleball.",
    "Jane has brown hair. She has no pets, and loves to travel and parasail.",
    "Sam is bald. He has one big dog and one small dog, and enjoys writing poetry.",
    "Kate has short blonde hair. She keeps a saltwater fishtank, and her hobby is knitting.",
    "Jim has brown hair. He owns a dog and skateboards everywhere he goes.",
    "Beth has long black hair. She owns a cat, and in her spare time she builds computers.",
]

# Wrap each profile in a Document so the vector store can ingest it.
docs = [Document(page_content=profile) for profile in friend_profiles]

We need to embed the documents into numeric vectors using an embedding model (a separate model from the chat model above). This will enable us to search based on vector similarity.

In [None]:
# Embed every document with OpenAI's embedding model and index it in Chroma.
# Stable ids make re-running this cell in the same kernel upsert the same six
# entries instead of appending duplicate copies, which would otherwise crowd
# the top-k results returned by the retriever.
doc_ids = [f"friend-{index}" for index in range(len(docs))]
vectorstore = Chroma.from_documents(docs, embedding=OpenAIEmbeddings(), ids=doc_ids)

# For each query, return the 3 documents whose embeddings are most similar.
retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 3})

Now we can write a prompt to contextualize the documents, and leave room for the question we want to answer

In [None]:
# Prompt text, written one line per literal so the trailing spaces the
# template carries are visible. {question} and {context} are the two
# placeholders PromptTemplate will expose as input variables.
template = (
    "You are an assistant for question-answering tasks. \n"
    "Use the following pieces of retrieved context about my friends to answer the question. \n"
    "If you don't know the answer, just say that you don't know. \n"
    "Use three sentences maximum and keep the answer concise.\n"
    "Question: {question} \n"
    "\n"
    "Context: \n"
    "{context} \n"
    "\n"
    "Answer: "
)

prompt = PromptTemplate.from_template(template)

def format_docs(docs):
    """Concatenate the page_content of each document, separated by a blank line.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string attribute.

    Returns:
        A single string with the contents joined by two newlines; an empty
        string when ``docs`` is empty.
    """
    separator = "\n\n"
    contents = [item.page_content for item in docs]
    return separator.join(contents)

# The incoming question is fanned out to two prompt inputs: the retriever
# turns it into formatted context, while RunnablePassthrough forwards the
# question itself unchanged. The prompt is then sent to the chat model, and
# StrOutputParser reduces the model's message to a plain string.
rag_chain = (
    RunnableParallel(
        context=retriever | format_docs,
        question=RunnablePassthrough(),
    )
    | prompt
    | model
    | StrOutputParser()
)

Time to ask some questions about these documents!

In [None]:
# Send one question through the chain and keep the answer text.
question = "How many pets does Sam have?"
response = rag_chain.invoke(question)