In [1]:
import os
import langchain
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_core.vectorstores import InMemoryVectorStore
from langchain_community.document_loaders import TextLoader
from langchain_core.documents import Document
from langchain.chains import RetrievalQA
from langchain_openai import ChatOpenAI
langchain.__version__


'0.3.27'

In [2]:
with open("data/secrets/key.secret") as f:
    os.environ["OPENAI_API_KEY"] = f.read()
langchain.verbose = False

In [3]:
loader = TextLoader("data/books/cyril.txt")
docs = loader.load()
docs

[Document(metadata={'source': 'data/books/cyril.txt'}, page_content="Cyril Vincent est nÃ© le 15/11/1972\nIl est le papa d'Elisa et de Matis\n")]

In [4]:
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
all_splits = text_splitter.split_documents(docs)
all_splits

[Document(metadata={'source': 'data/books/cyril.txt'}, page_content="Cyril Vincent est nÃ© le 15/11/1972\nIl est le papa d'Elisa et de Matis")]

In [5]:
embeddings = OpenAIEmbeddings(model="text-embedding-3-large")
vector_store = InMemoryVectorStore(embeddings)
db = vector_store.add_documents(documents=all_splits)
db

['fb6869e9-3752-4877-8e96-6428a9ee7049']

In [6]:
retriever = vector_store.as_retriever(search_kwargs={"k": 3})
qa_chain = RetrievalQA.from_chain_type(llm=ChatOpenAI(), retriever=retriever)
qa_chain

RetrievalQA(verbose=False, combine_documents_chain=StuffDocumentsChain(verbose=False, llm_chain=LLMChain(verbose=False, prompt=ChatPromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, template="Use the following pieces of context to answer the user's question.\nIf you don't know the answer, just say that you don't know, don't try to make up an answer.\n----------------\n{context}"), additional_kwargs={}), HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['question'], input_types={}, partial_variables={}, template='{question}'), additional_kwargs={})]), llm=ChatOpenAI(client=<openai.resources.chat.completions.completions.Completions object at 0x00000257B2FCACF0>, async_client=<openai.resources.chat.completions.completions.AsyncCompletions object at 0x00000257B2FCBB60>, root_client=<openai.OpenAI object at 

In [7]:
query = "Quand est né Cyril et qui sont ses enfants ?"
response = qa_chain.invoke(query)
response

{'query': 'Quand est né Cyril et qui sont ses enfants ?',
 'result': "Cyril Vincent est né le 15/11/1972 et ses enfants s'appellent Elisa et Matis."}