In [19]:
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import UnstructuredFileLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings, CacheBackedEmbeddings
from langchain.vectorstores import FAISS
from langchain.storage import LocalFileStore
from langchain.prompts import ChatPromptTemplate
from langchain.schema.runnable import RunnablePassthrough


llm = ChatOpenAI(temperature=0.1)

cache_dir = LocalFileStore("./.cache/")

splitter = CharacterTextSplitter.from_tiktoken_encoder(
    separator="\n",
    chunk_size=600,
    chunk_overlap=100,
)
loader = UnstructuredFileLoader("./files/chapter_one.txt")

docs = loader.load_and_split(text_splitter=splitter)

embeddings = OpenAIEmbeddings()

cached_embeddings = CacheBackedEmbeddings.from_bytes_store(
    embeddings, cache_dir)

vectorstore = FAISS.from_documents(
    documents=docs,
    embedding=cached_embeddings
)

retriever = vectorstore.as_retriever()
prompt = ChatPromptTemplate.from_messages([
    ("system", "You are a helpful assistant. Answer questions using only the following context. If you don't know the answer, just say you don't know, don't make up an answer:\n\n{context}"),
    ("human", "{question}"),
])

chain = ({"context": retriever, "question": RunnablePassthrough()} | prompt | llm )
chain.invoke("Describe the place where the storyteller is")

AIMessage(content='The storyteller is in a city near the water, observing crowds of people gathered by the ocean. The city is described as having silent sentinels of mortal men fixed in ocean reveries, with some leaning against spiles, seated on pier-heads, or looking over the bulwarks of ships. The people in the city seem drawn to the water, standing as close as possible without falling in. The city is surrounded by water, and people from all directions come together near the shore.', additional_kwargs={}, response_metadata={'token_usage': {'completion_tokens': 101, 'prompt_tokens': 1541, 'total_tokens': 1642, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-3.5-turbo', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-1c0585cf-eb0e-4bff-bebb-383a564846aa-0')