In [4]:
from langchain.embeddings import CacheBackedEmbeddings
from langchain.storage import LocalFileStore
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.document_loaders.unstructured import UnstructuredFileLoader
from langchain_community.vectorstores.chroma import Chroma
from langchain_openai import ChatOpenAI
from langchain_openai.embeddings import OpenAIEmbeddings
from langchain.prompts import ChatPromptTemplate
from langchain.schema.runnable import RunnablePassthrough

llm = ChatOpenAI(temperature=0.1)
cache_dir = LocalFileStore(".cache/")
splitter = CharacterTextSplitter.from_tiktoken_encoder(
    separator='\n',
    chunk_size=600,
    chunk_overlap=100,
)
loader = UnstructuredFileLoader('./files/chapter1.txt')
docs = loader.load_and_split(text_splitter=splitter)
embeddings = OpenAIEmbeddings()
cache_embeddings = CacheBackedEmbeddings.from_bytes_store(
    embeddings, cache_dir
)
vector_store = Chroma.from_documents(docs, cache_embeddings)
retriever = vector_store.as_retriever()
prompt = ChatPromptTemplate.from_messages([
    ("system", "You are a helpful assistant. Answer questions using only the following context. If you don't know the answer just say you don't know, don't make it up:\n{context}"),
    ("human", "{question}")
])

chain = {"context": retriever, "question": RunnablePassthrough()} | prompt | llm
chain.invoke("Describe Victory Mansions.")

AIMessage(content='Victory Mansions is a building with glass doors that Winston Smith enters, where the hallway smells of boiled cabbage and old rag mats. It is a run-down place with a faulty lift, forcing residents to climb seven flights of stairs. Inside the flat, there is a telescreen on the wall that cannot be completely shut off, constantly broadcasting information. The building is described as part of a dystopian society where a poster with a large face of a man, with the caption "BIG BROTHER IS WATCHING YOU," is prominently displayed.', response_metadata={'token_usage': {'completion_tokens': 110, 'prompt_tokens': 1914, 'total_tokens': 2024}, 'model_name': 'gpt-3.5-turbo', 'system_fingerprint': 'fp_b28b39ffa8', 'finish_reason': 'stop', 'logprobs': None})