In [2]:
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import UnstructuredFileLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings, CacheBackedEmbeddings
from langchain.vectorstores import FAISS
from langchain.storage import LocalFileStore
from langchain.chains import RetrievalQA

# Chat model used by the QA chain to answer (and score) each retrieved chunk.
llm = ChatOpenAI()

# On-disk byte store that persists computed embedding vectors between runs.
cache_dir = LocalFileStore("./.cache/")

# Split on newlines; chunk length is measured with the tiktoken encoder
# rather than in raw characters.
splitter = CharacterTextSplitter.from_tiktoken_encoder(
    separator="\n",
    chunk_size=600,
    chunk_overlap=100,
)
loader = UnstructuredFileLoader("./files/chapter1.txt")

docs = loader.load_and_split(text_splitter=splitter)

embeddings = OpenAIEmbeddings()

# Namespace the cache by embedding model name. Without a namespace the cache
# key depends only on the text, so vectors produced by one model would be
# silently reused after switching to a different model.
cached_embeddings = CacheBackedEmbeddings.from_bytes_store(
    embeddings,
    cache_dir,
    namespace=embeddings.model,
)

# Build the FAISS index from the chunks; embeddings go through the cache.
vectorstore = FAISS.from_documents(docs, cached_embeddings)

# "map_rerank" chain type over the documents returned by the retriever.
chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="map_rerank",
    retriever=vectorstore.as_retriever(),
)
# Kept as the final bare expression so the notebook displays the answer.
chain.run("Describe Harry's house.")

libmagic is unavailable but assists in filetype detection. Please consider installing libmagic for better results.


"Harry's house is number four, Privet Drive, where Mr. and Mrs. Dursley live with their son Dudley. The house is described as perfectly normal, and the Dursleys are proud of their normality. The house is in a suburban neighborhood, and Mr. Dursley works at a firm called Grunnings. Mrs. Dursley is thin and blonde, and Mr. Dursley is a big, beefy man with a large mustache. The Dursleys have a secret that they fear someone will discover."