In [None]:
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import UnstructuredFileLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings, CacheBackedEmbeddings
from langchain.vectorstores import Chroma
from langchain.storage import LocalFileStore

# On-disk byte store used as the backing cache for computed embeddings,
# so re-running this cell can reuse vectors instead of recomputing them.
cache_dir = LocalFileStore("./.cache/")

# Split the text on newlines, using the tiktoken-based length function
# for the chunk_size / chunk_overlap limits below.
splitter = CharacterTextSplitter.from_tiktoken_encoder(
    separator="\n",
    chunk_size=600,
    chunk_overlap=100,
)
loader = UnstructuredFileLoader("chapter_cheap.txt")

# Load the file and split it into chunk-sized documents in one step.
docs = loader.load_and_split(text_splitter=splitter)

embeddings = OpenAIEmbeddings()

# Namespace the cache by embedding model name. Without a namespace every
# model writes into the same key space, so switching models later could
# silently return vectors that were produced by a different model.
# NOTE: entries cached earlier without a namespace are not reused; they are
# recomputed once and stored again under the namespaced keys.
cached_embeddings = CacheBackedEmbeddings.from_bytes_store(
    embeddings,
    cache_dir,
    namespace=embeddings.model,
)

# Build the vector store from the chunks, embedding through the cache wrapper.
vectorstore = Chroma.from_documents(docs, cached_embeddings)

# Last expression of the cell: its value is what the notebook displays.
vectorstore.similarity_search("question")

[-0.03629858192333018,
 -0.007224538187570188,
 -0.03371885554109727,
 -0.02866363267807191,
 -0.026865641732513695,
 0.03460482274185763,
 -0.012318847263635718,
 -0.007752209747023993,
 0.0019380524367559983,
 -0.002701873068082294,
 0.02478101390138119,
 -0.002477124199887517,
 -0.005732726535614382,
 -0.002905449946508664,
 0.006677323288765644,
 -0.003032482117949758,
 0.03384914384922044,
 -0.0015032120884641703,
 0.021093827586875228,
 -0.008996472123429598,
 -0.021719216308744023,
 0.01038405247696104,
 0.006244111590891486,
 0.00708122021044435,
 -0.012312332661965037,
 0.0008998100308185962,
 0.005876044512740219,
 -0.009888952994538026,
 -0.0030731974470689016,
 -0.02457255037320985,
 0.01074234811826759,
 -0.013810659381252829,
 -0.02442923286174532,
 -0.014110324538845866,
 0.0024347802203507035,
 -0.018878911447619554,
 0.0005618723451099323,
 -0.011270018746398786,
 0.018110203351641003,
 -0.009967126351940971,
 0.013028923944578141,
 -0.011328649230112302,
 -0.009133275

In [4]:
# Query the vector store built in the earlier cell; the bare expression is
# the cell's displayed result.
vectorstore.similarity_search(query="question")

[Document(page_content='It was a bright cold day in April, and the clocks were striking thirteen. Winston Smith, his chin nuzzled into his breast in an effort to escape the vile wind, slipped quickly through the glass doors of Victory Mansions, though not quickly enough to prevent a swirl of gritty dust from entering along with him.\nThe hallway smelt of boiled cabbage and old rag mats. At one end of it a coloured poster, too large for indoor display, had been tacked to the wall. It depicted simply an enormous face, more than a metre wide: the face of a man of about forty-five, with a heavy black moustache and ruggedly handsome features. Winston made for the stairs. It was no use trying the lift. Even at the best of times it was seldom working, and at present the electric current was cut off during daylight hours. It was part of the economy drive in preparation for Hate Week. The flat was seven flights up, and Winston, who was thirty-nine and had a varicose ulcer above his right ankle,