In [None]:
from langchain_ollama import OllamaEmbeddings

# Embedding client shared by every later cell; vectors come from the
# "llama3.2" model served through Ollama.
embeddings = OllamaEmbeddings(
    model="llama3.2",
)

In [None]:
import faiss
from langchain_community.docstore.in_memory import InMemoryDocstore
from langchain_community.vectorstores import FAISS

# Embed a throwaway string once, purely to learn how long the model's
# vectors are; the flat L2 index must be created with that dimension.
probe_vector = embeddings.embed_query("hello world")
index = faiss.IndexFlatL2(len(probe_vector))

# Start from an empty store: no documents and no index -> docstore-id mapping.
vector_store = FAISS(
    embedding_function=embeddings,
    index=index,
    docstore=InMemoryDocstore(),
    index_to_docstore_id=dict(),
)

In [None]:
from uuid import uuid4

from langchain_core.documents import Document
from langchain_community.document_loaders import TelegramChatLoader

# Read the exported Telegram chat from disk into LangChain documents.
loader = TelegramChatLoader("./data/tg_chat.json")

documents = loader.load()

# No IDs are generated here: the documents are split into chunks before they
# are indexed, and the IDs are created after that split (one per chunk), so a
# list sized to the un-split documents would never be used.

In [None]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Cut the loaded chat into overlapping chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
all_splits = text_splitter.split_documents(documents)
print(f"Split into {len(all_splits)} sub-documents.")

# Keep only the first 10 chunks
# (presumably to keep the embedding step short -- TODO confirm).
trimmed = all_splits[:10]
print(trimmed)
print(f"Trimmed into {len(trimmed)} sub-documents.")

# One fresh random ID for each chunk that will be indexed.
uuids = [str(uuid4()) for _ in trimmed]


In [None]:
# Add the trimmed chunks to the vector store under their generated IDs.
# This step may take a while.
vector_store.add_documents(trimmed, ids=uuids)

In [None]:
# Unfiltered similarity search: the two stored chunks closest to "hungry".
results = vector_store.similarity_search("hungry", k=2)

# Show each hit's text followed by its metadata.
for res in results:
    line = f"* {res.page_content} [{res.metadata}]"
    print(line)

In [None]:
# Same search shape as before, but restricted by a metadata filter on "source".
# NOTE(review): the filter value has to equal the "source" metadata actually
# stored on the chunks -- confirm it matches what the loader recorded.
source_filter = {"source": {"$eq": "data/tg_chat.json"}}
results = vector_store.similarity_search("travel", k=2, filter=source_filter)

# Show each hit's text followed by its metadata.
for res in results:
    line = f"* {res.page_content} [{res.metadata}]"
    print(line)

In [None]:
# Wrap the store as an MMR retriever that returns a single document.
# The metadata filter is configured in `search_kwargs` rather than passed as an
# extra keyword to `invoke()`: `search_kwargs` is what the retriever forwards to
# the vector store's search call, so the filter is applied regardless of
# whether the installed langchain-core forwards extra `invoke()` kwargs.
retriever = vector_store.as_retriever(
    search_type="mmr",
    search_kwargs={"k": 1, "filter": {"source": "data/tg_chat.json"}},
)
retriever.invoke("weather")

In [None]:
# Persist the populated store to the "faiss_index" folder ...
vector_store.save_local("faiss_index")

# ... and read it straight back with the same embedding client.
# allow_dangerous_deserialization=True is an explicit opt-in to a load path the
# library itself labels dangerous; here the folder was written by the line
# above. Do not point this at files from an untrusted source.
new_vector_store = FAISS.load_local(
    folder_path="faiss_index",
    embeddings=embeddings,
    allow_dangerous_deserialization=True,
)

# Query the reloaded store to check that the round trip worked.
docs = new_vector_store.similarity_search("capital letters")

In [None]:
# A bare `len(docs)` that is not the cell's last expression is evaluated and
# thrown away, so the count is printed explicitly before the documents.
print(len(docs))
print(docs)