### System libraries & setup

In [1]:
import os
import chromadb

In [2]:
from langchain_chroma import Chroma
from langchain_community.document_loaders import TextLoader
from langchain_openai import OpenAIEmbeddings
from langchain_experimental.text_splitter import SemanticChunker
from dotenv import load_dotenv

In [3]:
load_dotenv()
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
embeddings = OpenAIEmbeddings(model="text-embedding-3-small")

### Load document

In [4]:
loader = TextLoader("documents/alice_in_wonderland.txt")
documents = loader.load()

### Chunk process and Database loading

In [5]:
text_splitter = SemanticChunker(
                                embeddings=embeddings,
                                breakpoint_threshold_type="percentile",
                                breakpoint_threshold_amount=95,
)

In [6]:
docs = text_splitter.split_documents(documents)

In [7]:
len(docs)

49

In [8]:
persistent_client = chromadb.PersistentClient(path="./semantic_db")
collection = persistent_client.get_or_create_collection("docs")

In [9]:
db_chroma = Chroma(
    client=persistent_client,
    collection_name="docs",
    embedding_function=embeddings,
)

In [10]:
db_chroma.add_documents(documents=docs)

['07ca1092-f292-401d-9d8b-fabec69e5f72',
 'cac79c73-f045-4fb4-96eb-e2147cab277e',
 '4af590b4-24df-4b4d-ad10-c476b059aca5',
 'c0d9b440-5033-4b0d-ae37-8edd053c4ef1',
 '4a19bfd2-d87b-4502-904a-5a91ac240459',
 '57bd8253-8bbc-4cd8-9cf8-68be830fd1a1',
 '6e0d5aa1-4eb6-4d8b-9966-7d3c2b259009',
 'd89ceb03-b170-42ae-91f1-960313639dd3',
 '65ce85ba-fc82-41f0-95a6-23bbc3b54ffb',
 'b810208a-f379-41a8-ae6b-59b455868f5c',
 '4c7f1d8f-6dfe-48d7-9686-b9afeda00a31',
 '2ebc0873-f981-4274-bceb-7817191acaa2',
 '334e1672-a88e-49a6-9579-89dba422e89a',
 '891a5456-301f-4ddf-85d4-49d6dbd9c353',
 'df3c1313-3ab8-475c-bd76-58f5a5abfb72',
 '0ddea1fa-1350-44c8-9033-cead87f9b750',
 '43093203-55b5-4b06-a5d1-28ac2ad83ee1',
 'a4b3695b-aabe-489d-b067-64ab2ce5855a',
 '741e0e7e-805f-48e6-a9b0-1bbfcc471bd9',
 '12fef396-5dc3-4096-a620-88517a1f1642',
 'bece7716-83a1-4479-9bfe-74aab7f74fb7',
 '6ed89b4c-d146-47e2-ba33-eec4e32ef0bd',
 'd25249c6-dfa3-4937-9fe1-49c76aa514da',
 'cb33c22c-d5c6-4a3e-b1fb-ca33580fa131',
 '82d28157-7da6-