### System libraries & setup

In [1]:
import chromadb
import os
import nest_asyncio
from llama_index.packs.raptor import RaptorPack
from llama_index.core.node_parser import SemanticSplitterNodeParser
from llama_index.llms.openai import OpenAI
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.vector_stores.chroma import ChromaVectorStore
from llama_index.core import SimpleDirectoryReader
from dotenv import load_dotenv

In [2]:
# Patch asyncio so an event loop can be started while another one is already
# running (as is the case inside a notebook kernel).
nest_asyncio.apply()

# Load variables from a local .env file, if present, into the environment.
load_dotenv()

OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
if not OPENAI_API_KEY:
    # Report a missing/empty key here, with an actionable message, before any
    # OpenAI-backed object below is created or used.
    raise RuntimeError(
        "OPENAI_API_KEY is not set. Export it in your shell or add it to a "
        ".env file next to this notebook before running the remaining cells."
    )

# Embedding model reused by both the semantic splitter and the RAPTOR pack.
# The key is not passed explicitly; the OpenAI integration is expected to read
# it from the environment populated above.
embed_model = OpenAIEmbedding(model="text-embedding-3-small")

### Load document

In [3]:
# Build a reader restricted to the single source text, then load it.
alice_reader = SimpleDirectoryReader(
    input_files=["documents/alice_in_wonderland.txt"],
)
documents = alice_reader.load_data()

### Set up database

In [4]:
# On-disk Chroma client; data lives in the raptor_db directory relative to
# the current working directory.
client = chromadb.PersistentClient(
    path=".//raptor_db",
)

# Reuse the "docs" collection when it already exists, otherwise create it.
collection = client.get_or_create_collection(name="docs")

# Expose the Chroma collection through llama-index's vector store wrapper.
vector_store = ChromaVectorStore(
    chroma_collection=collection,
)

### RAPTOR process

In [None]:
# LLM used for generating summaries.
summary_llm = OpenAI(model="gpt-4o", temperature=0.1)

# Splits documents into nodes using the shared embedding model.
semantic_splitter = SemanticSplitterNodeParser(
    buffer_size=1,
    breakpoint_percentile_threshold=95,
    embed_model=embed_model,
)

# NOTE(review): vector_store is backed by a persistent collection, so running
# this cell more than once may store the same content again -- confirm whether
# a guard (e.g. on an already-populated collection) is wanted.
raptor_pack = RaptorPack(
    documents,
    embed_model=embed_model,
    llm=summary_llm,
    vector_store=vector_store,  # used for storage
    transformations=[semantic_splitter],
)