# RAPTOR: Recursive Abstractive Processing for Tree-Organized Retrieval

In [None]:
# NOTE: An OpenAI API key must be set here for application initialization, even if not in use.
# If you're not utilizing OpenAI models, assign a placeholder string (e.g., "not_used").
import os
os.environ["OPENAI_API_KEY"] = "your-openai-key"

In [None]:
# Cinderella story defined in sample.txt
with open('demo/sample.txt', 'r') as file:
    text = file.read()

print(text[:100])

1) **Building**: RAPTOR recursively embeds, clusters, and summarizes chunks of text to construct a tree with varying levels of summarization from the bottom up. You can create a tree from the text in 'sample.txt' using `RA.add_documents(text)`.

2) **Querying**: At inference time, the RAPTOR model retrieves information from this tree, integrating data across lengthy documents at different abstraction levels. You can perform queries on the tree with `RA.answer_question`.

### Building the tree

In [47]:
from raptor import RetrievalAugmentation 

In [None]:
RA = RetrievalAugmentation()

# construct the tree
RA.add_documents(text)

### Querying from the tree

```python
question = # any question
RA.answer_question(question)
```

In [None]:
question = "How did Cinderella reach her happy ending ?"

answer = RA.answer_question(question=question)

print("Answer: ", answer)

In [32]:
# Save the tree by calling RA.save("path/to/save")
SAVE_PATH = "demo/cinderella"
RA.save(SAVE_PATH)

ValueError: There is no tree to save.

In [None]:
# load back the tree by passing it into RetrievalAugmentation

RA = RetrievalAugmentation(tree=SAVE_PATH)

answer = RA.answer_question(question=question)
print("Answer: ", answer)

## Using other Open Source Models for Summarization/QA/Embeddings

If you want to use other models such as Llama or Mistral, you can very easily define your own models and use them with RAPTOR. 

In [56]:
import torch
from raptor import BaseSummarizationModel, BaseQAModel, BaseEmbeddingModel, RetrievalAugmentationConfig
from transformers import AutoTokenizer, pipeline

In [57]:
# if you want to use the Gemma, you will need to authenticate with HuggingFace, Skip this step, if you have the model already downloaded
from huggingface_hub import login
login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [59]:
from transformers import AutoTokenizer, pipeline
import torch

# You can define your own Summarization model by extending the base Summarization Class. 
class GEMMASummarizationModel(BaseSummarizationModel):
    def __init__(self, model_name="google/gemma-2b-it"):
        # Initialize the tokenizer and the pipeline for the GEMMA model
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.summarization_pipeline = pipeline(
            "text-generation",
            model=model_name,
            model_kwargs={"torch_dtype": torch.bfloat16},
            device=torch.device('cuda' if torch.cuda.is_available() else 'cpu'),  # Use "cpu" if CUDA is not available
        )

    def summarize(self, context, max_tokens=150):
        # Format the prompt for summarization
        messages=[
            {"role": "user", "content": f"Write a summary of the following, including as many key details as possible: {context}:"}
        ]
        
        prompt = self.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
        
        # Generate the summary using the pipeline
        outputs = self.summarization_pipeline(
            prompt,
            max_new_tokens=max_tokens,
            do_sample=True,
            temperature=0.7,
            top_k=50,
            top_p=0.95
        )
        
        # Extracting and returning the generated summary
        summary = outputs[0]["generated_text"].strip()
        return summary


In [58]:
class GEMMAQAModel(BaseQAModel):
    def __init__(self, model_name= "google/gemma-2b-it"):
        # Initialize the tokenizer and the pipeline for the model
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.qa_pipeline = pipeline(
            "text-generation",
            model=model_name,
            model_kwargs={"torch_dtype": torch.bfloat16},
            device=torch.device('cuda' if torch.cuda.is_available() else 'cpu'),
        )

    def answer_question(self, context, question):
        # Apply the chat template for the context and question
        messages=[
              {"role": "user", "content": f"Given Context: {context} Give the best full answer amongst the option to question {question}"}
        ]
        prompt = self.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
        
        # Generate the answer using the pipeline
        outputs = self.qa_pipeline(
            prompt,
            max_new_tokens=256,
            do_sample=True,
            temperature=0.7,
            top_k=50,
            top_p=0.95
        )
        
        # Extracting and returning the generated answer
        answer = outputs[0]["generated_text"][len(prompt):]
        return answer

In [59]:
from sentence_transformers import SentenceTransformer
class SBertEmbeddingModel(BaseEmbeddingModel):
    def __init__(self, model_name="sentence-transformers/multi-qa-mpnet-base-cos-v1"):
        self.model = SentenceTransformer(model_name)

    def create_embedding(self, text):
        return self.model.encode(text)


In [60]:
RAC = RetrievalAugmentationConfig(summarization_model=GEMMASummarizationModel(), qa_model=GEMMAQAModel(), embedding_model=SBertEmbeddingModel())



Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [33]:
import requests
import json
from typing import List, Dict, Any
from raptor import BaseSummarizationModel, BaseQAModel, BaseEmbeddingModel, RetrievalAugmentationConfig


class OllamaBaseModel:
    def __init__(self, model_name: str, ollama_url: str = "http://localhost:11434/api"):
        """
        :param model_name: Name of the local Ollama model.
        :param ollama_url: URL to Ollama's local API.
        """
        self.model_name = model_name
        self.ollama_url = ollama_url

    def _post_request(self, endpoint: str, payload: Dict[str, Any]) -> Dict[str, Any]:
        """Sends a POST request to the Ollama API and returns the response."""
        url = f"{self.ollama_url}/{endpoint}"
        try:
            response = requests.post(
                url, headers={"Content-Type": "application/json"}, data=json.dumps(payload)
            )
            response.raise_for_status()
            
            return response
        except requests.exceptions.RequestException as e:
            raise Exception(f"Request to {url} failed: {str(e)}")


class GEMMASummarizationModel(BaseSummarizationModel, OllamaBaseModel):
    def summarize(self, context: str, max_tokens: int = 150) -> str:
        """Summarize the given context using the local Ollama model."""
        payload = {
            "model": self.model_name,
            "prompt": f"Write a detailed summary of the following text: {context}",
            "max_tokens": max_tokens,
        }
        result = self._post_request("generate", payload)
        # Parse the raw responses into a list of JSON objects
        responses = [json.loads(line.strip()) for line in result.text.strip().split("\n")]

        # Combine the `response` values
        combined_response = "".join([resp["response"] for resp in responses])

        return combined_response


class GEMMAQAModel(BaseQAModel, OllamaBaseModel):
    def answer_question(self, context: str, question: str) -> str:
        """Answer the question based on the given context using the local Ollama model."""
        payload = {
            "model": self.model_name,
            "prompt": f"Given the context: {context}, answer the question: {question}",
            "max_tokens": 256,
        }
        result = self._post_request("generate", payload)
        # Parse the raw responses into a list of JSON objects
        responses = [json.loads(line.strip()) for line in result.text.strip().split("\n")]

        # Combine the `response` values
        combined_response = "".join([resp["response"] for resp in responses])

        return combined_response


class SBertEmbeddingModel(BaseEmbeddingModel, OllamaBaseModel):
    def create_embedding(self, text: str) -> List[float]:
        """Create an embedding for the given text using the local Ollama model."""
        payload = {
            "model": self.model_name,
            "prompt": f"Create an embedding for the text: {text}",
        }
        result = self._post_request("embed", payload)
        try:
            # Parse the raw responses into a list of JSON objects
            responses = [json.loads(line.strip()) for line in result.text.strip().split("\n")]
            
            # Extract embeddings from all responses and concatenate them
            embeddings = []
            for resp in responses:
                if "embedding" in resp:
                    embeddings.extend(resp["embedding"])
            
            return embeddings  # Return the complete embedding array
        except json.JSONDecodeError as e:
            raise Exception(f"Failed to parse embedding response: {str(e)}")
        except KeyError as e:
            raise Exception(f"Missing key in embedding response: {str(e)}")

class RetrievalAugmentationConfig:
    def __init__(
        self, 
        summarization_model: BaseSummarizationModel, 
        qa_model: BaseQAModel, 
        embedding_model: BaseEmbeddingModel
    ):
        """Configuration for retrieval-augmented tasks using Ollama models."""
        self.summarization_model = summarization_model
        self.qa_model = qa_model
        self.embedding_model = embedding_model
        
# Initialize the RetrievalAugmentationConfig with Ollama models
RAC = RetrievalAugmentationConfig(
    summarization_model=GEMMASummarizationModel(model_name="llama3.2"),
    qa_model=GEMMAQAModel(model_name="llama3.2"),
    embedding_model=SBertEmbeddingModel(model_name="llama3.2"),
)

# Example usage
if __name__ == "__main__":
    text = "Artificial Intelligence is transforming the world by automating tasks and providing intelligent insights..."
    question = "What are the key benefits of AI?"

    # Summarization
    try:
        summary = RAC.summarization_model.summarize(text)
        print("Summary:", summary)
    except Exception as e:
        print("Summarization Error:", e)

    # QA
    try:
        answer = RAC.qa_model.answer_question(text, question)
        print("Answer:", answer)
    except Exception as e:
        print("QA Error:", e)

    # Embedding
    try:
        embedding = RAC.embedding_model.create_embedding(text)
        print("Embedding:", embedding)
    except Exception as e:
        print("Embedding Error:", e)


Summary: Unfortunately, you didn't provide any additional text for me to summarize. However, I can still provide a general summary based on the provided phrase:

The phrase "Artificial Intelligence is transforming the world by automating tasks and providing intelligent insights" suggests that Artificial Intelligence (AI) is revolutionizing various aspects of our lives by taking over mundane and repetitive tasks, thereby freeing up human resources for more strategic and creative pursuits.

Here's a detailed summary:

* **Automation of Tasks**: AI-powered systems are capable of performing routine and repetitive tasks with precision and speed, allowing humans to focus on higher-value activities. This includes tasks such as data entry, customer service, bookkeeping, and other administrative duties.
* **Intelligent Insights**: AI algorithms can analyze vast amounts of data, identify patterns, and provide meaningful insights that were previously inaccessible to humans. This enables organizat

In [60]:
from raptor import RetrievalAugmentation, RetrievalAugmentationConfig

RAC = RetrievalAugmentationConfig(
    summarization_model=GEMMASummarizationModel(model_name="llama3.2"),
    qa_model=GEMMAQAModel(model_name="llama3.2"),
    embedding_model=SBertEmbeddingModel()
    # embedding_model=SBertEmbeddingModel(model_name="llama3.2"),
)
RA = RetrievalAugmentation(config=RAC)

2024-12-16 23:16:06,087 - Load pretrained SentenceTransformer: sentence-transformers/multi-qa-mpnet-base-cos-v1


model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

model.onnx:   0%|          | 0.00/436M [00:00<?, ?B/s]

model_O1.onnx:   0%|          | 0.00/436M [00:00<?, ?B/s]

model_O2.onnx:   0%|          | 0.00/436M [00:00<?, ?B/s]

KeyboardInterrupt: 

In [47]:
pip install matplotlib tiktoken bs4 langchain_community pandas




In [48]:
import locale
def getpreferredencoding(do_setlocale = True):
  return "UTF-8"
locale.getpreferredencoding = getpreferredencoding

In [49]:
import matplotlib.pyplot as plt
import tiktoken
from bs4 import BeautifulSoup as Soup
from langchain_community.document_loaders.recursive_url_loader import RecursiveUrlLoader

## Helper Fuction to count the number of Tokensin each text
def num_tokens_from_string(string: str, encoding_name: str) -> int:
    """Returns the number of tokens in a text string."""
    encoding = tiktoken.get_encoding(encoding_name)
    num_tokens = len(encoding.encode(string))
    return num_tokens
#
# LCEL docs
url = "https://football360.ir/"
loader = RecursiveUrlLoader(
    url=url, max_depth=2, extractor=lambda x: Soup(x, "html.parser").text
)
docs = loader.load()

# # LCEL w/ PydanticOutputParser (outside the primary LCEL docs)
# url = "https://react.dev/community"
# loader = RecursiveUrlLoader(
#     url=url, max_depth=1, extractor=lambda x: Soup(x, "html.parser").text
# )
# docs_pydantic = loader.load()

# # LCEL w/ Self Query (outside the primary LCEL docs)
# url = "https://react.dev/learn"
# loader = RecursiveUrlLoader(
#     url=url, max_depth=1, extractor=lambda x: Soup(x, "html.parser").text
# )
# docs_sq = loader.load()

# # Doc texts
# docs.extend([*docs_pydantic, *docs_sq])
docs_texts = [d.page_content for d in docs]

In [50]:
# Doc texts concat
d_sorted = sorted(docs, key=lambda x: x.metadata["source"])
d_reversed = list(reversed(d_sorted))
concatenated_content = "\n\n\n --- \n\n\n".join(
    [doc.page_content for doc in d_reversed]
)
print(
    "Num tokens in all context: %s"
    % num_tokens_from_string(concatenated_content, "cl100k_base")
)

Num tokens in all context: 141589


In [52]:
RA.add_documents(concatenated_content)


2024-12-16 23:08:01,767 - Creating Leaf Nodes
2024-12-16 23:11:50,920 - Created 1232 Leaf Embeddings
2024-12-16 23:11:50,936 - Building All Nodes
2024-12-16 23:11:50,993 - Using Cluster TreeBuilder
2024-12-16 23:11:51,001 - Constructing Layer 0


ValueError: Found array with 0 feature(s) (shape=(1232, 0)) while a minimum of 1 is required.

In [45]:
with open('demo/sample.txt', 'r', encoding='utf-8') as file:
    text = file.read()

RA.add_documents(text)


2024-12-16 22:58:35,107 - Creating Leaf Nodes
2024-12-16 22:58:37,138 - Created 3 Leaf Embeddings
2024-12-16 22:58:37,139 - Building All Nodes
2024-12-16 22:58:37,140 - Using Cluster TreeBuilder
2024-12-16 22:58:37,142 - Constructing Layer 0
2024-12-16 22:58:37,143 - Stopping Layer construction: Cannot Create More Layers. Total Layers in tree: 0
2024-12-16 22:58:37,144 - Successfully initialized TreeRetriever with Config 
        TreeRetrieverConfig:
            Tokenizer: <Encoding 'cl100k_base'>
            Threshold: 0.5
            Top K: 5
            Selection Mode: top_k
            Context Embedding Model: EMB
            Embedding Model: <__main__.SBertEmbeddingModel object at 0x000001C542C63850>
            Num Layers: None
            Start Layer: None
        


In [46]:
question = "سرمربی استفلال توی کنفرانس خبری بازی استقلال گل گهر چه گفت؟"

answer = RA.answer_question(question=question)

print("Answer: ", answer)

2024-12-16 22:59:01,802 - Using collapsed_tree
  dist = 1.0 - uv / math.sqrt(uu * vv)


Answer:  سرمربی استقلال به صورت خوش‌بین با تمدید قرارداد آبی‌ها درConversation قرار گرفت.
