# RAPTOR: Recursive Abstractive Processing for Tree-Organized Retrieval

In [1]:
# Read the whole demo corpus (UTF-8 text) from demo/data4.txt into `text`.
with open('demo/data4.txt', mode='r', encoding="utf-8") as corpus_file:
    text = corpus_file.read()

# Sanity check: echo only the first 100 characters rather than the full document.
preview = text[:100]
print(preview)

میلاد فخرالدینی در گفت‌وگو با «فوتبال 360»؛ با گل‌گهر به آسیا فکر می‌کنیم
09 بهمن 1403 ساعت 08:00
6 


1) **Building**: RAPTOR recursively embeds, clusters, and summarizes chunks of text to construct a tree with varying levels of summarization from the bottom up. You can create a tree from the text loaded above (read from `demo/data4.txt`) using `RA.add_documents(text)`.

2) **Querying**: At inference time, the RAPTOR model retrieves information from this tree, integrating data across lengthy documents at different abstraction levels. You can perform queries on the tree with `RA.answer_question`.

### Building the tree

In [2]:
from raptor import RetrievalAugmentation, BaseEmbeddingModel, RetrievalAugmentationConfig

2025-02-01 19:24:29,148 - Loading faiss with AVX2 support.
2025-02-01 19:24:29,603 - Successfully loaded faiss with AVX2 support.


In [3]:
from sentence_transformers import SentenceTransformer
class SBertEmbeddingModel(BaseEmbeddingModel):
    """Embedding model backed by a Sentence-Transformers (SBERT) checkpoint.

    Wraps a ``SentenceTransformer`` instance so it can be handed to
    ``RetrievalAugmentationConfig`` as its ``embedding_model``.
    """

    def __init__(self, model_name="sentence-transformers/multi-qa-mpnet-base-cos-v1"):
        """Load the checkpoint identified by ``model_name``.

        Args:
            model_name: Name or path passed straight to ``SentenceTransformer``.
        """
        self.model = SentenceTransformer(model_name)

    def create_embedding(self, text):
        """Return the embedding the wrapped model produces for ``text``.

        Args:
            text: Input forwarded unchanged to ``SentenceTransformer.encode``.

        Returns:
            Whatever ``encode`` returns for ``text``.
        """
        # This method is invoked once per chunk, and with the default setting
        # encode() can draw a tqdm "Batches" bar on every call, which floods the
        # cell output. Turning the bar off does not change the returned embedding.
        return self.model.encode(text, show_progress_bar=False)


In [4]:
# Only the embedding model is overridden here; every other config option is
# left unset when the configuration object is created.
sbert_embedder = SBertEmbeddingModel()
RAC = RetrievalAugmentationConfig(embedding_model=sbert_embedder)
RA = RetrievalAugmentation(config=RAC)

# Build the RAPTOR tree from the text loaded earlier in the notebook.
RA.add_documents(text)

2025-02-01 19:24:47,785 - Use pytorch device_name: cpu
2025-02-01 19:24:47,788 - Load pretrained SentenceTransformer: sentence-transformers/multi-qa-mpnet-base-cos-v1


2025-02-01 19:24:54,306 - Successfully initialized TreeBuilder with Config 
        TreeBuilderConfig:
            Tokenizer: <Encoding 'cl100k_base'>
            Max Tokens: 100
            Num Layers: 5
            Threshold: 0.5
            Top K: 5
            Selection Mode: top_k
            Summarization Length: 100
            Summarization Model: <raptor.SummarizationModels.GPT3TurboSummarizationModel object at 0x000001E7FB405780>
            Embedding Models: {'EMB': <__main__.SBertEmbeddingModel object at 0x000001E7FB405990>}
            Cluster Embedding Model: EMB
        
        Reduction Dimension: 10
        Clustering Algorithm: RAPTOR_Clustering
        Clustering Parameters: {}
        
2025-02-01 19:24:54,308 - Successfully initialized ClusterTreeBuilder with Config 
        TreeBuilderConfig:
            Tokenizer: <Encoding 'cl100k_base'>
            Max Tokens: 100
            Num Layers: 5
            Threshold: 0.5
            Top K: 5
            Selection Mo

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-02-01 19:26:11,334 - Created 102 Leaf Embeddings
2025-02-01 19:26:11,337 - Building All Nodes
2025-02-01 19:26:11,350 - Using Cluster TreeBuilder
2025-02-01 19:26:11,351 - Constructing Layer 0
Found Intel OpenMP ('libiomp') and LLVM OpenMP ('libomp') loaded at
the same time. Both libraries are known to be incompatible and this
can cause random crashes or deadlocks on Linux when loaded in the
same Python program.
Using threadpoolctl may cause crashes or deadlocks. For more
information and possible workarounds, please see
    https://github.com/joblib/threadpoolctl/blob/master/multiple_openmp.md

2025-02-01 19:27:19,220 - Summarization Length: 100
2025-02-01 19:27:23,138 - HTTP Request: POST https://models.inference.ai.azure.com/chat/completions "HTTP/1.1 200 OK"
2025-02-01 19:27:23,156 - Node Texts Length: 540, Summarized Text Length: 219
2025-02-01 19:27:23,158 - Summarized Text: سعید دقیقی، هم‌تیمی و هم‌بازی قدیمی، پس از صحبت‌ها، متوجه شد که به کمک نیاز دارند و برای یاری به تیم ش

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-02-01 19:27:25,107 - HTTP Request: POST https://models.inference.ai.azure.com/chat/completions "HTTP/1.1 200 OK"
2025-02-01 19:27:25,112 - Node Texts Length: 407, Summarized Text Length: 246
2025-02-01 19:27:25,114 - Summarized Text: در این گفت‌وگو، به چند موضوع حاشیه‌ای اشاره شده که به نظر نمی‌رسد مشکلی خاص در بازی بوجود آورده باشند. شخص مورد نظر در پاسخ به این سؤال که چرا پیشنهاد گل‌گهر را قبول کرده، اظهار می‌کند که به دنبال پیشرفت است و سعی می‌کند انتخاب‌هایی داشته باشد که برای آینده‌اش مؤثر باشند. ایشان همچنین بیان می‌کند که در تلاش است تا تفکرات سرمربی تیم را به بهترین


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-02-01 19:27:27,764 - HTTP Request: POST https://models.inference.ai.azure.com/chat/completions "HTTP/1.1 200 OK"
2025-02-01 19:27:27,771 - Node Texts Length: 557, Summarized Text Length: 100
2025-02-01 19:27:27,774 - Summarized Text: In a recent football match, a player who had only practiced for one day prior was surprisingly trusted by the coaching staff to play, which resulted in a victory for the team. At the beginning of the season, the player chose Shams Azar just one day before the transfer window closed, primarily because friends were on that team. The player expressed hope to achieve positive results in the next three games to improve their standings in the league table, with a particular focus on securing a spot in the Asian tournaments


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-02-01 19:27:29,716 - HTTP Request: POST https://models.inference.ai.azure.com/chat/completions "HTTP/1.1 200 OK"
2025-02-01 19:27:29,723 - Node Texts Length: 284, Summarized Text Length: 225
2025-02-01 19:27:29,726 - Summarized Text: در این متن به تحلیل و بررسی اتفاقات بازی فوتبال پرداخته شده است. تأکید شده که در دنیای فوتبال، همواره اتفاقات غیرمنتظره‌ای پیش می‌آید و بازیکنان تحت فشار زیادی قرار دارند زیرا همه تیم‌ها به دنبال کسب نتیجه هستند. 

علاوه بر این، اشاره شده که تیم آبی‌پوشان پس از جذب بازیکنانی نظیر جوئل کوجو، محمدرضا آزادی، و صالح ح


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-02-01 19:27:32,015 - HTTP Request: POST https://models.inference.ai.azure.com/chat/completions "HTTP/1.1 200 OK"
2025-02-01 19:27:32,024 - Node Texts Length: 1045, Summarized Text Length: 204
2025-02-01 19:27:32,026 - Summarized Text: در یکی از بازی‌های اخیر تیم فوتبال پایتخت‌نشین، محمد خدابنده‌لو به طور زودهنگام اخراج شد که منجر به این شد که تیم مجبور باشد یک نیمه را با یک بازیکن کمتر بازی کند. این اخراج اولین کارت قرمز سرخ‌پوشان در دیدارهای مهم این فصل نبود.

همچنین، آرمین سهرابیان که در ترکیب نساجی برابر آلومینیوم ا


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-02-01 19:27:34,779 - HTTP Request: POST https://models.inference.ai.azure.com/chat/completions "HTTP/1.1 200 OK"
2025-02-01 19:27:34,789 - Node Texts Length: 1344, Summarized Text Length: 204
2025-02-01 19:27:34,791 - Summarized Text: در روزهای اخیر مذاکراتی میان مدیران باشگاه‌های استقلال و نساجی برای انتقال آرمین سهرابیان انجام شد که در نهایت منجر به این انتقال گردید. سهرابیان که در ابتدای فصل بازیکن استقلال بود، پس از یک جدایی پرماجرا به نساجی رفته بود و اکنون با قراردادی یک سال و نیمه دوباره به جمع آبی‌پوشان پایتخت بازگشته است.




Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-02-01 19:27:37,265 - HTTP Request: POST https://models.inference.ai.azure.com/chat/completions "HTTP/1.1 200 OK"
2025-02-01 19:27:37,272 - Node Texts Length: 736, Summarized Text Length: 104
2025-02-01 19:27:37,273 - Summarized Text: Following an agreement, Saleh Hardani, who moved from Esteghlal to Sepahan at the beginning of the season, has returned to the capital's blue team. In exchange, Milad Zaki-Pour, who joined Esteghlal before the start of the 24th league, has rejoined Sepahan to continue his season under coach Patrice Carteron.

Armin Sohrabian has become Esteghlal's fourth winter signing after Joel Kojo, Mohammad Reza Azadi,


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-02-01 19:27:39,264 - HTTP Request: POST https://models.inference.ai.azure.com/chat/completions "HTTP/1.1 200 OK"
2025-02-01 19:27:39,270 - Node Texts Length: 669, Summarized Text Length: 219
2025-02-01 19:27:39,271 - Summarized Text: وینگر فرانسوی-کنگویی که اخیراً پس از گلزنی به استقلال تهران، قرارداد خود را به مدت دو فصل دیگر تمدید کرد، به یک مهره درآمدزا برای استقلال خوزستان تبدیل شده است. گفته می‌شود که مدیران باشگاه خوزستانی در تلاش هستند تا با دریافت 500 هزار دلار، رضایت‌نامه این بازیکن به نام بیفوما را صادر کنند.

این مهاجم 31 ساله پس از یک نیم‌


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-02-01 19:27:41,768 - HTTP Request: POST https://models.inference.ai.azure.com/chat/completions "HTTP/1.1 200 OK"
2025-02-01 19:27:41,774 - Node Texts Length: 441, Summarized Text Length: 235
2025-02-01 19:27:41,775 - Summarized Text: در چند سال گذشته، باشگاه و تیم مورد نظر نشان داده‌اند که یک باشگاه بزرگ هستند و امکانات لازم برای ارتقای جایگاه خود را دارند. در این میان، فردی از کادرفنی گل‌گهر به خاطر اعتماد به او و قرار دادن نامش در لیست، ابراز تشکر کرده و امیدوار است که با عملکرد خود بتواند مثمر ثمر باشد.

در خبری از باشگاه استقلال، اطلاع داده شد که مهرداد محمدی به دلیل مص


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-02-01 19:27:44,106 - HTTP Request: POST https://models.inference.ai.azure.com/chat/completions "HTTP/1.1 200 OK"
2025-02-01 19:27:44,112 - Node Texts Length: 470, Summarized Text Length: 213
2025-02-01 19:27:44,116 - Summarized Text: در نیمه دوم بازی اخیر، تیم می‌توانست سه گل بزند و برنده شود، اما متأسفانه بازی مساوی شد و به رغم این قضایا، شکر خدا بازی را نباختیم. در بازی مقابل ملوان، اتفاقات عجیبی رخ داد و ملوان با 9 نفر به پایان بازی رسید. درباره این مسابقه نکات جالبی وجود دارد.

من تصمیم خود را گرفته‌ام که جوان‌ترها اکنون باید به تیم کمک


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-02-01 19:27:46,467 - HTTP Request: POST https://models.inference.ai.azure.com/chat/completions "HTTP/1.1 200 OK"
2025-02-01 19:27:46,473 - Node Texts Length: 349, Summarized Text Length: 230
2025-02-01 19:27:46,475 - Summarized Text: در حال حاضر در لیگ، برخی تیم‌ها برای قهرمانی و برخی دیگر برای کسب سهمیه می‌جنگند، که این وضعیت باعث دشواری در کسب امتیاز برای همه تیم‌ها شده است. گیورگی گولسیانی، بازیکن تیم سرخ‌پوشان، تا زمان اخراج عملکرد خوبی داشت، اما پس از آن عملکرد تیم دچار افت شد. 

مدیران باشگاه استقلال در روز گذشته به فعالیت‌های نقل‌وانت


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-02-01 19:27:48,911 - HTTP Request: POST https://models.inference.ai.azure.com/chat/completions "HTTP/1.1 200 OK"
2025-02-01 19:27:48,919 - Node Texts Length: 366, Summarized Text Length: 234
2025-02-01 19:27:48,922 - Summarized Text: متن شامل تصمیم یک فرد برای جدایی از تیم خود است، زمانی که احساس کرده نمی‌تواند به آن کمک کند. او این تصمیم را گرفته و اقداماتی را در این زمینه انجام داده است. این فرد به تیم بسیار محبت‌آمیز اشاره کرده و امیدوار است که آنها بهترین نتایج را کسب کنند.

سپس سؤالی درباره ادامه فصل و کار با آقای تارتار در سیرجان می‌شود. در ادامه، خبر از بازگشت آرمین س


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-02-01 19:27:51,606 - HTTP Request: POST https://models.inference.ai.azure.com/chat/completions "HTTP/1.1 200 OK"
2025-02-01 19:27:51,615 - Node Texts Length: 1400, Summarized Text Length: 199
2025-02-01 19:27:51,618 - Summarized Text: میلاد فخرالدینی، بازیکن باسابقه فوتبال ایران، در گفت‌وگویی با «فوتبال 360» از وضعیت خود در تیم گل‌گهر سیرجان صحبت کرد. این تیم در بازار نقل‌وانتقالات زمستانی عملکرد خوبی داشت و فخرالدینی را از شمس‌آذر قزوین به خدمت گرفت تا به کادر مدافع خود به سرمربی مهدی تارتار، تقویت بیشتری ببخ


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-02-01 19:27:53,947 - HTTP Request: POST https://models.inference.ai.azure.com/chat/completions "HTTP/1.1 200 OK"
2025-02-01 19:27:53,954 - Node Texts Length: 98, Summarized Text Length: 213
2025-02-01 19:27:53,958 - Summarized Text: سروش رفیعی با رونمایی رسمی از چهارمین خرید زمستانی باشگاه استقلال به تیم خود بازگشت. این انتقال شامل بازگشت آرمین سهرابیان به استقلال نیز می‌شود. این خبر به معنای آغاز فصل جدیدی برای سهرابیان در این تیم و خداحافظی از روزهای روشن قبلی‌اش است. این انتقالات نشان‌دهنده تغییرات در ترکیب استقلال و تمرکز بر تقویت تیم در نیم‌


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-02-01 19:27:57,141 - HTTP Request: POST https://models.inference.ai.azure.com/chat/completions "HTTP/1.1 200 OK"
2025-02-01 19:27:57,148 - Node Texts Length: 196, Summarized Text Length: 229
2025-02-01 19:27:57,150 - Summarized Text: مهدی رحمتی اشاره کرده که بازیکن قبلاً تصمیم به جدایی گرفته بود، اما با پیشنهاد گل‌گهر استقبال کرده و انتخابش را با روی باز انجام داده است. او همچنین بیان کرده که هدف تیم این است که در ادامه فصل نتایج بهتری کسب کند. حضور سهرابیان در تیم باعث شده که جذب غلامی دیگر در دستور کار نباشد و فعالیت‌های نقل و انتقالاتی تیم آبی ادامه خواهد یافت.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-02-01 19:27:58,765 - HTTP Request: POST https://models.inference.ai.azure.com/chat/completions "HTTP/1.1 429 Too Many Requests"
2025-02-01 19:27:58,769 - Retrying request to /chat/completions in 23.000000 seconds
2025-02-01 19:28:27,089 - HTTP Request: POST https://models.inference.ai.azure.com/chat/completions "HTTP/1.1 200 OK"
2025-02-01 19:28:27,093 - Node Texts Length: 243, Summarized Text Length: 241
2025-02-01 19:28:27,095 - Summarized Text: یک بازیکن در مصاحبه‌ای درباره دو بازی آغازین خود برای تیم گل‌گهر صحبت کرده و اظهار داشته است که این دو بازی به شرایط جدول تیم کمک کرده‌اند. او از توانایی تیم برای بازگشت به وضعیت بازی و کسب پیروزی در این دو مسابقه ابراز رضایت کرده و اشاره کرده که حق تیم برای بردن در این بازی‌ها محفوظ بوده است.

او همچنین به شرایط خوب خود اشاره می‌کند و بیان می‌کند که با اکثر باز


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-02-01 19:28:29,241 - HTTP Request: POST https://models.inference.ai.azure.com/chat/completions "HTTP/1.1 200 OK"
2025-02-01 19:28:29,250 - Node Texts Length: 145, Summarized Text Length: 211
2025-02-01 19:28:29,253 - Summarized Text: در نیم‌فصل، شرایط برای من بسیار خوب بود و در این مدت پانزده بازی انجام دادم. در آخرین روز بازار زمستانه، بیفوما به عنوان یک چهره ویژه توجه‌ها را به خود جلب کرد. همچنین تیوی نیز در موقعیتی سرنوشت‌ساز قرار داشت. پس از یک نیم‌فصل خانه‌نشینی، به طور ناگهانی به یک تیم کاندیدای سقوط به لیگ 2 پیوستم


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-02-01 19:28:31,685 - HTTP Request: POST https://models.inference.ai.azure.com/chat/completions "HTTP/1.1 200 OK"
2025-02-01 19:28:31,691 - Node Texts Length: 580, Summarized Text Length: 104
2025-02-01 19:28:31,693 - Summarized Text: In the winter transfer window, significant developments occurred for Iranian football clubs, notably for Persepolis and Esteghlal. As of February 8, 1403 (Jan 28, 2025), there were key updates:

1. **Esteghlal's Transfer Activity**: Esteghlal made headlines by officially unveiling their fourth winter signing, with Sahrabian returning to the team. Additionally, a player named BiFoMa emerged as a notable figure on the final day of the transfer


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-02-01 19:28:34,005 - HTTP Request: POST https://models.inference.ai.azure.com/chat/completions "HTTP/1.1 200 OK"
2025-02-01 19:28:34,012 - Node Texts Length: 314, Summarized Text Length: 193
2025-02-01 19:28:34,015 - Summarized Text: مهران احمدی، بازیکن 27 ساله تیم خیبر خرم آباد، با امضای قرارداد دو ساله به تیم استقلال پیوست و او ششمین خرید زمستانی تیم استقلال به شمار می‌رود. 

در یک بازی، مدافع با تجربه پرسپولیس در دیدار با سپاهان در دقیقه 35 به دلیل بی‌احتیاطی کارت قرمز دریافت کرد و زمین را ترک کرد. این اخراج منجر به


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-02-01 19:28:36,618 - HTTP Request: POST https://models.inference.ai.azure.com/chat/completions "HTTP/1.1 200 OK"
2025-02-01 19:28:36,624 - Node Texts Length: 680, Summarized Text Length: 237
2025-02-01 19:28:36,626 - Summarized Text: فخرالدینی، مدافع ۳۴ ساله، دارای آمادگی بالا و تجربه گسترده‌ای در تیم‌های مختلف است که به کمک گل‌گهر خواهد آمد. او درباره بازی سنگین با استقلال خوزستان گفت که این بازی در زمین آن تیم بسیار دشوار است و این تیم در خانه خود موفق به شکست بسیاری از تیم‌های بزرگ شده است.

او همچنین بیان کرد که در نیم‌فصل تصمیم به جدایی گرفته و این تصمیم را به‌خاطر آمد


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-02-01 19:28:39,000 - HTTP Request: POST https://models.inference.ai.azure.com/chat/completions "HTTP/1.1 200 OK"
2025-02-01 19:28:39,006 - Node Texts Length: 321, Summarized Text Length: 189
2025-02-01 19:28:39,008 - Summarized Text: در دیدار رفت بین پرسپولیس و تراکتور، هافبک باتجربه پرسپولیس در دقیقه 76 به دلیل تصمیم جنجالی داور وحید کاظمی از زمین اخراج شد. این اتفاق در حالی رخ داد که تیم پرسپولیس، زیر نظر خوان‌کارلوس گاریدو، با یک گل عقب بود. با این حال، پرسپولیس با یک بازگشت فوق‌العاده موفق شد در


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-02-01 19:28:41,085 - HTTP Request: POST https://models.inference.ai.azure.com/chat/completions "HTTP/1.1 200 OK"
2025-02-01 19:28:41,092 - Node Texts Length: 212, Summarized Text Length: 223
2025-02-01 19:28:41,094 - Summarized Text: در شروع فصل، بازی اول برای تمامی تیم‌ها معمولاً دشوار است، زیرا آن‌ها به مدت نزدیک به 20 روز از شرایط بازی دور بوده‌اند. پیروزی در این بازی‌ها بسیار مهم است و می‌تواند به تیم کمک زیادی کند.

امیرارسلان مطهری، که بعد از یک‌دهه حضور در سطح اول فوتبال ایران به شهرت رسیده بود، در سن 31 سالگی به طرز ناباورانه‌ای با یک سقوط آزاد روبرو شد


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-02-01 19:28:42,722 - Constructing Layer 1
2025-02-01 19:28:49,770 - Summarization Length: 100
2025-02-01 19:28:51,600 - HTTP Request: POST https://models.inference.ai.azure.com/chat/completions "HTTP/1.1 200 OK"
2025-02-01 19:28:51,606 - Node Texts Length: 840, Summarized Text Length: 218
2025-02-01 19:28:51,607 - Summarized Text: یک مهاجم وینگر فرانسوی-کنگویی به نام بیفوما، پس از گلزنی به استقلال تهران، قرارداد خود را به مدت دو فصل دیگر تمدید کرده است. او به یک مهره درآمدزا برای استقلال خوزستان تبدیل شده و مدیران این باشگاه در تلاش هستند با دریافت 500 هزار دلار، رضایت‌نامه این بازیکن را صادر کنند. این مهاجم 31 ساله، برای فصل جدید در انتخاب‌های خود برتر به


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-02-01 19:28:53,613 - HTTP Request: POST https://models.inference.ai.azure.com/chat/completions "HTTP/1.1 200 OK"
2025-02-01 19:28:53,619 - Node Texts Length: 313, Summarized Text Length: 100
2025-02-01 19:28:53,621 - Summarized Text: In a recent football match, a player who had only practiced with the team for one day prior was unexpectedly trusted by the coaching staff to play, contributing to a surprising victory for the team. At the beginning of the season, this player had chosen to join Shams Azar just one day before the transfer window closed, largely due to friendships with teammates. The player expressed a strong desire to achieve positive results in the upcoming three matches, aiming to improve their position in the league standings and secure a spot


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-02-01 19:28:55,412 - HTTP Request: POST https://models.inference.ai.azure.com/chat/completions "HTTP/1.1 200 OK"
2025-02-01 19:28:55,421 - Node Texts Length: 1300, Summarized Text Length: 227
2025-02-01 19:28:55,423 - Summarized Text: در گزارشی از فعالیت‌های اخیر باشگاه‌های فوتبال ایران، سعید دقیقی، هم‌تیمی و هم‌بازی قدیمی، به تیم شمس‌آذر برای کمک پیوست. رقابت‌ها در این فصل دشوارتر از همیشه بوده و تیم‌ها به هم نزدیک شده‌اند، به طوری که کسب هر امتیاز از هر تیم بسیار سخت است. 

در بازار نقل و انتقالات زمستانه، روز پرماجرایی برای تیم استقلال رقم خورد که


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-02-01 19:28:57,602 - HTTP Request: POST https://models.inference.ai.azure.com/chat/completions "HTTP/1.1 200 OK"
2025-02-01 19:28:57,609 - Node Texts Length: 865, Summarized Text Length: 220
2025-02-01 19:28:57,611 - Summarized Text: این متن به تحلیل اتفاقات اخیر در دنیای فوتبال و تأثیرات آن بر تیم‌ها و بازیکنان پرداخته است. نکته مورد تأکید این است که در فوتبال، همواره رویدادهای غیرمنتظره‌ای رخ می‌دهد و بازیکنان تحت فشار زیادی هستند زیرا همه تیم‌ها به دنبال کسب نتایج مطلوب‌اند.

به‌ویژه به عملکرد تیم آبی‌پوشان اشاره شده که پس از جذب بازیکنانی


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-02-01 19:28:59,707 - HTTP Request: POST https://models.inference.ai.azure.com/chat/completions "HTTP/1.1 200 OK"
2025-02-01 19:28:59,715 - Node Texts Length: 1182, Summarized Text Length: 100
2025-02-01 19:28:59,717 - Summarized Text: In this conversation, several peripheral topics were mentioned that do not seem to have caused any specific issues in the match. The person in question, when asked why they accepted Gol Gohar's offer, stated that they are looking for improvement and are trying to make choices that will be effective for their future. They also mentioned their efforts to align with the head coach's thoughts.

In recent years, the club and the team in question have demonstrated that they are a prominent club with the necessary facilities to elevate


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-02-01 19:29:00,513 - Constructing Layer 2
2025-02-01 19:29:00,515 - Stopping Layer construction: Cannot Create More Layers. Total Layers in tree: 2
2025-02-01 19:29:00,518 - Successfully initialized TreeRetriever with Config 
        TreeRetrieverConfig:
            Tokenizer: <Encoding 'cl100k_base'>
            Threshold: 0.5
            Top K: 5
            Selection Mode: top_k
            Context Embedding Model: EMB
            Embedding Model: <__main__.SBertEmbeddingModel object at 0x000001E7FB405990>
            Num Layers: None
            Start Layer: None
        


In [4]:
from raptor import OllamaQAModelV2, OllamaSummarizationModelV2
# Model tags for question answering and for summarization; both currently
# point at the same model.
qa_llm = 'deepseek-r1:7b'
summarization_llm = 'deepseek-r1:7b'

# Wrap each tag in its corresponding raptor model class.
qa_model = OllamaQAModelV2(model_name=qa_llm)
summarization_model = OllamaSummarizationModelV2(model_name=summarization_llm)
# NOTE(review): the summaries logged by this cell start with "<think>", so the
# model's reasoning text appears to end up in the stored summaries -- confirm
# whether it should be stripped before the nodes are embedded.

# Note: this rebinds RAC and RA, replacing any objects previously held under
# those names.
RAC = RetrievalAugmentationConfig(
    qa_model=qa_model,
    summarization_model=summarization_model,
    embedding_model=SBertEmbeddingModel(),
)
RA = RetrievalAugmentation(config=RAC)

# Build the RAPTOR tree from the text loaded earlier in the notebook.
RA.add_documents(text)

2025-01-31 15:29:00,253 - Use pytorch device_name: cpu
2025-01-31 15:29:00,254 - Load pretrained SentenceTransformer: sentence-transformers/multi-qa-mpnet-base-cos-v1


2025-01-31 15:29:04,840 - Successfully initialized TreeBuilder with Config 
        TreeBuilderConfig:
            Tokenizer: <Encoding 'cl100k_base'>
            Max Tokens: 100
            Num Layers: 5
            Threshold: 0.5
            Top K: 5
            Selection Mode: top_k
            Summarization Length: 100
            Summarization Model: <raptor.SummarizationModels.OllamaSummarizationModelV2 object at 0x000001F5FFA81D20>
            Embedding Models: {'EMB': <__main__.SBertEmbeddingModel object at 0x000001F5FFA81180>}
            Cluster Embedding Model: EMB
        
        Reduction Dimension: 10
        Clustering Algorithm: RAPTOR_Clustering
        Clustering Parameters: {}
        
2025-01-31 15:29:04,842 - Successfully initialized ClusterTreeBuilder with Config 
        TreeBuilderConfig:
            Tokenizer: <Encoding 'cl100k_base'>
            Max Tokens: 100
            Num Layers: 5
            Threshold: 0.5
            Top K: 5
            Selection Mod

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-01-31 15:29:49,811 - Created 102 Leaf Embeddings
2025-01-31 15:29:49,813 - Building All Nodes
2025-01-31 15:29:49,827 - Using Cluster TreeBuilder
2025-01-31 15:29:49,830 - Constructing Layer 0
Found Intel OpenMP ('libiomp') and LLVM OpenMP ('libomp') loaded at
the same time. Both libraries are known to be incompatible and this
can cause random crashes or deadlocks on Linux when loaded in the
same Python program.
Using threadpoolctl may cause crashes or deadlocks. For more
information and possible workarounds, please see
    https://github.com/joblib/threadpoolctl/blob/master/multiple_openmp.md

2025-01-31 15:30:29,495 - Summarization Length: 100
2025-01-31 15:36:40,364 - Node Texts Length: 198, Summarized Text Length: 348
2025-01-31 15:36:41,872 - Summarized Text: <think>
Alright, I need to help the user summarize this text. Let me first read through it carefully.

The text seems to be in Persian and mentions a football match between Parastar and Golestan on November 8, 2023. Ther

Batches:   0%|          | 0/1 [00:03<?, ?it/s]

2025-01-31 15:44:22,424 - Node Texts Length: 447, Summarized Text Length: 983
2025-01-31 15:44:22,482 - Summarized Text: <think>
Alright, I need to help the user by writing a detailed summary of the given text. The original text is in Persian and seems to be about a football training session where the user was trusted enough to play a game that resulted in winning. 

First, I'll read through the text carefully to understand the key points. It mentions the user choosing only one day before the end of winter training because friends were nearby. The match happens at the beginning of the autumn-fall season, specifically on August 28th, which is the last day for purchasing winter clothing in Iran.

The police are involved again this chapter, dealing with a known team and forcing them to play against their own players who have rights over an independent event scheduled. There's also mention of specific individuals: Ali Khomeini wearing blue, Mohammad Zakeri wearing red, and Shayan Gholami w

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-01-31 15:49:18,881 - Node Texts Length: 457, Summarized Text Length: 467
2025-01-31 15:49:18,883 - Summarized Text: <think>
Alright, I need to write a detailed summary of the provided text. Let me read through it carefully.

The text is in Persian and seems to be about a football (soccer) team's situation. The names Sa'eed Hakakian, Hooman Rezaie, Hojatollah Kooei, and Aram Saberi are mentioned as players who helped the team achieve independence or gained trust. There's also mention of other players like Alireza Zolghad and Aboofazl Zali.

The text talks about last season being tough for all teams, with hard matches where each goal is valuable because every match is close. The author communicates to these key players and hopes they can lead the team well in their next three games to improve their standing, especially considering an Asian Cup semi-final.

There's a focus on Hojatollah Kooei who has the ability to play both central defense and left defense, which is good for strateg

Batches:   0%|          | 0/1 [00:01<?, ?it/s]

2025-01-31 15:56:58,986 - Node Texts Length: 375, Summarized Text Length: 821
2025-01-31 15:56:59,028 - Summarized Text: <think>
Alright, so I need to write a detailed summary of this text. Let me read through it carefully first.

The text starts with "تیمی بسیار دوستداشتنی است و انشالله بهترین نتایج را بتوانند بگیرند." which translates to "This team is very good and hopefully will do their best." So the main topic seems to be about a football (soccer) team, specifically their performance.

Next, there's a note from someone, probably a supporter or journalist, asking about the continuation of a chapter and working with Mr. Tartar in Sirjan. I'm not sure what "تغییرات فصل" means, but it might mean changes to a chapter or section. So they're asking for more information on that.

Then, there's a line about Armin Sahaiean being called back to the Ashtāb Team and becoming a petiswoy mossy son. I'm not familiar with these terms in Persian football terminology. Maybe "Ashtāb" is a team name o

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-01-31 16:04:41,247 - Node Texts Length: 400, Summarized Text Length: 931
2025-01-31 16:04:41,263 - Summarized Text: <think>
Alright, so I've been given this text in Persian and asked to write a detailed summary. Let me start by reading through it carefully to understand what's going on.

The first sentence is "من خودم اینجا از همه بیشتر میخواهم تفکرات سرمربی را پیاده کنم و من دنبال این هستم که بدانم سرمربی از من چه میخواهد." This translates to something like "I want to more deeply incorporate my thoughts into the present here, and I'm seeking to understand what the surroundings (sermrb) hope for me." So it seems the speaker is reflecting on their interactions or environment.

The next part mentions "با این حال محمد خوششانس بود که همیتیها در بهترین فرم ممکن بودند و حریف از اوج آمادگی به دور بود." This is saying "Mohammad was modest but his companions were in the best possible form, and he was no longer in the peak of his enthusiasm." So Mohammad seems to be feeling less enthusiasti

Batches:   0%|          | 0/1 [00:01<?, ?it/s]

2025-01-31 16:08:26,975 - Node Texts Length: 311, Summarized Text Length: 342
2025-01-31 16:08:27,171 - Summarized Text: <think>
Alright, I need to write a detailed summary of the user's text. Let me read through it carefully.

First, there are some Italian phrases at the beginning that don't make much sense in English context. Maybe they're just examples or typos and should be ignored for the summary.

The main part is about football matches and related news. The user provided four separate points:

1. A match involving 9 players.
2. The end of a winter market day at a blue water park.
3. A TV personality, Peyman Booy, featured on the evening news that night.
4. An incident with an acceleration tape in a football game.

I'll need to create a coherent summary that connects these points while keeping each section clear. I should also include specific details like dates and times where relevant.

Maybe start with the match, then move to Peyman Booy's appearance, followed by the market ev

Batches:   0%|          | 0/1 [00:01<?, ?it/s]

2025-01-31 16:15:27,216 - Node Texts Length: 581, Summarized Text Length: 651
2025-01-31 16:15:27,237 - Summarized Text: <think>
Okay, so I need to write a detailed summary of this text about the football team Beivomaa. Let me read through the text again and try to understand all the key points.

First, it mentions that Beivomaa started their season by scoring a goal equal to the national anthem of Iran, which was a strong opening. They also have a history of equalling goals from teams like SS Azar Bahar, MAS Rafsangran, and Ghaloom Sairsian. That's interesting because it shows they've had successful scoring moments in the past.

Next, Beivomaa played against Ardak Milan Esfahan, MAS Rafflesegan, and Ghaloom Shahghal in their matches this season. They scored goals in each of these games, which probably contributed to their team spirit or strategy.

Then there's a part about Wingering Franprix-Kongamo, the club where Beivomaa is based. After scoring the opening goal equaling the nationa

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-01-31 16:27:16,593 - Node Texts Length: 736, Summarized Text Length: 1531
2025-01-31 16:27:16,598 - Summarized Text: <think>
Alright, I'm looking at this query where the user wants a detailed summary of a text in Persian. The passage is about Fardin Aghaei's potential transfer to Golestan and touches on several other topics like his experiences with different teams, the challenges he faced during the invasion of Zahedan, his decision to leave, and some internal conflicts within his management team.

First, I need to make sure I understand each part of the text. Fardin is 34 years old, which makes him a solid candidate for an experienced defender in Golestan's youth system. His high level of commitment combined with his experience from various teams suggests he could be a good fit at Golestan.

The passage also mentions his involvement in the invasion matches against Zahedan, highlighting how his team played tough and managed to secure victories despite being a smaller unit compare

Batches:   0%|          | 0/1 [00:01<?, ?it/s]

2025-01-31 16:38:17,926 - Node Texts Length: 1533, Summarized Text Length: 742
2025-01-31 16:38:18,086 - Summarized Text: <think>
Alright, I need to summarize this text. Let me read through it carefully.

First, there are a few players mentioned: صالح حردانی, میلاد زکی-پور, آرمین سهرابیان, and then some about the transfer of players between teams. Then there's information about بیفوما transferring to تهران MLS. After that, another part talks about مطهری, his current situation in a low-tier league, and his recent performance.

I should break this down into sections: first the transfers involving حردانی and زکی-پور, then سفرابیان's transfer, followed by the situation of بیفوما to تهران MLS. Then move on to مطهری's recent games and his current status.

I need to make sure I include all key points without getting too detailed. For example, حردانی is returning to قلیبودان, and زکی-پور is back to فروگاه. سفرابیان transferred from براب尔 آλομینی Mellat to قرمشپوشان Qahrooman, but then goes bac

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-01-31 16:47:50,606 - Node Texts Length: 847, Summarized Text Length: 848
2025-01-31 16:47:50,735 - Summarized Text: <think>
Okay, I need to write a detailed summary of the given text. Let me read through it carefully.

The first part talks about the first match being tough for teams because they were close to 20 days away from their last game. Winning in these initial matches is crucial and will help them. It mentions Mehran Ahmadi, a 27-year-old player from Tabriz, who recently signed with Pish Hahrom Abad on a two-year contract after six summer trials.

Then there's an incident involving Hakibak Bateq, a police officer, who was involved in an accident while playing. He collided with Vahid Keshmoutil and made an emergency decision to burn a car during a match where his team was trailing by one goal at halftime but won 2-0 later.

The article also discusses Pish Hahrom Abad's performance early on, winning their first game in five matches with Hakibak leading them back. It mentions

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-01-31 16:58:44,393 - Node Texts Length: 1513, Summarized Text Length: 888
2025-01-31 16:58:44,706 - Summarized Text: <think>
Alright, I need to summarize this text. First, I'll read through it carefully to understand the main points.

The text talks about football matches affecting the Iranian team Tabeshen club. Notably, there's something unusual with the red card given to Mohammad Khodabandehlou, which was unexpected because it wasn't given as a first黄牌. Then, the article discusses various incidents from the last few days, including changes in managers and players of the Isfahanning and Esfahan teams.

I see mentions like Ramezanizadeh's transfers to transfer Armin Soroushian, which resulted in his formal relocation. There's also talk about a player named Alireza Shamsi who might be returning from a contract situation. Another interesting point is about Tavoosi Be Broma and her agent Wining-Majakam who seems to have made deals with an external player of poor quality.

Then, ther

Batches:   0%|          | 0/1 [00:01<?, ?it/s]

2025-01-31 17:03:33,270 - Node Texts Length: 364, Summarized Text Length: 375
2025-01-31 17:03:33,314 - Summarized Text: <think>
Okay, so I need to write a detailed summary of this text. Let me read it carefully and understand what's going on.

The first part is in Persian: "ممنونم از کادرفنی گلگهر که به من اعتقاد داشتند و من را در لیستشان گذاشتند" which translates to thanks to the technical committee of Gorgan gas for believing in me and placing me on their list. The person thinks that by their performance, they can make amends and become part of the team.

Then there's information about Mehrdad Mohammadi returning to the national basketball team. He was involved in a statement regarding his participation and transfer activities. He mentions he left the exclusion list because of doping accusations. Also, it's noted that Mosoud Saeedi, an international reserve player, might still be with the team.

There's also something about numerical superiority: Golestan Mahdilemoochi ranked second

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-01-31 17:06:16,198 - Node Texts Length: 160, Summarized Text Length: 304
2025-01-31 17:06:16,220 - Summarized Text: <think>
Alright, so I need to write a detailed summary of this given text. Let me read through the text first.

The user provided a passage in Persian, and they want it summarized in English with about 150 words. The original passage talks about someone trying to choose tasks that will have a positive impact on their future. They mention that some teams are vying for leadership, while others aim for gains or points in this tough situation.

Hmm, okay, so the main idea is about strategic selection of tasks for personal growth and impact. The person is trying to be forward-thinking because they want their efforts to leave a lasting positive effect on themselves in the future.

I should structure the summary to capture both the individual's intention and the competitive environment around them. I'll mention that there are teams competing for leadership, while others foc

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-01-31 17:12:44,268 - Node Texts Length: 585, Summarized Text Length: 727
2025-01-31 17:12:44,279 - Summarized Text: <think>
Alright, I need to write a detailed summary of the given text. Let me read through it carefully first.

The text starts with talking about a football match in the second half where they could have won by scoring three goals but unfortunately lost. It mentions regretting not winning and deciding to leave the team because there's no more help to be done. The author then talks about other players like چریمیا, بلانکو, کаکوتو, رضامدین, میلادزکیپور, سامانفلاح, بیف omissionا and mentions their current situation.

So the main points are:
1. An unfortunate match loss in the second half despite scoring three goals.
2. The author regrets not winning but leaves the team because they can't help anymore.
3. Other players' current standings and their ongoing struggles.
4. Mention of other coaches like مطهری from Chapter 5 who's looking for a new team.

I need to capture all

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-01-31 17:17:35,505 - Node Texts Length: 398, Summarized Text Length: 391
2025-01-31 17:17:35,523 - Summarized Text: <think>
Alright, the user has provided a query in Persian and an English summary. They want me to write a detailed summary of the given text about football events.

First, I need to understand the content thoroughly. The text discusses the tension and pressure during a football match, emphasizing that both teams are focused on securing a result with significant game pressure. It also mentions an announcement about the goalkeeper's contract extension as one of the ongoing developments.

The user has already provided a summary in English, so they might be looking for another version or perhaps want to see if my response matches their expectations. Alternatively, they might need it in Persian but phrased it in English initially by mistake.

I should ensure that the summary captures all key points: the high stakes, focus on results, ongoing developments like contract ann

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-01-31 17:21:05,222 - Node Texts Length: 340, Summarized Text Length: 333
2025-01-31 17:21:05,223 - Summarized Text: <think>
Alright, I need to summarize this text. Let's read through it carefully.

First part talks about the association and team being successful in previous years and having opportunities to improve their standing. Then mentions Georgiy Golosiany as a good manager with a plan for promotion but an unexpected fall after that.

Next, it says the association continued its activities post-graduation, and there was an important exchange deal between two associations.

Lastly, Wingma TAM, from westernistan, might cancel this team's contract soon, aiming to file for transfer in the winter break before filing for relegation.

I need to make sure I cover all these points concisely. Maybe structure it by success story, manager's plan, the exchange deal, and then the potential situation with Wingma TAM.
</think>

The text discusses a football association and its team, which ha

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-01-31 17:30:58,816 - Node Texts Length: 1400, Summarized Text Length: 761
2025-01-31 17:30:59,264 - Summarized Text: <think>
Alright, so I need to write a detailed summary of this text. Let me read through it carefully first. The text is about میلاد فخرالدینی participating in a transfer window with Golestan Shahrjan moving to Asيا. There are several points mentioned:

1. Golestan Shahrjan had good activity in the winter transfer window.
2.米尔اد فخرالدینی played for Golestan Shahrjan and was called up by Golestan Qomayeh.
3. He had a good start to the second half, playing two matches.
4. He is happy about his inclusion.
5. There were mentions of other players being called up due to poor form.
6. The transfer window closed at midnight on Monday (I think it was 9 November), so any changes would only happen from Tuesday morning.

Wait, in the text, it says "6 نظر" which I think is 6 comments or notes, but the main content is about米尔اد and Golestan's transfers. I need to structure this 

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-01-31 17:34:37,286 - Node Texts Length: 49, Summarized Text Length: 487
2025-01-31 17:34:37,312 - Summarized Text: <think>
Okay, so I need to write a detailed summary of this Persian text. The user provided some context about "سروش رفیعی" and the dates involved. Let me try to understand what each part means.

First, "سروش رفیعی" translates to something like "Highway Resolution." The main topic seems to be about formalizing the return of Armenia from itsbasketball, probably a sports league or competition. The key dates are March 29 and May 13, which likely mark the restoration and implementation phases.

The user also mentioned that the text is written in Persian by an unknown author. So I need to translate this accurately while maintaining the original meaning. Since it's about formalizing a return after some period away, the summary should highlight the significance of these dates—formalization, implementation, and regional pride.

I'll start by translating each part: "رسمی کردن 

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-01-31 17:37:20,932 - Node Texts Length: 240, Summarized Text Length: 259
2025-01-31 17:37:20,934 - Summarized Text: <think>
Alright, I need to write a detailed summary of the given text. Let's break it down.

First, there's a reference to Mehdi Rahimi saying that the user decided on divorce before meeting her. So, she chose the engagement ring from Galgheh instead of other options.

Then, they're planning better results in the next chapter. The character Sarehian arrived, and someone mentioned Javad Ghaznavi's comment about her previous decision being correct.

Next, the focus is on continuing the activities of water transfers. It also mentions a half-hour visit to the reserve and an abrupt presence at the candidate team for promotion to League 2.
</think>

When Galgheh's proposal arrived for you, you chose it completely without any other options. 

Mehdi Rahimi mentioned that you had decided on divorce before your wedding, which was correct.

The goal now is to achieve better res

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-01-31 17:43:06,449 - Node Texts Length: 219, Summarized Text Length: 759
2025-01-31 17:43:06,453 - Summarized Text: <think>
Alright, so I need to summarize this text. Let me read through it again carefully.

The text is in Persian and has a few lines:

"خدا را شکر در این بازی هم توانستیم به بازی برگردیم *در این بازی حارتان برد برد؟
شراط خیلی خوب بود و آنجا پانزده بازی در نیم〖 Fezz حضور داشت
خدا را شکر شرایط خوب است و اینجا با اکثر بچهها قبلا هم یک تیم بودم و البته بیشتر رفیق بودم
خدا را شکر تیم خیلی خوبی داریم که جوی صمیمی دارد امکانات خیلی خوبی هم داریم"

First, I notice it's divided into lines. The first line is a bit unclear with the asterisk and possibly a typo: "در این بازی حارتان برد برد؟" which seems to be incomplete.

I'll start by translating each part.

1. The first line mentions "خدا را شکر در این بازی هم توانستیم به بازی برگردیم," which translates to "O ShKMERRA, in this game too we managed to return to the game." It's asking if they won.

2. The second line talks abou

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-01-31 17:48:08,081 - Node Texts Length: 244, Summarized Text Length: 624
2025-01-31 17:48:08,097 - Summarized Text: <think>
Alright, I need to write a detailed summary of the provided text. Let's see what we've got here.

The text is in Persian and seems to be about a public notice or announcement regarding something related to autumn harvest. The key points are:

1. It mentions "این دو بازی آغازین تو برای گلگاه چطور بود؟" which translates to "How were these two games for you at the apple orchard?" indicating that there were two initial games in an apple orchard.

2. The second part says "این دو بازی هم به شرایط جدولی ما خیلی کمک کرد" which translates to "These two games also helped us a lot with our table conditions." So, the games contributed significantly to improving the tables at their location.

3. The next lines are about the official unveiling (ronomie) of the fourth autumn harvest sale, noting that Sarehaysan is returning home on 8th November 2023, 21:03 hours. This seems

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-01-31 17:48:13,416 - Constructing Layer 1
2025-01-31 17:49:57,500 - Summarization Length: 100
2025-01-31 18:04:51,096 - Node Texts Length: 3502, Summarized Text Length: 332
2025-01-31 18:04:52,725 - Summarized Text: <think>
Okay, I need to write a detailed summary based on the provided text about football players Hamid Reza Soltani and Ali Mousavi and their contract extension with Golestan. The text mentions various clauses like salary increment, performance bonuses, medical benefits, and international duty responsibilities. It also notes that these were agreed upon in a meeting but not publicly disclosed.

Alright, so the summary needs to include all these points: the players' names, the club involved (Golestan), the contract extension details, specific terms mentioned—salary increases, performance bonuses, medical coverage, international duties—and that these were finalized in a meeting without public disclosure. I should structure this clearly, perhaps with each term as a bulle

Batches:   0%|          | 0/1 [00:08<?, ?it/s]

2025-01-31 18:22:07,477 - Node Texts Length: 2773, Summarized Text Length: 726
2025-01-31 18:22:09,136 - Summarized Text: <think>
Alright, I need to write a detailed summary based on the given text. Let me start by reading through it carefully to understand all the key points. 

The text discusses米尔adian's role as an intermediary between Golestan Shahrjan and Golestan Qomayeh during the winter transfer window. He was included in two matches for Qomayeh, which shows his positivity and professionalism. It also mentions that the transfer window closed on Monday night (9 November), so any changes would only take effect from Tuesday morning. Additionally, other players like Parvandeh and Aslani were called up due to poor form in the first half of the season.

I need to structure this summary into clear sections for better readability. Maybe start with米尔adian's role and his reception at Qomayeh, followed by the transfer window details and restrictions post-closure. Then mention other players

Batches:   0%|          | 0/1 [00:03<?, ?it/s]

2025-01-31 18:34:58,057 - Node Texts Length: 1221, Summarized Text Length: 316
2025-01-31 18:34:58,854 - Summarized Text: <think> Okay, I need to write a detailed summary of the given text about football matches and related news from November 8, 2003. The text includes several points: a match with 9 players, Peyman Booy on TV, an end-of-winter market day at a blue water park in Isfahan, and an incident involving an acceleration tape thrown onto the field during a football match.

First, I'll read through each section carefully to understand the key events. The first part mentions a football match with 9 players but provides no further details. Next, it talks about Peyman Booy being featured on the evening news discussing the match. Then, there's information about an end-of-winter market day at a blue water park in Isfahan that closed and plans to reopen later. Finally, during one of these matches, a fan threw an acceleration tape onto the field, causing an accelerando stop.

I need to 

Batches:   0%|          | 0/1 [00:06<?, ?it/s]

2025-01-31 18:53:49,917 - Node Texts Length: 734, Summarized Text Length: 1189
2025-01-31 18:53:50,686 - Summarized Text: <think>
Alright, I need to write a detailed summary of the given text. Let me read through it carefully first. The text begins with discussing an upcoming football match in the second half where they almost won by scoring three goals but unfortunately lost. The author expresses regret for not winning and decides to leave the team because there's no more help needed. They then mention other players like چریمیا, بلانکو, کاکوتو, رضامدین, میلادزکیپور, سامانفلاح, بیف omissio, and others, noting their current standings and struggles. The author also refers to other coaches like مطهری from Chapter 5 who is looking for a new team.

So, the main points I need to capture are:
1. The match loss in the second half despite scoring three goals.
2. The regret for not winning and leaving the team because they can't help anymore.
3. The current standings and struggles of other playe

Batches:   0%|          | 0/1 [00:03<?, ?it/s]

2025-01-31 18:58:55,248 - Node Texts Length: 309, Summarized Text Length: 371
2025-01-31 18:58:55,369 - Summarized Text: <think>
Alright, let's tackle this query step by step. The user has provided a text in Persian, which translates roughly to discussing an individual's proactive approach to selecting tasks aimed at positively impacting their future amidst competition. They want a detailed summary of about 150 words in English.

First, I need to understand the main points of the text. It seems the person is in a competitive situation where teams are vying for leadership, gains, or points. However, they're determined to choose tasks that will have a positive impact on their future. The challenge here is to capture both the individual's intention and the competitive environment accurately.

Next, I should consider the audience and context. It seems like this might be an academic or motivational piece, perhaps discussing strategic planning or personal development in a competitive setting

Batches:   0%|          | 0/1 [00:04<?, ?it/s]

2025-01-31 19:07:50,032 - Node Texts Length: 1384, Summarized Text Length: 435
2025-01-31 19:07:50,192 - Summarized Text: <think>
Alright, let's tackle this query step by step. The user has provided a text in Persian and an English summary they want me to write. My goal is to create a detailed summary of the given text about football events, emphasizing tension, pressure, and ongoing developments like contract announcements.

First, I need to understand the content thoroughly. The text discusses a football match with high stakes—both teams are focused on securing results as the final whistle approaches in the winter window. It mentions an announcement about the goalkeeper's contract extension as one of the ongoing developments. 

Next, I should consider what the user is asking for. They provided their own summary in English and now expect another detailed version. Perhaps they want to see if my response aligns with theirs or needs adjustments.

I'll structure the summary by starting wi

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-01-31 19:18:28,892 - Node Texts Length: 661, Summarized Text Length: 955
2025-01-31 19:18:29,045 - Summarized Text: <think>
Alright, so I need to help write a detailed summary of this text about Beivomaa, which is an Iranian football club. Let me start by reading through the provided text carefully to understand all the key points.

First, Beivomaa started their season with a goal equaling Iran's national anthem, which was "Ode to June 23." This seems like a strong opening and sets a positive tone for the team. They've also scored goals in matches against teams named Ardak Milan Esfahan, MAS Rafflesegan, and Ghaloom Sairsian in their history. These past performances show consistency and strength in attack.

Moving on, Beivomaa has played several games this season, all of which they have scored in, including against Ardak Milan Esfahan, MAS Rafflesegan, and Ghaloom Sairsian. This indicates that their scoring ability is a key part of their game plan and contributes to their competit

Batches:   0%|          | 0/1 [00:01<?, ?it/s]

2025-01-31 19:34:00,348 - Node Texts Length: 3197, Summarized Text Length: 567
2025-01-31 19:34:00,779 - Summarized Text: <think>
Alright, so I need to write a detailed summary of this text. Let me first read through it carefully to understand the main points.

The text starts with "این دو بازی آغازین تو برای گلگاه چطور بود؟" which translates to "How were these two games for you at the apple orchard?" It seems like there are two initial games at an apple orchard, and it's asking about their conditions. Then it says "این دو بازی هم به شرایط جدولی ما خیلی کمک کرد" meaning these two games also helped us a lot with our table conditions.

Next, the text mentions "ronomie" which I think is the official unveiling of something. It says Sarehaysan will return home on 8th November at 21:03 hours in the Iranian calendar system. That's important as it's a specific date and time.

Then there's "بیفوما, چهره ویژه روز پایانی بازار زمستانه" which translates to "A special face on the last day of the au

Batches:   0%|          | 0/1 [00:05<?, ?it/s]

2025-01-31 19:34:19,787 - Constructing Layer 2
2025-01-31 19:34:20,040 - Stopping Layer construction: Cannot Create More Layers. Total Layers in tree: 2
2025-01-31 19:34:24,316 - Successfully initialized TreeRetriever with Config 
        TreeRetrieverConfig:
            Tokenizer: <Encoding 'cl100k_base'>
            Threshold: 0.5
            Top K: 5
            Selection Mode: top_k
            Context Embedding Model: EMB
            Embedding Model: <__main__.SBertEmbeddingModel object at 0x000001F5FFA81180>
            Num Layers: None
            Start Layer: None
        


In [5]:
# Visualize the tree held by RA (built by RA.add_documents(text) earlier in the notebook).
# NOTE(review): no output is stored for this cell — confirm whether visualize_tree()
# renders inline or opens an external viewer.
RA.visualize_tree()

### Querying from the tree

```python
question = "..."  # any question
RA.answer_question(question)
```

In [21]:
# Question (Persian): "In which minute, and by whom, was Persepolis's experienced
# midfielder sent off from the match?"
question = "هافبک باتجربه پرسپولیس در چه دقیقه ای و توسط چه کسی از بازی اخراج شد؟"

# NOTE(review): collapse_tree=False presumably makes retrieval walk the tree layer by
# layer instead of searching all nodes at once — confirm against raptor's retriever.
answer = RA.answer_question(question=question, collapse_tree=False)

print("Answer: ", answer)

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-02-01 19:41:04,869 - HTTP Request: POST https://models.inference.ai.azure.com/chat/completions "HTTP/1.1 200 OK"


Answer:  هافبک باتجربه پرسپولیس، محمد خدابنده‌لو، در یکی از بازی‌های اخیر به طور زودهنگام اخراج شد. اما در متن به دقیقه دقیق اخراج او اشاره‌ای نشده است.


In [6]:
# Save the tree by calling RA.save("path/to/save"), so it can be loaded back later
# without rebuilding it from the source text.
# SAVE_PATH is reused by the cell that loads the tree back.
SAVE_PATH = "demo/football360_gpt-4o-mini"
RA.save(SAVE_PATH)

2025-02-01 19:30:23,084 - Tree successfully saved to demo/football360_gpt-4o-mini


In [None]:
# Load the saved tree back by passing its path into RetrievalAugmentation.
# The tree was built with RAC (custom SBert embedding model), so the same config
# must be supplied when reloading: without it RetrievalAugmentation falls back to
# its default configuration, and queries would be embedded with a different model
# than the one that produced the stored node embeddings.

RA = RetrievalAugmentation(config=RAC, tree=SAVE_PATH)

# `question` is the query string defined in the querying cell above.
answer = RA.answer_question(question=question)
print("Answer: ", answer)

## Using other Open Source Models for Summarization/QA/Embeddings

If you want to use other models such as Llama or Mistral, you can very easily define your own models and use them with RAPTOR. 

In [1]:
import torch
from raptor import BaseSummarizationModel, BaseQAModel, BaseEmbeddingModel, RetrievalAugmentationConfig, RetrievalAugmentation
from transformers import AutoTokenizer, pipeline

2025-01-28 18:01:16,680 - Loading faiss with AVX2 support.
2025-01-28 18:01:16,799 - Successfully loaded faiss with AVX2 support.


In [3]:
# To use Gemma you need to authenticate with Hugging Face.
# Skip this step if you already have the model downloaded.
from huggingface_hub import login
login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

Token has not been saved to git credential helper.
2025-01-28 13:30:31,671 - Token has not been saved to git credential helper.


In [4]:
from transformers import AutoTokenizer, pipeline
import torch

# You can define your own Summarization model by extending the base Summarization Class. 
class GEMMASummarizationModel(BaseSummarizationModel):
    """Summarization model backed by a Hugging Face text-generation pipeline.

    Extends BaseSummarizationModel so it can be passed as ``summarization_model``
    to RetrievalAugmentationConfig. Defaults to the ``google/gemma-2b-it`` checkpoint.
    """

    def __init__(self, model_name="google/gemma-2b-it"):
        """Load the tokenizer and the text-generation pipeline for ``model_name``.

        Args:
            model_name: Hugging Face model identifier to load.
        """
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.summarization_pipeline = pipeline(
            "text-generation",
            model=model_name,
            model_kwargs={"torch_dtype": torch.bfloat16},
            # Run on the GPU when CUDA is available, otherwise fall back to the CPU.
            device=torch.device('cuda' if torch.cuda.is_available() else 'cpu'),
        )

    def summarize(self, context, max_tokens=150):
        """Generate a summary of ``context``.

        Args:
            context: Text to summarize.
            max_tokens: Upper bound on the number of newly generated tokens.

        Returns:
            The generated summary with the prompt removed and surrounding
            whitespace stripped.
        """
        # Single-turn chat message carrying the summarization instruction.
        messages = [
            {"role": "user", "content": f"Write a summary of the following, including as many key details as possible: {context}:"}
        ]

        # Render the chat messages into the model's prompt string (not token ids).
        prompt = self.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

        # Sample a completion from the pipeline.
        outputs = self.summarization_pipeline(
            prompt,
            max_new_tokens=max_tokens,
            do_sample=True,
            temperature=0.7,
            top_k=50,
            top_p=0.95
        )

        # The text-generation pipeline returns prompt + completion by default, so
        # drop the prompt prefix (as GEMMAQAModel does). Otherwise the instruction
        # and the full input context would be stored as part of every summary.
        summary = outputs[0]["generated_text"][len(prompt):].strip()
        return summary


In [5]:
class GEMMAQAModel(BaseQAModel):
    """Question-answering model backed by a Hugging Face text-generation pipeline.

    Extends BaseQAModel so it can be passed as ``qa_model`` to
    RetrievalAugmentationConfig. Defaults to the ``google/gemma-2b-it`` checkpoint.
    """

    def __init__(self, model_name= "google/gemma-2b-it"):
        """Load the tokenizer and the text-generation pipeline for ``model_name``."""
        # Prefer the GPU when CUDA is available, otherwise use the CPU.
        run_on = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.qa_pipeline = pipeline(
            "text-generation",
            model=model_name,
            model_kwargs={"torch_dtype": torch.bfloat16},
            device=run_on,
        )

    def answer_question(self, context, question):
        """Answer ``question`` using ``context`` and return only the newly generated text."""
        # Single user turn holding both the retrieved context and the question.
        chat = [
            {"role": "user", "content": f"Given Context: {context} Give the best full answer amongst the option to question {question}"}
        ]
        prompt_text = self.tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)

        # Sampling settings for generation.
        sampling = dict(
            max_new_tokens=256,
            do_sample=True,
            temperature=0.7,
            top_k=50,
            top_p=0.95,
        )
        generations = self.qa_pipeline(prompt_text, **sampling)

        # Slice off the prompt prefix so only the model's answer is returned.
        full_text = generations[0]["generated_text"]
        return full_text[len(prompt_text):]

In [2]:
from sentence_transformers import SentenceTransformer
class SBertEmbeddingModel(BaseEmbeddingModel):
    """Embedding model that wraps a SentenceTransformer checkpoint."""

    def __init__(self, model_name="BAAI/bge-small-en-v1.5"):
        """Load the SentenceTransformer identified by ``model_name``."""
        encoder = SentenceTransformer(model_name)
        self.model = encoder

    def create_embedding(self, text):
        """Return the result of ``self.model.encode(text)``."""
        embedding = self.model.encode(text)
        return embedding


In [8]:
# Bundle the custom Gemma summarization/QA models and the SBert embedding model into one config.
RAC = RetrievalAugmentationConfig(summarization_model=GEMMASummarizationModel(), qa_model=GEMMAQAModel(), embedding_model=SBertEmbeddingModel())

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Device set to use cpu


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Device set to use cpu
2025-01-28 11:25:09,865 - Use pytorch device_name: cpu
2025-01-28 11:25:09,868 - Load pretrained SentenceTransformer: BAAI/bge-small-en-v1.5


In [9]:
# Create the RetrievalAugmentation instance from the custom config (RAC).
RA = RetrievalAugmentation(config=RAC)

2025-01-28 11:25:25,518 - Successfully initialized TreeBuilder with Config 
        TreeBuilderConfig:
            Tokenizer: <Encoding 'cl100k_base'>
            Max Tokens: 100
            Num Layers: 5
            Threshold: 0.5
            Top K: 5
            Selection Mode: top_k
            Summarization Length: 100
            Summarization Model: <__main__.GEMMASummarizationModel object at 0x000001AB0D0CF790>
            Embedding Models: {'EMB': <__main__.SBertEmbeddingModel object at 0x000001AB18E50A60>}
            Cluster Embedding Model: EMB
        
        Reduction Dimension: 10
        Clustering Algorithm: RAPTOR_Clustering
        Clustering Parameters: {}
        
2025-01-28 11:25:25,520 - Successfully initialized ClusterTreeBuilder with Config 
        TreeBuilderConfig:
            Tokenizer: <Encoding 'cl100k_base'>
            Max Tokens: 100
            Num Layers: 5
            Threshold: 0.5
            Top K: 5
            Selection Mode: top_k
            

In [18]:
# Read the sample document as UTF-8 explicitly: without an encoding argument, open()
# falls back to the platform's default encoding, which varies between machines.
with open('demo/sample.txt', 'r', encoding="utf-8") as file:
    text = file.read()

# Build the tree from the loaded text.
RA.add_documents(text)

2025-01-27 18:47:30,953 - Creating Leaf Nodes


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-01-27 18:47:44,908 - Created 35 Leaf Embeddings
2025-01-27 18:47:44,910 - Building All Nodes
2025-01-27 18:47:44,919 - Using Cluster TreeBuilder
2025-01-27 18:47:44,921 - Constructing Layer 0
Found Intel OpenMP ('libiomp') and LLVM OpenMP ('libomp') loaded at
the same time. Both libraries are known to be incompatible and this
can cause random crashes or deadlocks on Linux when loaded in the
same Python program.
Using threadpoolctl may cause crashes or deadlocks. For more
information and possible workarounds, please see
    https://github.com/joblib/threadpoolctl/blob/master/multiple_openmp.md

2025-01-27 18:48:12,514 - Summarization Length: 100


In [None]:
# Ask a question about the document added to the tree above.
question = "How did Cinderella reach her happy ending?"

answer = RA.answer_question(question=question)

print("Answer: ", answer)

In [3]:
# Configure the QA and summarization models from raptor's Ollama*V2 classes; both use the same model tag here.
# NOTE(review): OllamaQAModelV2 / OllamaSummarizationModelV2 are defined in the raptor package, not shown here.
from raptor import OllamaQAModelV2, OllamaSummarizationModelV2
qa_llm = 'llama3.2:latest'
qa_model = OllamaQAModelV2(model_name= qa_llm)
summarization_llm = 'llama3.2:latest'
summarization_model = OllamaSummarizationModelV2(model_name= summarization_llm)

# Load the tree stored at "demo/football360" together with this config.
RAC = RetrievalAugmentationConfig(qa_model=qa_model, summarization_model=summarization_model, embedding_model=SBertEmbeddingModel())
RA = RetrievalAugmentation(config=RAC, tree="demo/football360")

2025-01-28 18:01:31,000 - Use pytorch device_name: cpu
2025-01-28 18:01:31,003 - Load pretrained SentenceTransformer: BAAI/bge-small-en-v1.5
2025-01-28 18:01:37,385 - Successfully initialized TreeBuilder with Config 
        TreeBuilderConfig:
            Tokenizer: <Encoding 'cl100k_base'>
            Max Tokens: 100
            Num Layers: 5
            Threshold: 0.5
            Top K: 5
            Selection Mode: top_k
            Summarization Length: 100
            Summarization Model: <raptor.SummarizationModels.OllamaSummarizationModelV2 object at 0x000001E7B8173430>
            Embedding Models: {'EMB': <__main__.SBertEmbeddingModel object at 0x000001E7B8173370>}
            Cluster Embedding Model: EMB
        
        Reduction Dimension: 10
        Clustering Algorithm: RAPTOR_Clustering
        Clustering Parameters: {}
        
2025-01-28 18:01:37,387 - Successfully initialized ClusterTreeBuilder with Config 
        TreeBuilderConfig:
            Tokenizer: <Encoding '

In [5]:
# Read the whole document in one call; the with-block guarantees the file handle is closed.
with open("demo/data4.txt", "r", encoding="utf-8") as f:
    text = f.read()

In [6]:
# Build the tree from the loaded text (the log below shows leaf creation, then layer construction).
RA.add_documents(text)

2025-01-28 17:00:41,397 - Creating Leaf Nodes


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-01-28 17:01:13,348 - Created 102 Leaf Embeddings
2025-01-28 17:01:13,351 - Building All Nodes
2025-01-28 17:01:13,368 - Using Cluster TreeBuilder
2025-01-28 17:01:13,370 - Constructing Layer 0
Found Intel OpenMP ('libiomp') and LLVM OpenMP ('libomp') loaded at
the same time. Both libraries are known to be incompatible and this
can cause random crashes or deadlocks on Linux when loaded in the
same Python program.
Using threadpoolctl may cause crashes or deadlocks. For more
information and possible workarounds, please see
    https://github.com/joblib/threadpoolctl/blob/master/multiple_openmp.md

2025-01-28 17:01:52,409 - Summarization Length: 100
2025-01-28 17:03:46,554 - Node Texts Length: 959, Summarized Text Length: 637


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-01-28 17:06:34,609 - Node Texts Length: 1400, Summarized Text Length: 929


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-01-28 17:07:35,762 - Node Texts Length: 398, Summarized Text Length: 168


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-01-28 17:08:54,634 - Node Texts Length: 553, Summarized Text Length: 456


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-01-28 17:09:35,165 - Node Texts Length: 302, Summarized Text Length: 223


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-01-28 17:10:08,725 - Node Texts Length: 131, Summarized Text Length: 260


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-01-28 17:11:09,783 - Node Texts Length: 364, Summarized Text Length: 482


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-01-28 17:12:03,845 - Node Texts Length: 339, Summarized Text Length: 200


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-01-28 17:13:17,244 - Node Texts Length: 556, Summarized Text Length: 456


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-01-28 17:14:05,164 - Node Texts Length: 326, Summarized Text Length: 306


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-01-28 17:15:53,484 - Node Texts Length: 1144, Summarized Text Length: 451


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-01-28 17:18:47,109 - Node Texts Length: 1105, Summarized Text Length: 924


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-01-28 17:21:15,862 - Node Texts Length: 1050, Summarized Text Length: 969


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-01-28 17:24:29,022 - Node Texts Length: 1945, Summarized Text Length: 851


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-01-28 17:25:47,806 - Node Texts Length: 412, Summarized Text Length: 542


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-01-28 17:27:11,541 - Node Texts Length: 192, Summarized Text Length: 631


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-01-28 17:28:07,082 - Node Texts Length: 221, Summarized Text Length: 406


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-01-28 17:28:07,610 - Constructing Layer 1
2025-01-28 17:28:18,424 - Summarization Length: 100
2025-01-28 17:33:36,270 - Node Texts Length: 2889, Summarized Text Length: 1437


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-01-28 17:38:03,577 - Node Texts Length: 2413, Summarized Text Length: 470


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-01-28 17:41:05,075 - Node Texts Length: 1174, Summarized Text Length: 975


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-01-28 17:41:58,915 - Node Texts Length: 459, Summarized Text Length: 222


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-01-28 17:45:46,143 - Node Texts Length: 1567, Summarized Text Length: 1429


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-01-28 17:46:28,132 - Node Texts Length: 409, Summarized Text Length: 175


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-01-28 17:46:28,394 - Constructing Layer 2
2025-01-28 17:46:28,396 - Stopping Layer construction: Cannot Create More Layers. Total Layers in tree: 2
2025-01-28 17:46:28,430 - Successfully initialized TreeRetriever with Config 
        TreeRetrieverConfig:
            Tokenizer: <Encoding 'cl100k_base'>
            Threshold: 0.5
            Top K: 5
            Selection Mode: top_k
            Context Embedding Model: EMB
            Embedding Model: <__main__.SBertEmbeddingModel object at 0x000001B3EAABC1F0>
            Num Layers: None
            Start Layer: None
        


In [4]:
# Persian query: "Who were Esteghlal's winter signings?"
question = "خریدهای زمستانی استقلال چه کسانی بودند؟"
# With return_layer_information=True the result printed below is a tuple:
# (answer text, list of {'node_index', 'layer_number'} dicts).
answer = RA.answer_question(question=question, return_layer_information=True)
print("Answer: ", answer)

2025-01-28 18:01:57,129 - Using collapsed_tree


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Answer:  ('هشتاد و چهارمین خرید زمستانی استقلال سهرابیان  بود.', [{'node_index': 2, 'layer_number': 0}, {'node_index': 92, 'layer_number': 0}, {'node_index': 57, 'layer_number': 0}, {'node_index': 73, 'layer_number': 0}, {'node_index': 28, 'layer_number': 0}, {'node_index': 6, 'layer_number': 0}, {'node_index': 15, 'layer_number': 0}, {'node_index': 25, 'layer_number': 0}, {'node_index': 67, 'layer_number': 0}, {'node_index': 19, 'layer_number': 0}])


In [7]:
# Persist the built tree; the same path is passed as tree="demo/football360" when loading it elsewhere in this notebook.
SAVE_PATH = "demo/football360"
RA.save(SAVE_PATH)

2025-01-28 17:47:21,635 - Tree successfully saved to demo/football360


In [5]:
# Show the tree that was built.
# NOTE(review): visualize_tree is provided by the raptor package; what it renders is not shown here.
RA.visualize_tree()