###############################################################################
# SrswtiClusterer
###############################################################################


In [1]:
# ---------------------------  Imports -------------------------------
from srswti_axis import SrswtiClusterer
from utils import call_groq_llm, SAMPLE_DOCS
# ---------------------------  Basic 3 Demos -------------------------
# Five short sentences used as input for the clustering demos:
# three about machine learning, two about English culture.
docs_cluster = [
    "Neural networks are part of ML.",
    "SVMs are older but still used.",
    "Reinforcement learning is RL.",
    "Shakespeare was an English playwright.",
    "The Beatles were an English rock band.",
]

# One clusterer instance, reused by the demo cells and the RAG helpers.
clusterer = SrswtiClusterer()

  from .autonotebook import tqdm as notebook_tqdm
2025-03-10 15:10:13.319183: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1741599613.338179  938910 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1741599613.343511  938910 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-03-10 15:10:13.363347: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
INFO:datasets:PyTorch version 2.5.0 available.
INFO:datasets:TensorFlow ver

INFO:wordllama.wordllama:Downloading tokenizer file 'l2_supercat_tokenizer_config.json' from Hugging Face repository 'dleemiller/word-llama-l2-supercat'.


In [None]:
# Group the demo sentences with SrswtiClusterer.
#   docs_cluster   -> the list of texts to cluster
#   k              -> number of clusters requested (3 here)
#   max_iterations -> maximum iterations for the clustering algorithm (500)
labels_c, score_c = clusterer.cluster_documents(
    docs_cluster,
    k=3,
    max_iterations=500,
)

print("== Clusterer 1 ==")
# labels_c holds one cluster id (0, 1, or 2) per document, in input order.
# score_c: in the recorded run it matches the "Best inertia" value reported by
# the k-means logger, so it appears to be the inertia (lower = tighter
# clusters) -- NOTE(review): confirm against the library documentation.
print("Labels:", labels_c, "Score:", score_c, "\n")

INFO:kmeans_logger:Initialization 1/3: Inertia = 1.76, Iterations = 5, Time = 0.00 seconds
INFO:kmeans_logger:New best inertia: 1.76
INFO:kmeans_logger:Initialization 2/3: Inertia = 1.52, Iterations = 5, Time = 0.00 seconds
INFO:kmeans_logger:New best inertia: 1.52
INFO:kmeans_logger:Initialization 3/3: Inertia = 1.52, Iterations = 5, Time = 0.00 seconds
INFO:kmeans_logger:KMeans clustering complete. Best inertia: 1.52
INFO:kmeans_logger:Total kmeans clustering time: 0.00 seconds


== Clusterer 1 ==
Labels: [0, 1, 0, 2, 2] Score: 1.5188791751861572 



In [None]:
# Cluster ids carry no meaning of their own and can change from one run to the
# next, so "cluster 0" is not guaranteed to be the machine-learning group.
# Anchor on a sentence known to be about ML instead: docs_cluster[0]
# ("Neural networks are part of ML.") and take whatever label it received.
ml_label = labels_c[0]

# Keep every document that landed in the same cluster as the anchor sentence
ml_docs = [doc for doc, lbl in zip(docs_cluster, labels_c) if lbl == ml_label]
print("== Clusterer 2: ML docs ==")
# Display the extracted machine learning documents
print(ml_docs, "\n")

== Clusterer 2: ML docs ==
['Neural networks are part of ML.', 'Reinforcement learning is RL.'] 



In [None]:
# -------------------  2 Groq LLM + RAG Demos ----------------------

def rag_demo_a(docs=SAMPLE_DOCS):
    """
    Cluster a set of documents and have the LLM summarize each cluster.

    Workflow:
    1. Split the documents into 2 clusters with the shared ``clusterer``
    2. Build a mapping of cluster label -> documents carrying that label
    3. Send the mapping to the LLM with a request to summarize each cluster

    Args:
        docs (list): Text documents to process (defaults to SAMPLE_DOCS)

    Returns:
        None: The LLM's reply is printed, not returned. When ``docs`` is
        empty a short notice is printed and no clustering or LLM call is made.
    """
    # Nothing to cluster: report and stop early
    if not docs:
        print("[ RAG Demo A] No docs.")
        return

    # Only the labels are needed here; the second return value is discarded
    assignments, _ = clusterer.cluster_documents(docs, k=2)

    # Bucket each document under the label it was assigned
    grouped = {}
    for text, cluster_id in zip(docs, assignments):
        if cluster_id not in grouped:
            grouped[cluster_id] = []
        grouped[cluster_id].append(text)

    # The mapping is embedded in the prompt via its string form
    prompt = "Docs grouped into 2 clusters:\n" + str(grouped) + "\nSummarize each cluster."

    # Query the LLM and show its answer
    answer = call_groq_llm(prompt)
    print("[ RAG Demo A] LLM:\n", answer, "\n")


def rag_demo_b(user_query, docs=SAMPLE_DOCS, cluster_id=0):
    """
    Answer a user query with an LLM, using only the documents of one cluster.

    Workflow:
    1. Clusters the input documents into 2 groups
    2. Keeps only the documents whose label equals ``cluster_id``
    3. Asks the LLM to answer the user's query from those documents alone

    Note that the query itself plays no part in choosing the documents: they
    are selected purely by cluster label.
    NOTE(review): label ids look arbitrary -- the logged k-means
    initializations differ between runs -- so confirm which topic a label
    holds before relying on it.

    Args:
        user_query (str): The question or query from the user
        docs (list): List of text documents to process (defaults to SAMPLE_DOCS)
        cluster_id (int): Label of the cluster whose documents are handed to
            the LLM. Defaults to 0, the value that was previously hard-coded.

    Returns:
        None: Prints the LLM's response to the console. Prints a notice and
        returns without calling the LLM when ``docs`` is empty or when no
        document carries ``cluster_id``.
    """
    if not docs:
        print("[ RAG Demo B] No docs.")
        return

    # Cluster the documents into 2 groups (the score is not needed here)
    cluster_labels, _ = clusterer.cluster_documents(docs, k=2)

    # Select only documents from the requested cluster
    chosen_docs = [doc for doc, lbl in zip(docs, cluster_labels) if lbl == cluster_id]

    # An empty selection would ask the LLM to answer from nothing; stop instead
    if not chosen_docs:
        print(f"[ RAG Demo B] No docs in cluster {cluster_id}.")
        return

    # Create prompt with user query and selected documents
    prompt = (
        f"User query: {user_query}\nDocs in cluster {cluster_id}:\n{chosen_docs}\n"
        "Answer only with these docs."
    )

    # Call the LLM with the prompt and print the response
    resp = call_groq_llm(prompt)
    print("[ RAG Demo B] LLM:\n", resp, "\n")

In [5]:
# Run demo A on SAMPLE_DOCS (imported from utils): cluster, then summarize each cluster
rag_demo_a(docs=SAMPLE_DOCS)


INFO:kmeans_logger:Initialization 1/3: Inertia = 0.69, Iterations = 5, Time = 0.00 seconds
INFO:kmeans_logger:New best inertia: 0.69
INFO:kmeans_logger:Initialization 2/3: Inertia = 0.69, Iterations = 5, Time = 0.00 seconds
INFO:kmeans_logger:Initialization 3/3: Inertia = 0.61, Iterations = 5, Time = 0.00 seconds
INFO:kmeans_logger:New best inertia: 0.61
INFO:kmeans_logger:KMeans clustering complete. Best inertia: 0.61
INFO:kmeans_logger:Total kmeans clustering time: 0.00 seconds


[Theorem7 RAG Demo A] LLM:
 [Groq LLM] **Cluster 0: Neural Networks and AI**

This cluster focuses on the concepts and applications of neural networks, a key area of artificial intelligence. The documents cover the structure, learning processes, and types of neural networks, including feedforward, convolutional, and recurrent neural networks. The applications of neural networks in image and speech recognition, natural language processing, game playing, and medical diagnosis are also discussed. The challenges of neural networks, such as overfitting and interpretability, are mentioned, but the overall tone is positive, highlighting the potential of neural networks to drive innovations in various fields.

**Cluster 1: Emerging Technologies - Blockchain, Generative AI, and Quantum Computing**

This cluster explores the intersection of emerging technologies, including blockchain, generative AI, and quantum computing. The documents discuss the potential applications of these technologies, su

In [6]:
# Run demo B on SAMPLE_DOCS (imported from utils) with a sample question
rag_demo_b(user_query="Explain AI approaches", docs=SAMPLE_DOCS)


INFO:kmeans_logger:Initialization 1/3: Inertia = 0.61, Iterations = 5, Time = 0.00 seconds
INFO:kmeans_logger:New best inertia: 0.61
INFO:kmeans_logger:Initialization 2/3: Inertia = 0.61, Iterations = 5, Time = 0.00 seconds
INFO:kmeans_logger:Initialization 3/3: Inertia = 0.61, Iterations = 5, Time = 0.00 seconds
INFO:kmeans_logger:KMeans clustering complete. Best inertia: 0.61
INFO:kmeans_logger:Total kmeans clustering time: 0.00 seconds


[Theorem7 RAG Demo B] LLM:
 [Groq LLM] There are several AI approaches, specifically within the realm of Neural Networks. These include:

1. **Feedforward Neural Networks**: Information flows in one direction, making them suitable for simple applications.
2. **Convolutional Neural Networks (CNNs)**: Specialized for image processing, CNNs are widely used in computer vision tasks.
3. **Recurrent Neural Networks (RNNs)**: Handle sequential data with memory, making them suitable for tasks like speech recognition and natural language processing.
4. **Transformers**: An advanced architecture for natural language processing, transformers have shown impressive results in tasks like language translation and text generation.

These AI approaches have various applications, including:

* Image and speech recognition
* Natural language processing
* Game playing and decision making
* Medical diagnosis and predictions

However, neural networks also come with challenges like overfitting, computational