###############################################################################
# SrswtiDeduplicator
###############################################################################

In [None]:
# --------------------------- Imports -------------------------------
from srswti_axis import SrswtiDeduplicator
from utils import call_groq_llm,SAMPLE_DOCS
# --------------------------- Basic 3 Demos -------------------------

# One deduplicator instance, shared by all of the demo cells that follow.
deduper = SrswtiDeduplicator()

# Demo corpus: the Mars sentence is present twice verbatim (an exact duplicate),
# followed by a single distinct sentence about Jupiter.
docs_dedupe = ["This is a unique sentence about traveling to Mars."] * 2 + [
    "Different content about traveling to Jupiter."
]

  from .autonotebook import tqdm as notebook_tqdm
2025-03-10 15:12:22.684475: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1741599742.703847  941064 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1741599742.709013  941064 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-03-10 15:12:22.729694: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
INFO:datasets:PyTorch version 2.5.0 available.
INFO:datasets:TensorFlow ver

In [None]:
# Demo 1: run the deduplicator over the sample list with threshold=0.8.
# NOTE(review): the threshold is presumably a similarity cutoff — confirm
# against the srswti_axis documentation.
unique_docs_1 = deduper.deduplicate(docs_dedupe, threshold=0.8)

# Emit a header line, then the result followed by a blank line.
print("== Dedupe 1 ==")
print(unique_docs_1, end=" \n\n")  # value, one space, then two newlines

== Dedupe 1 ==
['This is a unique sentence about traveling to Mars.', 'Different content about traveling to Jupiter.'] 



In [None]:
# Demo 2: same call as demo 1, but with return_indices=True.
# NOTE(review): only element [0] of the result is printed, and the recorded
# output for it is `1`; the exact shape of the return value when
# return_indices=True is not visible here — confirm against the library docs.
unique_docs_2 = deduper.deduplicate(docs_dedupe, threshold=0.8, return_indices=True)
print("== Dedupe 2 (indices) ==")
print(unique_docs_2[0], end=" \n\n")  # value, one space, then two newlines


== Dedupe 2 (indices) ==
1 



In [None]:
import random
# ------------------- RAG Demos ----------------------
def rag_demo_a(docs=SAMPLE_DOCS):
    """
    Summarize a deduplicated set of documents via ``call_groq_llm``.

    Steps performed:
    - Pass ``docs`` through ``deduper.deduplicate`` with ``threshold=0.9``.
    - Embed the deduplicated result in a prompt that asks for a brief summary.
    - Send that prompt to ``call_groq_llm`` and print the reply under a
      "[Theorem8 RAG Demo A]" header.

    Args:
        docs (list): Text documents to process (defaults to SAMPLE_DOCS).

    Returns:
        None. The reply is printed rather than returned.
    """
    deduped_docs = deduper.deduplicate(docs, threshold=0.9)
    summary_prompt = f"Unique docs:\n{deduped_docs}\nSummarize them briefly."
    # The LLM call is evaluated first, then its reply is printed with the header.
    print("[Theorem8 RAG Demo A] LLM:\n", call_groq_llm(summary_prompt), "\n")

def rag_demo_b(user_query, docs=SAMPLE_DOCS, seed=None):
    """
    Demonstrates a query-based RAG workflow with document deduplication.

    This function:
    1. Deduplicates the input documents with a 0.9 threshold
    2. Randomly selects one of the remaining documents as context
    3. Creates a prompt with the user's query and the selected document
    4. Calls ``call_groq_llm`` with that prompt
    5. Prints the LLM's response

    Note:
        The context document is picked at random, NOT by relevance to
        ``user_query``, so the chosen document may be unrelated to the
        question (the recorded output of the demo call in this notebook shows
        exactly that). Pass ``seed`` to make the pick repeatable across runs.

    Args:
        user_query (str): The user's question or request
        docs (list): List of text documents to process (defaults to SAMPLE_DOCS)
        seed (int | None): Optional seed for the document pick. When None
            (the default) the module-level ``random`` generator is used, which
            is the original behaviour.

    Returns:
        None. The reply is printed rather than returned.
    """
    unique = deduper.deduplicate(docs, threshold=0.9)
    if unique:
        # Use a dedicated RNG only when a seed is supplied, so the default
        # path keeps drawing from the global generator exactly as before.
        picker = random.Random(seed) if seed is not None else random
        chosen = picker.choice(unique)
    else:
        # Nothing survived deduplication (or no docs were given).
        chosen = "No doc found."
    prompt = (
        f"User query: {user_query}\nChosen doc:\n{chosen}\n"
        "Respond accordingly."
    )
    resp = call_groq_llm(prompt)
    print("[Theorem8 RAG Demo B] LLM:\n", resp, "\n")



In [None]:
# Run demo A on the sample corpus; the LLM reply is printed by the function.
rag_demo_a(docs=SAMPLE_DOCS)

[Theorem8 RAG Demo A] LLM:
 [Groq LLM] Here's a brief summary of the unique documents:

1. **Neural Networks**: An overview of neural networks, including their structure, learning process, types (e.g., CNNs, RNNs), and applications (e.g., image recognition, natural language processing).
2. **Generative AI and Blockchain Integration**: Exploring the intersection of generative AI and blockchain technology, including NFT creation, decentralized AI training, and on-chain AI models, as well as challenges and considerations.
3. **Quantum Computing**: Introducing quantum computing, its principles, and potential impact on fields like cryptography, drug discovery, and artificial intelligence, while highlighting current challenges and the ongoing development of more powerful quantum processors. 



In [None]:
# Run demo B with a sample question; the context document is picked at random
# inside the function, so it may not be the one that covers the question.
rag_demo_b(
    user_query="How can quantum computing affect cryptography?",
    docs=SAMPLE_DOCS,
)


[Theorem8 RAG Demo B] LLM:
 [Groq LLM] The provided document does not discuss the impact of quantum computing on cryptography. It focuses on the integration of generative AI and blockchain technology, covering topics such as NFT creation, decentralized AI training, on-chain AI models, governance, and data marketplaces. If you're looking for information on how quantum computing affects cryptography, I'd be happy to provide a general overview.

Quantum computing has the potential to significantly impact cryptography, as it can potentially break certain types of classical encryption algorithms. Quantum computers can process vast amounts of information in parallel, making them much faster than classical computers for certain types of calculations. This means that quantum computers could potentially factor large numbers exponentially faster than classical computers, which would break many encryption algorithms currently in use, such as RSA.

However, quantum computing also has the potential