###############################################################################
# SRSWTIHilbertSearch
###############################################################################

In [1]:
# ---------------------------  Imports ------------------------------
from srswti_axis import SRSWTIHilbertSearch
from utils import call_groq_llm, SAMPLE_DOCS

  from .autonotebook import tqdm as notebook_tqdm
2025-03-11 23:42:32.523950: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1741716752.537289 1021057 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1741716752.541078 1021057 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-03-11 23:42:32.555896: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
INFO:datasets:PyTorch version 2.5.0 available.
INFO:datasets:TensorFlow ver

# ---------------------------  Basic Demos ------------------------


In [2]:
# Build a Hilbert ranker configured for the pointwise approach.
hilbert = SRSWTIHilbertSearch(approach="pointwise")

# Training data is three parallel, index-aligned structures:
# train_queries[i] <-> train_docs[i] <-> train_relevances[i].

# One example query per domain.
train_queries = ["machine learning", "sports", "finance"]

# Three placeholder documents per query ("ML doc1" ... "finance doc3"),
# listed in the same order as train_queries.
train_docs = [
    [f"{topic} doc{n}" for n in (1, 2, 3)]
    for topic in ("ML", "sports", "finance")
]

# Relevance label for every (query, document) pair; larger means more
# relevant, with 1.0 as the top score.
train_relevances = [
    [1.0, 0.8, 0.2],    # machine learning
    [0.9, 0.85, 0.3],   # sports
    [1.0, 0.5, 0.4],    # finance
]

# Fit the pointwise model with two passes over the training data.
hilbert.train(train_queries, train_docs, train_relevances, epochs=2)

# Optional: persist the trained model to disk.
# hilbert.save_model('hilbert_model')

INFO:sentence_transformers.SentenceTransformer:Use pytorch device_name: cpu
INFO:sentence_transformers.SentenceTransformer:Load pretrained SentenceTransformer: all-mpnet-base-v2
Batches: 100%|██████████| 1/1 [00:00<00:00,  3.73it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00, 25.37it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00, 37.91it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00, 27.43it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00, 36.11it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00, 22.05it/s]


In [3]:
# Held-out documents that were not part of the training data.
test_docs = ["ML doc 4", "Some sports doc4", "Sport Doc 4"]

# Score the held-out documents against the "machine learning" query using the
# pointwise model; the printed result is a list of (doc_index, score) pairs.
rank_results = hilbert.rank_documents("machine learning", test_docs)
print("== Hilbert 2: pointwise ==")
print(f"{rank_results} \n")

Batches: 100%|██████████| 1/1 [00:00<00:00, 32.87it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00, 22.46it/s]

== Hilbert 2: pointwise ==
[(np.int64(0), np.float32(0.5130338)), (np.int64(1), np.float32(0.47368836)), (np.int64(2), np.float32(0.47124225))] 






In [4]:
# Second ranker, this time configured for the listwise approach
# (the ranking is optimized as a whole list rather than per document).
hilbert_listwise = SRSWTIHilbertSearch(approach="listwise")

# Reuse exactly the same training data that the pointwise model was fitted on.
hilbert_listwise.train(train_queries, train_docs, train_relevances, epochs=2)

# Rank the same held-out documents, here for the "finance" query.
# NOTE: the pointwise cell queried "machine learning", so the two printed
# rankings use different queries and their scores are not directly comparable.
rank_results_listwise = hilbert_listwise.rank_documents("finance", test_docs)
print("== Hilbert 3: listwise ==")
print(f"{rank_results_listwise} \n")

# When the listwise approach tends to be a good fit:
# - the overall order of the result list matters more than individual scores
# - documents in a result set depend on one another
# - complex queries that benefit from a more coherent result set

INFO:sentence_transformers.SentenceTransformer:Use pytorch device_name: cpu
INFO:sentence_transformers.SentenceTransformer:Load pretrained SentenceTransformer: all-mpnet-base-v2
Batches: 100%|██████████| 1/1 [00:00<00:00, 34.50it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00, 28.26it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00, 39.71it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00, 28.47it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00, 40.65it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00, 28.49it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00, 36.69it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00, 26.49it/s]

== Hilbert 3: listwise ==
[(np.int64(0), np.float32(0.35199177)), (np.int64(2), np.float32(0.32481337)), (np.int64(1), np.float32(0.32319486))] 






# ------------------- RAG Demos ---------------------


In [5]:
def _run_rag_demo(ranker, user_query, docs, doc_label, demo_tag, show_ranking=False):
    """
    Shared retrieve-then-generate flow behind both RAG demos.

    Ranks ``docs`` against ``user_query`` with ``ranker``, selects the
    highest-scoring document, sends it together with the query to the LLM and
    prints the reply.

    Args:
        ranker: Object exposing ``rank_documents(query, docs)``. Its result is
            read as a sequence of ``(doc_index, score)`` pairs.
        user_query (str): The query from the user.
        docs (list): Candidate documents; indexed with the returned ``doc_index``.
        doc_label (str): Text placed before the chosen document in the prompt.
        demo_tag (str): Letter shown in the printed banner.
        show_ranking (bool): When True, print the raw ranking before answering.

    Returns:
        None: Everything is reported via ``print``.
    """
    ranked = ranker.rank_documents(user_query, docs)
    if show_ranking:
        print(ranked)

    if ranked:
        # max() returns the first maximal item, which is the same element a
        # stable descending sort would put at position 0 -- without sorting
        # the whole list just to read its head.
        best = max(ranked, key=lambda pair: pair[1])
        top_doc = docs[best[0]]
    else:
        # Nothing was ranked: fall back to a placeholder so the prompt is
        # still well-formed.
        top_doc = "No doc"

    prompt = f"User query: {user_query}\n{doc_label}: {top_doc}\nAnswer the query."
    resp = call_groq_llm(prompt)
    print(f"[Theorem11 RAG Demo {demo_tag}] LLM:\n", resp, "\n")


def rag_demo_a(user_query, docs=SAMPLE_DOCS):
    """
    Demonstrates RAG (Retrieval-Augmented Generation) using the pointwise approach.

    Ranks the documents against the user query with the pointwise Hilbert model
    (the global ``hilbert``), retrieves the top document, and generates an
    answer using an LLM.

    Args:
        user_query (str): The query from the user
        docs (list): List of documents to search through, defaults to SAMPLE_DOCS

    Returns:
        None: Prints the ranked documents and the LLM response
    """
    _run_rag_demo(hilbert, user_query, docs, "Top doc", "A", show_ranking=True)


def rag_demo_b(user_query, docs=SAMPLE_DOCS):
    """
    Demonstrates RAG (Retrieval-Augmented Generation) using the listwise approach.

    Ranks the documents against the user query with the listwise Hilbert model
    (the global ``hilbert_listwise``), retrieves the top document, and
    generates an answer using an LLM.

    Args:
        user_query (str): The query from the user
        docs (list): List of documents to search through, defaults to SAMPLE_DOCS

    Returns:
        None: Prints the LLM response (the raw ranking is not printed)
    """
    _run_rag_demo(hilbert_listwise, user_query, docs, "Top doc (listwise)", "B")


In [6]:
# Pointwise RAG demo: answer a question using the best-ranked sample document.
rag_demo_a(user_query="Explain machine learning", docs=SAMPLE_DOCS)

Batches: 100%|██████████| 1/1 [00:00<00:00, 28.37it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00,  1.73it/s]


[(np.int64(0), np.float32(0.47484967)), (np.int64(2), np.float32(0.46288988)), (np.int64(1), np.float32(0.46054238))]
[Theorem11 RAG Demo A] LLM:
 [Groq LLM] Machine learning is a subset of artificial intelligence that involves the use of algorithms and statistical models to enable machines to perform a specific task without using explicit instructions. One of the key concepts in machine learning is neural networks, which are computational models inspired by the human brain's architecture.

Neural networks consist of interconnected nodes (neurons) organized in layers, including input, hidden, and output layers. The structure of these networks involves neurons receiving inputs, applying weights, summing them, and passing through activation functions. The learning process in neural networks occurs through the adjustment of weights during training processes like backpropagation.

There are various types of neural networks, including feedforward neural networks, convolutional neural networ

In [None]:
# Listwise RAG demo: answer a question using the best-ranked sample document.
rag_demo_b(user_query="What is generative AI?", docs=SAMPLE_DOCS)


Batches: 100%|██████████| 1/1 [00:00<00:00, 48.65it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00, 23.44it/s]


[Theorem11 RAG Demo B] LLM:
 [Groq LLM] The provided information about neural networks is related to the topic of generative AI, as neural networks are a fundamental component of many generative AI models. However, it doesn't explicitly define what generative AI is.

To answer the query: Generative AI refers to a type of artificial intelligence that uses neural networks and other machine learning algorithms to generate new, synthetic data that is similar to existing data. This can include images, videos, music, text, and other forms of data. Generative AI models, such as Generative Adversarial Networks (GANs) and Variational Autoencoders (VAEs), use neural networks to learn patterns and structures in data and then generate new data that is consistent with those patterns.

In the context of the provided information, generative AI can be seen as an application of neural networks, particularly those that involve complex architectures like CNNs, RNNs, and Transformers. However, a more deta