In [1]:
import requests
import json
import math
from typing import List, Tuple

In [None]:
class SimpleQLM:
    """Toy document scorer that blends word overlap with a signal from Ollama.

    Despite the name, ``get_perplexity`` does not compute a model perplexity:
    it returns a length ratio derived from the prompt and the text Ollama
    generates. ``score_document`` mixes that signal with the fraction of query
    words found in the document.

    Args:
        model_name: Name of the Ollama model sent in the request payload.
        ollama_url: URL the generate request is POSTed to.
        timeout: Seconds passed to ``requests.post`` as ``timeout`` so a
            stalled server cannot block the notebook forever.
    """

    # Weights used by score_document to blend the two signals.
    OVERLAP_WEIGHT = 0.7
    LLM_WEIGHT = 0.3

    def __init__(self, model_name="llama3",
                 ollama_url="http://localhost:11434/api/generate",
                 timeout=120.0):
        self.model_name = model_name
        self.ollama_url = ollama_url
        self.timeout = timeout

    def get_perplexity(self, text: str) -> float:
        """Return a perplexity-like proxy for ``text`` (lower is treated as better).

        The value is ``word_count(text) / (len(generated_response) + 1)``, i.e.
        prompt length in whitespace-separated words divided by the generated
        response length in characters plus one.

        Args:
            text: Prompt sent to the model.

        Returns:
            The proxy value, or ``float('inf')`` when the request fails, the
            server answers with a non-200 status, or the reply cannot be
            processed.
        """
        payload = {
            "model": self.model_name,
            "prompt": text,
            "stream": False,
            "options": {
                "temperature": 0,
                "num_predict": 5  # Generate only a few tokens
            }
        }

        try:
            response = requests.post(
                self.ollama_url, json=payload, timeout=self.timeout
            )
            if response.status_code != 200:
                # Report the failure instead of returning inf silently.
                print(f"Ollama returned HTTP {response.status_code}")
                return float('inf')

            result = response.json()
            response_length = len(result.get('response', ''))
            prompt_length = len(text.split())

            # response_length + 1 is always >= 1, so the division is safe.
            return prompt_length / (response_length + 1)
        except Exception as e:
            # Deliberately broad: scoring is best-effort and falls back to inf.
            print(f"Error calling Ollama: {e}")
            return float('inf')

    def score_document(self, query: str, document: str) -> float:
        """Score ``document`` against ``query``; higher means more relevant.

        The score is ``OVERLAP_WEIGHT * overlap + LLM_WEIGHT * llm_score`` where
        ``overlap`` is the fraction of distinct lower-cased query words present
        in the document and ``llm_score`` is ``1 / (1 + get_perplexity(prompt))``
        (0.0 when that value is not finite).

        Args:
            query: Search query; split on whitespace.
            document: Candidate document text; split on whitespace.

        Returns:
            The blended score, or 0.0 when the query contains no words (in
            which case the model is not called at all).
        """
        query_words = set(query.lower().split())
        if not query_words:
            # Nothing to match against: skip the model call entirely.
            return 0.0

        doc_words = set(document.lower().split())
        overlap_score = len(query_words & doc_words) / len(query_words)

        # Prompt asking the model about relevance; only its length-based
        # proxy from get_perplexity is used, not the generated text itself.
        prompt = f"Given this query: '{query}'\nHow relevant is this document: '{document}'\nRelevance score (0-10):"
        perplexity = self.get_perplexity(prompt)

        # Lower proxy value -> higher score; failures (inf) contribute nothing.
        llm_score = 1.0 / (1.0 + perplexity) if math.isfinite(perplexity) else 0.0

        return self.OVERLAP_WEIGHT * overlap_score + self.LLM_WEIGHT * llm_score


In [None]:
def test_qlm():
    
    # Small test dataset
    query = "machine learning algorithms"
    
    documents = [
        "Machine learning algorithms are computational methods that learn patterns from data to make predictions.",
        "Cooking recipes often involve following step-by-step instructions to prepare delicious meals.",
        "Deep learning is a subset of machine learning that uses neural networks with multiple layers.",
        "The weather today is sunny with a chance of rain in the afternoon.",
        "Supervised learning algorithms require labeled data to train predictive models.",
        "Basketball is a popular sport played with two teams of five players each."
    ]
    
    print(f"Query: '{query}'\n")
    print("Testing QLM Document Ranking:")
    print("=" * 50)
    
    # Initialize QLM
    qlm = SimpleQLM()
    
    # Score documents
    scored_docs = []
    for i, doc in enumerate(documents):
        score = qlm.score_document(query, doc)
        scored_docs.append((score, i, doc))
    
    # Sort by score (descending)
    scored_docs.sort(reverse=True)
    
    # Display results
    for rank, (score, doc_id, doc) in enumerate(scored_docs, 1):
        relevance = "🔥 HIGHLY RELEVANT" if score > 0.5 else "✅ RELEVANT" if score > 0.2 else "❌ NOT RELEVANT"
        print(f"Rank {rank}: Score = {score:.3f} {relevance}")
        print(f"Doc {doc_id}: {doc[:80]}{'...' if len(doc) > 80 else ''}")
        print()
    
    return scored_docs

In [None]:
def evaluate_effectiveness(scored_docs, ground_truth_relevant=None, k=3):
    """Print precision@k and recall@k for a ranked list of documents.

    Args:
        scored_docs: Ranked sequence of ``(score, doc_id, document)`` tuples,
            best first; only the first ``k`` entries are inspected.
        ground_truth_relevant: Iterable of relevant document ids. Defaults to
            ``[0, 2, 4]``. Duplicates are ignored when computing recall.
        k: Rank cut-off; must be positive. Defaults to 3.

    Raises:
        ValueError: If ``k`` is not positive.

    Returns:
        None. Results are printed.
    """
    if k <= 0:
        raise ValueError("k must be a positive integer")
    if ground_truth_relevant is None:
        # Avoid a mutable default argument; build the default per call.
        ground_truth_relevant = [0, 2, 4]

    print("Effectiveness Analysis:")
    print("=" * 30)

    # Document ids of the top-k predictions
    top_k_indices = [doc_id for _, doc_id, _ in scored_docs[:k]]

    relevant_ids = set(ground_truth_relevant)
    relevant_in_top_k = len(relevant_ids.intersection(top_k_indices))

    # Precision divides by k even when fewer than k documents were ranked.
    precision_at_k = relevant_in_top_k / k

    # With no relevant documents recall is reported as 0.0 instead of
    # raising ZeroDivisionError.
    recall_at_k = relevant_in_top_k / len(relevant_ids) if relevant_ids else 0.0

    print(f"Ground truth relevant docs: {ground_truth_relevant}")
    print(f"Top {k} predicted docs: {top_k_indices}")
    print(f"Precision@{k}: {precision_at_k:.3f}")
    print(f"Recall@{k}: {recall_at_k:.3f}")

    if precision_at_k > 0.6:
        print("✅ QLM shows good effectiveness!")
    elif precision_at_k > 0.3:
        print("⚠️  QLM shows moderate effectiveness")
    else:
        print("❌ QLM needs improvement")

In [10]:
if __name__ == "__main__":
    # Banner for the demo run.
    print("Simple QLM Test with Llama3", "=" * 40, sep="\n")

    # Rank the sample documents against the sample query.
    results = test_qlm()

    # Documents 0, 2, 4 are the ones relevant to "machine learning algorithms".
    relevant_doc_ids = [0, 2, 4]
    evaluate_effectiveness(results, ground_truth_relevant=relevant_doc_ids)

Simple QLM Test with Llama3
Query: 'machine learning algorithms'

Testing QLM Document Ranking:
Rank 1: Score = 0.825 🔥 HIGHLY RELEVANT
Doc 0: Machine learning algorithms are computational methods that learn patterns from d...

Rank 2: Score = 0.603 🔥 HIGHLY RELEVANT
Doc 4: Supervised learning algorithms require labeled data to train predictive models.

Rank 3: Score = 0.589 🔥 HIGHLY RELEVANT
Doc 2: Deep learning is a subset of machine learning that uses neural networks with mul...

Rank 4: Score = 0.135 ❌ NOT RELEVANT
Doc 3: The weather today is sunny with a chance of rain in the afternoon.

Rank 5: Score = 0.130 ❌ NOT RELEVANT
Doc 1: Cooking recipes often involve following step-by-step instructions to prepare del...

Rank 6: Score = 0.124 ❌ NOT RELEVANT
Doc 5: Basketball is a popular sport played with two teams of five players each.

Effectiveness Analysis:
Ground truth relevant docs: [0, 2, 4]
Top 3 predicted docs: [0, 4, 2]
Precision@3: 1.000
Recall@3: 1.000
✅ QLM shows good effect