In [1]:
import os
import numpy as np
from sentence_transformers import SentenceTransformer
from transformers import pipeline
import faiss
from typing import List, Dict, Tuple
import time

In [8]:
class QAAssistantWithRankVicuna:
    """Retrieve -> rerank -> read question answering over an in-memory corpus.

    The pipeline has three stages:

    1. Dense retrieval: documents and queries are embedded with a
       SentenceTransformer model and searched through a FAISS inner-product
       index over L2-normalised vectors.
    2. Reranking: the retrieved candidates are rescored with a CrossEncoder.
       Despite the class/method names, the model loaded here is
       ``cross-encoder/ms-marco-MiniLM-L-6-v2``, not a RankVicuna model.
    3. Reading: a ``question-answering`` pipeline is run over the top
       reranked documents joined into a single context string.
    """

    # Answer text returned when the QA model raises an exception.
    _FALLBACK_ANSWER = "I couldn't find a relevant answer in the provided context."

    def __init__(self, documents: List[str]):
        """Load the models and build the search index.

        Args:
            documents: Non-empty list of passages to search over.

        Raises:
            ValueError: If ``documents`` is empty.
        """
        # Fail before loading any model: an empty corpus cannot be indexed.
        if len(documents) == 0:
            raise ValueError("documents must contain at least one document")

        self.documents = documents

        # Bi-encoder used for the first-stage dense retrieval.
        self.embedding_model = SentenceTransformer('all-MiniLM-L6-v2')

        # Cross-encoder used for second-stage reranking.
        from sentence_transformers import CrossEncoder
        self.reranker = CrossEncoder('cross-encoder/ms-marco-MiniLM-L-6-v2')

        # Answer-extraction model.
        # NOTE(review): `return_scores` does not look like a documented
        # question-answering pipeline argument; it appears to be ignored.
        # Kept unchanged - confirm before relying on or removing it.
        self.qa_model = pipeline('question-answering',
                                 model='distilbert-base-cased-distilled-squad',
                                 return_scores=True)

        self.build_index()

    def build_index(self):
        """Embed ``self.documents`` and (re)build ``self.index``.

        Raises:
            ValueError: If ``self.documents`` is empty.
        """
        if len(self.documents) == 0:
            raise ValueError("documents must contain at least one document")

        print("Building document index...")
        # Cast to contiguous float32 *before* normalising so the array that is
        # normalised in place is the very array handed to the index.
        embeddings = np.ascontiguousarray(
            self.embedding_model.encode(self.documents), dtype='float32')
        faiss.normalize_L2(embeddings)

        # Inner product over unit-length vectors equals cosine similarity.
        self.index = faiss.IndexFlatIP(embeddings.shape[1])
        self.index.add(embeddings)

        print(f"Index built with {len(self.documents)} documents")

    def retrieve_initial_candidates(self, query: str, k: int = 10) -> List[Tuple[str, float]]:
        """Return up to ``k`` ``(document, similarity)`` pairs for ``query``.

        Args:
            query: Question text to embed and search for.
            k: Maximum number of candidates requested from the index.

        Returns:
            Pairs in the order produced by the index search. Fewer than ``k``
            pairs are returned when the index holds fewer than ``k`` vectors;
            an empty list is returned when ``k <= 0``.
        """
        if k <= 0:
            return []

        query_embedding = np.ascontiguousarray(
            self.embedding_model.encode([query]), dtype='float32')
        faiss.normalize_L2(query_embedding)

        scores, indices = self.index.search(query_embedding, k)

        # The index pads missing results with label -1. A plain
        # `idx < len(documents)` check would accept -1 and silently return the
        # *last* document, so the lower bound must be checked as well.
        num_docs = len(self.documents)
        return [
            (self.documents[idx], float(score))
            for score, idx in zip(scores[0], indices[0])
            if 0 <= idx < num_docs
        ]

    def rerank_with_rankvicuna(self, query: str, candidates: List[Tuple[str, float]],
                              top_k: int = 5) -> List[Tuple[str, float]]:
        """Rescore ``candidates`` with the cross-encoder and keep the best ``top_k``.

        Args:
            query: Question text paired with each candidate document.
            candidates: ``(document, first_stage_score)`` pairs; the
                first-stage score is discarded.
            top_k: Number of reranked pairs to keep.

        Returns:
            ``(document, rerank_score)`` pairs sorted by descending rerank
            score, or an empty list when ``candidates`` is empty.
        """
        if not candidates:
            return []

        query_doc_pairs = [[query, doc] for doc, _ in candidates]
        rerank_scores = self.reranker.predict(query_doc_pairs)

        reranked = [(doc, float(score))
                    for (doc, _), score in zip(candidates, rerank_scores)]
        reranked.sort(key=lambda pair: pair[1], reverse=True)

        return reranked[:top_k]

    def _run_qa(self, query: str, context: str, prompt_type: str) -> Dict:
        """Run the QA model on ``context`` and package the result.

        Any ``Exception`` raised by the model is reported on stdout and turned
        into a zero-confidence fallback answer (best-effort behaviour).
        ``KeyboardInterrupt`` / ``SystemExit`` are deliberately not caught so
        that a long-running cell can still be interrupted.
        """
        try:
            result = self.qa_model(question=query, context=context)
            return {
                'answer': result['answer'],
                'confidence': result['score'],
                'prompt_type': prompt_type
            }
        except Exception as exc:
            print(f"QA model failed ({type(exc).__name__}: {exc}); using fallback answer.")
            return {
                'answer': self._FALLBACK_ANSWER,
                'confidence': 0.0,
                'prompt_type': prompt_type
            }

    def generate_answer_default_prompt(self, query: str, context: str) -> Dict:
        """Answer ``query`` from the raw ``context``.

        Returns:
            Dict with keys ``answer``, ``confidence`` and ``prompt_type``
            (always ``'default'``).
        """
        return self._run_qa(query, context, 'default')

    def generate_answer_custom_prompt(self, query: str, context: str) -> Dict:
        """Answer ``query`` from ``context`` wrapped in an instruction template.

        The same QA model is used as for the default prompt; only the context
        string differs. In practice a model that accepts free-form prompts
        would be a better fit for this template.

        Returns:
            Dict with keys ``answer``, ``confidence`` and ``prompt_type``
            (always ``'custom'``).
        """
        # Custom prompt template - more structured
        custom_context = f"""
        Context Information:
        {context}
        
        Instructions: Based on the above context, provide a comprehensive and accurate answer to the question below. 
        If the context doesn't contain enough information, clearly state what information is missing.
        
        Question: {query}
        
        Answer:"""

        return self._run_qa(query, custom_context, 'custom')

    def answer_question(self, query: str, use_custom_prompt: bool = False,
                        initial_k: int = 10, top_k: int = 3) -> Dict:
        """Run the full retrieve -> rerank -> read pipeline for ``query``.

        Args:
            query: Question to answer.
            use_custom_prompt: Use the instruction-template context instead of
                the raw concatenated documents.
            initial_k: Number of candidates requested from the index.
            top_k: Number of reranked documents joined into the QA context.

        Returns:
            Dict with keys ``answer``, ``confidence``, ``prompt_type``,
            ``retrieval_time``, ``documents_used``, ``reranking_scores`` and
            ``top_documents``. The same keys are present when nothing was
            retrieved. ``retrieval_time`` is the wall-clock time from entry to
            the point the result is assembled, so it also covers reranking and
            answer generation.
        """
        start_time = time.time()
        prompt_type = 'custom' if use_custom_prompt else 'default'

        # Step 1: Initial retrieval
        print(f"Retrieving candidates for: '{query}'")
        candidates = self.retrieve_initial_candidates(query, k=initial_k)

        if not candidates:
            # Same schema as the success path so callers can read every key.
            return {
                'answer': "No relevant documents found.",
                'confidence': 0.0,
                'prompt_type': prompt_type,
                'retrieval_time': time.time() - start_time,
                'documents_used': 0,
                'reranking_scores': [],
                'top_documents': []
            }

        # Step 2: Reranking
        print("Reranking candidates...")
        reranked_docs = self.rerank_with_rankvicuna(query, candidates, top_k=top_k)

        # Step 3: Combine top documents for context
        combined_context = "\n\n".join(doc for doc, _ in reranked_docs)

        # Step 4: Generate answer with chosen prompt template
        if use_custom_prompt:
            result = self.generate_answer_custom_prompt(query, combined_context)
        else:
            result = self.generate_answer_default_prompt(query, combined_context)

        # Add metadata
        result.update({
            'retrieval_time': time.time() - start_time,
            'documents_used': len(reranked_docs),
            'reranking_scores': [score for _, score in reranked_docs],
            'top_documents': [doc[:100] + "..." for doc, _ in reranked_docs]
        })

        return result

In [6]:
# Example usage and performance comparison
def main():
    """Demo: answer a few questions with both prompt styles and print a comparison.

    Builds a ``QAAssistantWithRankVicuna`` over a small built-in corpus, then
    for each test question runs the pipeline once with the default prompt and
    once with the custom prompt. The timing, document count and reranking
    scores printed at the end of each question come from the default-prompt
    run only.
    """
    # Sample documents (replace with your dataset)
    corpus = [
        "Python is a high-level programming language known for its simplicity and readability. It was created by Guido van Rossum and first released in 1991.",
        "Machine learning is a subset of artificial intelligence that enables computers to learn and improve from experience without being explicitly programmed.",
        "Natural language processing (NLP) is a branch of AI that helps computers understand, interpret and manipulate human language.",
        "Deep learning uses neural networks with multiple layers to model and understand complex patterns in data.",
        "Computer vision is a field of AI that trains computers to interpret and understand visual information from the world.",
        "Reinforcement learning is a type of machine learning where an agent learns to make decisions by performing actions in an environment.",
        "Data science combines statistics, mathematics, and computer science to analyze and interpret complex data.",
        "Cloud computing provides on-demand access to computing resources over the internet without direct active management.",
        "Cybersecurity involves protecting computer systems, networks, and data from digital attacks and unauthorized access.",
        "Blockchain is a distributed ledger technology that maintains a continuously growing list of records linked using cryptography."
    ]

    assistant = QAAssistantWithRankVicuna(corpus)

    queries = [
        "What is machine learning?",
        "Who created Python?",
        "What is deep learning?",
        "How does blockchain work?"
    ]

    # Separators are loop-invariant, so build them once.
    thin_rule = "-" * 50
    thick_rule = "\n" + "="*60 + "\n"

    print("=== Performance Comparison: Default vs Custom Prompt ===\n")

    for query in queries:
        print(f"Question: {query}")
        print(thin_rule)

        # Default prompt run
        baseline = assistant.answer_question(query, use_custom_prompt=False)
        print(f"Default Prompt Answer: {baseline['answer']}")
        print(f"Confidence: {baseline['confidence']:.3f}")

        # Custom prompt run
        templated = assistant.answer_question(query, use_custom_prompt=True)
        print(f"Custom Prompt Answer: {templated['answer']}")
        print(f"Confidence: {templated['confidence']:.3f}")

        # Pipeline metadata is reported for the default-prompt run.
        score_labels = [format(score, '.3f') for score in baseline['reranking_scores']]
        print(f"Retrieval Time: {baseline['retrieval_time']:.3f}s")
        print(f"Documents Used: {baseline['documents_used']}")
        print(f"Reranking Scores: {score_labels}")
        print(thick_rule)


In [7]:
# Run the demo only when this code is executed directly, not when imported.
if __name__ == "__main__":
    main()

Device set to use cpu


Building document index...
Index built with 10 documents
=== Performance Comparison: Default vs Custom Prompt ===

Question: What is machine learning?
--------------------------------------------------
Retrieving candidates for: 'What is machine learning?'
Reranking candidates...
Default Prompt Answer: a subset of artificial intelligence
Confidence: 0.285
Retrieving candidates for: 'What is machine learning?'
Reranking candidates...
Custom Prompt Answer: a subset of artificial intelligence
Confidence: 0.301
Retrieval Time: 0.242s
Documents Used: 3
Reranking Scores: ['11.272', '4.944', '-3.607']


Question: Who created Python?
--------------------------------------------------
Retrieving candidates for: 'Who created Python?'
Reranking candidates...
Default Prompt Answer: Guido van Rossum
Confidence: 0.997
Retrieving candidates for: 'Who created Python?'
Reranking candidates...
Custom Prompt Answer: Guido van Rossum
Confidence: 0.996
Retrieval Time: 0.110s
Documents Used: 3
Reranking Sco