In [6]:
import os
import sqlite3
import pandas as pd
import numpy as np
from tqdm.auto import tqdm
from typing import List, Dict, Optional, Tuple, Union
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
import re
import string
from collections import Counter
import warnings
warnings.filterwarnings('ignore')

In [7]:
# Configuration
class Config:
    """Tunable settings shared by the database, search, and ingestion code."""

    # SQLite file that backs the Q&A store
    DB_PATH = "qna_database.db"
    # sentence-transformers model name used for all embeddings
    EMBEDDING_MODEL = "all-mpnet-base-v2"
    # Number of Q&A pairs handed to each batch insert
    CHUNK_SIZE = 500
    # Embedding dimension for all-mpnet-base-v2
    VECTOR_DIM = 768
    # Tokens shorter than this are ignored during keyword processing
    MIN_WORD_LENGTH = 3
    # Common words that carry no keyword value
    STOPWORDS = {
        'the', 'and', 'of', 'in', 'to', 'a',
        'is', 'for', 'on', 'that', 'it', 'with',
        'as', 'be', 'by', 'this', 'are', 'at',
    }

In [8]:
class QnADatabase:
    def __init__(self, db_path: str = Config.DB_PATH):
        """Set up instance state and open (or create) the SQLite database.

        Args:
            db_path: Path of the SQLite file; defaults to ``Config.DB_PATH``.
        """
        # Connection and model start unset; the model is only loaded on first use.
        self.db_path = db_path
        self.conn = None
        self.embedding_model = None

        # term -> space-separated related terms, consumed by _expand_query
        synonym_pairs = [
            ('wallet', 'wallet hot cold storage address'),
            ('miner', 'mining validator node blockchain'),
            ('p2p', 'peer-to-peer decentralized distributed'),
            ('cold wallet', 'offline storage hardware wallet'),
            ('hot wallet', 'online wallet software wallet'),
            ('blockchain', 'distributed ledger'),
            ('smart contract', 'dapp decentralized application'),
        ]
        self.synonym_map = dict(synonym_pairs)

        self._initialize_db()
        
    def _initialize_db(self):
        """Create database with optimized schema"""
        self.conn = sqlite3.connect(self.db_path, timeout=30)
        self.conn.execute("PRAGMA journal_mode = WAL")
        self.conn.execute("PRAGMA synchronous = NORMAL")
        self.conn.execute("PRAGMA cache_size = -100000")  # 100MB cache
        
        # Main Q&A table with additional metadata fields
        self.conn.execute("""
        CREATE TABLE IF NOT EXISTS qna_pairs (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            question TEXT NOT NULL,
            answer TEXT NOT NULL,
            category TEXT,
            word_count INTEGER,
            created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
            last_accessed TIMESTAMP,
            usage_count INTEGER DEFAULT 0,
            keywords TEXT,
            normalized_question TEXT
        )""")
        
        # Vector embeddings table
        self.conn.execute(f"""
        CREATE TABLE IF NOT EXISTS qna_embeddings (
            qna_id INTEGER PRIMARY KEY,
            question_vector BLOB,
            answer_vector BLOB,
            FOREIGN KEY (qna_id) REFERENCES qna_pairs(id)
        )""")
        
        # Create indexes
        self.conn.execute("CREATE INDEX IF NOT EXISTS idx_category ON qna_pairs(category)")
        self.conn.execute("CREATE INDEX IF NOT EXISTS idx_word_count ON qna_pairs(word_count)")
        self.conn.execute("CREATE INDEX IF NOT EXISTS idx_normalized_question ON qna_pairs(normalized_question)")
        
        # Full-text search with additional configuration
        self.conn.execute("""
        CREATE VIRTUAL TABLE IF NOT EXISTS qna_search 
        USING fts5(question, answer, keywords, tokenize='porter unicode61')
        """)

    def _get_embedding_model(self):
        """Get embedding model"""
        if self.embedding_model is None:
            self.embedding_model = SentenceTransformer(Config.EMBEDDING_MODEL)
        return self.embedding_model

    def _text_to_vector(self, text: str) -> bytes:
        """Convert text to compressed vector"""
        model = self._get_embedding_model()
        vector = model.encode(text)
        return vector.tobytes()

    def _vector_to_array(self, blob: bytes) -> np.ndarray:
        """Convert blob back to numpy array"""
        return np.frombuffer(blob, dtype=np.float32)
    
    def _normalize_text(self, text: str) -> str:
        """Normalize text for exact matching"""
        text = text.lower().strip()
        text = re.sub(r'[^\w\s]', '', text)  # Remove punctuation
        return text

    def keyword_search(self, query: str, limit: int = 5) -> List[Tuple[str, str]]:
        """SQL ``LIKE``-based keyword search focusing on domain-specific terms.

        NOTE(review): a second ``keyword_search`` is defined further down in
        this class. Because it appears later in the class body it replaces this
        one, so this implementation is currently unreachable; one of the two
        should be renamed or removed.

        Args:
            query: Free-text user query.
            limit: Maximum number of results to return.

        Returns:
            Up to ``limit`` ``(question, answer)`` tuples, de-duplicated by
            normalized question text.
        """
        cursor = self.conn.cursor()

        def extract_technical_terms(text):
            """Pull domain terms, acronyms, hyphenated terms and word pairs from ``text``."""
            lowered = text.lower()

            # Known crypto vocabulary
            crypto_terms = re.findall(r'\b(wallet|blockchain|miner|mining|transaction|key|p2p|peer-to-peer|smart contract|dapp|token)\b', lowered)

            # Acronyms have to be detected on the original casing: an
            # upper-case pattern can never match lowercased text.
            acronyms = [match.lower() for match in re.findall(r'\b[A-Z]{2,}\b', text)]

            # Hyphenated terms and adjacent word pairs
            special_terms = re.findall(r'\b([A-Za-z0-9]{2,}\-[A-Za-z0-9]{2,}|[a-z]{3,}\s[a-z]{3,})\b', lowered)

            # Ordered de-duplication keeps the result deterministic (a plain
            # set would make the "primary" term vary between runs) and puts
            # the domain vocabulary first so it drives the ranking below.
            terms = list(dict.fromkeys(crypto_terms + acronyms + special_terms))

            return [term for term in terms if term not in Config.STOPWORDS]

        query_terms = extract_technical_terms(query)

        if not query_terms:
            return []

        # One OR-ed condition per term, checked against question, answer and keywords
        conditions = []
        params = []
        for term in query_terms:
            conditions.append("(question LIKE ? OR answer LIKE ? OR keywords LIKE ?)")
            if ' ' in term:
                # Multi-word terms: plain substring match
                pattern = f'%{term}%'
            else:
                # Single words: require surrounding spaces
                pattern = f'% {term} %'
            params.extend([pattern, pattern, pattern])

        # Only question and answer are selected: the de-duplication loop below
        # unpacks two-element rows (and the fallback query returns two columns).
        query_sql = f"""
        SELECT question, answer 
        FROM qna_pairs
        WHERE {' OR '.join(conditions)}
        ORDER BY 
            CASE 
                WHEN question LIKE ? THEN 3
                WHEN answer LIKE ? THEN 2
                WHEN keywords LIKE ? THEN 1
                ELSE 0
            END DESC
        LIMIT ?
        """.replace('\n', ' ').strip()

        # The first term gets priority in ordering
        primary_term = query_terms[0]
        primary_pattern = f'%{primary_term}%'
        params.extend([primary_pattern, primary_pattern, primary_pattern, limit])

        cursor.execute(query_sql, params)
        results = cursor.fetchall()

        # If we don't have enough results, try a more flexible search
        if len(results) < limit:
            fallback_query = """
            SELECT question, answer 
            FROM qna_pairs
            WHERE question LIKE ? OR answer LIKE ? OR keywords LIKE ?
            LIMIT ?
            """
            cursor.execute(
                fallback_query,
                [primary_pattern, primary_pattern, primary_pattern, limit - len(results)],
            )
            results.extend(cursor.fetchall())

        # Remove duplicates while preserving order
        seen = set()
        unique_results = []
        for q, a in results:
            norm_q = self._normalize_text(q)
            if norm_q not in seen:
                seen.add(norm_q)
                unique_results.append((q, a))

        return unique_results[:limit]
    
    def _extract_keywords(self, text: str) -> str:
        """Return up to 15 of the most frequent keywords in ``text``, space-joined.

        Processing steps:
          * acronyms (two or more consecutive capitals) are detected on the
            original casing, before lowercasing makes them undetectable;
          * punctuation other than ``-``, ``.`` and ``/`` is removed;
          * ``-``, ``.`` and ``/`` are trimmed from the edges of each token, so
            a sentence-final "wallet." counts as "wallet";
          * fully hyphenated terms ("peer-to-peer") are kept whole, other
            hyphenated tokens are split into their parts;
          * tokens shorter than ``Config.MIN_WORD_LENGTH`` and stopwords are
            dropped, then the special terms are added back so short acronyms
            survive and special terms are weighted up.

        Args:
            text: Source text (a question or an answer).

        Returns:
            Space-separated keywords, most frequent first.
        """
        # Must run before lowercasing: an upper-case pattern never matches lowered text.
        acronyms = [match.lower() for match in re.findall(r'\b[A-Z]{2,}\b', text)]

        # Preserve the joiners used inside technical terms
        cleaned = re.sub(r"[^\w\s\-./]", '', text.lower())

        # Whole hyphen chains, not just the first two segments
        hyphenated = re.findall(r'\b[a-z0-9]{2,}(?:\-[a-z0-9]{2,})+\b', cleaned)
        special_terms = hyphenated + acronyms

        words = []
        for token in cleaned.split():
            # Trim joiners left dangling by sentence punctuation ("wallet.", "-foo")
            token = token.strip('-./')
            if not token:
                continue
            if '-' in token and token not in special_terms:
                words.extend(token.split('-'))
            else:
                words.append(token)

        words = [
            word for word in words
            if len(word) >= Config.MIN_WORD_LENGTH and word not in Config.STOPWORDS
        ]

        # Add special terms back
        words.extend(special_terms)

        word_counts = Counter(words)
        return ' '.join(word for word, _count in word_counts.most_common(15))
    
    def batch_insert(self, qna_list: List[Dict]):
        """Optimized bulk insert with embeddings and keyword processing"""
        if not qna_list:
            return
            
        with self.conn:
            cursor = self.conn.cursor()
            
            # Check for existing questions to prevent duplicates
            existing_questions = set()
            cursor.execute("SELECT normalized_question FROM qna_pairs")
            for row in cursor.fetchall():
                existing_questions.add(row[0])
            
            # Process and filter out duplicates
            processed_qna = []
            for qna in qna_list:
                norm_question = self._normalize_text(qna['question'])
                if norm_question not in existing_questions:
                    # Extract keywords
                    question_keywords = self._extract_keywords(qna['question'])
                    answer_keywords = self._extract_keywords(qna['answer'])
                    combined_keywords = f"{question_keywords} {answer_keywords}"
                    
                    processed_qna.append({
                        'question': qna['question'],
                        'answer': qna['answer'],
                        'category': qna.get('category'),
                        'word_count': len(qna['answer'].split()),
                        'keywords': combined_keywords,
                        'normalized_question': norm_question
                    })
                    existing_questions.add(norm_question)
            
            if not processed_qna:
                print("No new Q&A pairs to insert")
                return
            
            # Insert Q&A pairs
            cursor.executemany("""
            INSERT INTO qna_pairs (question, answer, category, word_count, keywords, normalized_question)
            VALUES (?, ?, ?, ?, ?, ?)
            """, [(q['question'], q['answer'], q['category'], q['word_count'], 
                  q['keywords'], q['normalized_question']) for q in processed_qna])
            
            # Get inserted IDs
            cursor.execute("SELECT last_insert_rowid() - ? + 1, last_insert_rowid()", (len(processed_qna),))
            first_id, last_id = cursor.fetchone()
            
            # Generate and store embeddings
            for i in tqdm(range(len(processed_qna)), desc="Generating embeddings"):
                qna = processed_qna[i]
                q_vector = self._text_to_vector(qna['question'])
                a_vector = self._text_to_vector(qna['answer'])
                cursor.execute("""
                INSERT INTO qna_embeddings (qna_id, question_vector, answer_vector)
                VALUES (?, ?, ?)
                """, (first_id + i, q_vector, a_vector))
            
            # Update full-text search index
            cursor.executemany("""
            INSERT INTO qna_search (question, answer, keywords)
            VALUES (?, ?, ?)
            """, [(q['question'], q['answer'], q['keywords']) for q in processed_qna])
    
    def semantic_search(self, query: str, top_k: int = 5, threshold: float = 0.15) -> List[Tuple[str, str, float, int]]:
        """Semantic search that returns questions, answers, scores, and ids"""
        return self._semantic_search_with_scores(query, top_k, threshold)
    
    def _semantic_search_with_scores(self, query: str, top_k: int, threshold: float):
        """Internal method that returns full results"""
        # Expand query with synonyms
        expanded_query = self._expand_query(query)
        query_vec = self._text_to_vector(expanded_query)
        query_arr = self._vector_to_array(query_vec)
        
        # Get all stored embeddings
        cursor = self.conn.execute("""
        SELECT qna_pairs.id, qna_pairs.question, qna_pairs.answer, 
               qna_embeddings.question_vector, qna_embeddings.answer_vector,
               qna_pairs.keywords, qna_pairs.category
        FROM qna_pairs
        JOIN qna_embeddings ON qna_pairs.id = qna_embeddings.qna_id
        """)
        
        results = []
        for qid, question, answer, q_vec_blob, a_vec_blob, keywords, category in cursor.fetchall():
            q_vec = self._vector_to_array(q_vec_blob)
            a_vec = self._vector_to_array(a_vec_blob)
            
            # Calculate similarities
            q_sim = cosine_similarity([query_arr], [q_vec])[0][0]
            a_sim = cosine_similarity([query_arr], [a_vec])[0][0]
            
            # Contextual scoring
            context_score = 0
            if keywords:
                kw_vec = self._text_to_vector(keywords)
                kw_sim = cosine_similarity([query_arr], [self._vector_to_array(kw_vec)])[0][0]
                context_score += 0.2 * kw_sim
            
            # Category boosting
            category_boost = 0
            if category and any(cat_term in query.lower() for cat_term in category.lower().split()):
                category_boost = 0.1
            
            # Combined score
            similarity = (
                0.4 * q_sim + 
                0.3 * a_sim + 
                0.2 * context_score + 
                0.1 * category_boost
            )
            
            if similarity >= threshold:
                results.append((question, answer, similarity, qid))
        
        # Sort and return
        return sorted(results, key=lambda x: x[2], reverse=True)[:top_k]

    def retrieve_contexts(self, query: str, top_k: int = 3) -> List[Dict]:
        """RAG-optimized context retrieval"""
        results = self._semantic_search_with_scores(query, top_k=top_k*2)  # Changed to use _semantic_search_with_scores
        
        # Process results for RAG
        contexts = []
        seen_ids = set()
        
        for question, answer, score, qid in results:
            if qid not in seen_ids:
                contexts.append({
                    'question': question,
                    'answer': answer,
                    'score': score,
                    'keywords': self._extract_keywords(f"{question} {answer}"),
                    'combined_text': f"Question: {question}\nAnswer: {answer}"
                })
                seen_ids.add(qid)
                if len(contexts) >= top_k:
                    break
        
        return contexts
    
    def keyword_search(self, query: str, limit: int = 5) -> List[Tuple[str, str]]:
        """Rank stored Q&A pairs by how many query terms they contain.

        The query is normalized, then short words and stopwords are dropped.
        Every row of ``qna_pairs`` is scored in Python: a whole-word hit counts
        3, a substring hit counts 1, and a hit inside the normalized question
        adds 2 more.

        Returns:
            Up to ``limit`` ``(question, answer)`` tuples, best score first,
            with at most one entry per normalized question.
        """
        cursor = self.conn.cursor()

        # Usable search terms from the normalized query
        terms = [
            token for token in self._normalize_text(query).split()
            if not (len(token) < Config.MIN_WORD_LENGTH or token in Config.STOPWORDS)
        ]
        if not terms:
            return []

        # Every stored pair is a candidate
        cursor.execute("""
        SELECT question, answer, keywords 
        FROM qna_pairs
        """)

        candidates = []
        for question, answer, keywords in cursor.fetchall():
            haystack = f"{question} {answer} {keywords}".lower()
            padded_haystack = f" {haystack} "
            question_text = self._normalize_text(question)

            whole_word_hits = 0
            substring_hits = 0
            question_bonus = 0
            for term in terms:
                if f" {term} " in padded_haystack:
                    whole_word_hits += 1
                if term in haystack:
                    substring_hits += 1
                if term in question_text:
                    # Question terms get higher weight
                    question_bonus += 2

            total = whole_word_hits * 3 + substring_hits + question_bonus
            if total > 0:
                candidates.append((question, answer, total))

        # Best first; the first entry seen per normalized question is the
        # highest-scoring one, so later duplicates are simply ignored.
        candidates.sort(key=lambda item: item[2], reverse=True)
        best_by_question = {}
        for entry in candidates:
            best_by_question.setdefault(self._normalize_text(entry[0]), entry)

        ranked = sorted(best_by_question.values(), key=lambda item: item[2], reverse=True)
        return [(question, answer) for question, answer, _score in ranked[:limit]]

    def hybrid_search(self, query: str, top_k: int = 5) -> List[Tuple[str, str]]:
        """Hybrid search combining exact and semantic matches"""
        # First try exact matches
        norm_query = self._normalize_text(query)
        cursor = self.conn.cursor()
        cursor.execute("""
            SELECT question, answer FROM qna_pairs 
            WHERE normalized_question LIKE ? 
            LIMIT ?
        """, (f"%{norm_query}%", top_k))
        exact_matches = cursor.fetchall()
        
        if exact_matches:
            return exact_matches[:top_k]
        
        # Fall back to semantic search
        return [x[:2] for x in self._semantic_search_with_scores(query, top_k, 0.1)]
    
    def _expand_query(self, query: str) -> str:
        """Query expansion with synonyms"""
        expanded_terms = []
        for term in query.lower().split():
            if term in self.synonym_map:
                expanded_terms.append(self.synonym_map[term])
            else:
                expanded_terms.append(term)
        
        # Add reverse synonyms
        for key, values in self.synonym_map.items():
            if any(term in values for term in query.lower().split()):
                expanded_terms.append(key)
        
        return ' '.join(expanded_terms)

    def get_all_data(self, limit: Optional[int] = None) -> pd.DataFrame:
        """Export all data with optional limit"""
        query = "SELECT * FROM qna_pairs"
        if limit:
            query += f" LIMIT {limit}"
        return pd.read_sql(query, self.conn)

    def optimize(self):
        """Database maintenance"""
        print("Optimizing database...")
        self.conn.execute("VACUUM")
        self.conn.execute("ANALYZE")
        self.conn.execute("PRAGMA optimize")

    def close(self):
        """Clean up resources"""
        if self.conn:
            self.conn.close()
        if self.embedding_model:
            del self.embedding_model

In [9]:
# Data Processing Utilities
class DataProcessor:
    """Stateless helpers for reading Q&A text files and batching records."""

    @staticmethod
    def parse_text_file(file_path: str, question_prefix: str = "Q:", answer_prefix: str = "A:") -> List[Dict]:
        """Parse a prefixed text file into ``{"question", "answer"}`` dicts.

        A line starting with ``question_prefix`` opens a new pair, a line
        starting with ``answer_prefix`` starts its answer, and any other
        non-blank line is appended to the answer once one has started.
        Blank lines are ignored.

        Returns:
            The parsed pairs, or an empty list if anything goes wrong while
            reading (the error is printed).
        """
        def build_pair(question, lines):
            return {
                "question": question,
                "answer": "\n".join(lines).strip()
            }

        parsed = []
        pending_question = None
        answer_lines = []

        try:
            with open(file_path, 'r', encoding='utf-8') as handle:
                for raw_line in handle:
                    text = raw_line.strip()
                    if not text:
                        continue

                    if text.startswith(question_prefix):
                        # A new question closes the pair collected so far
                        if pending_question is not None:
                            parsed.append(build_pair(pending_question, answer_lines))
                        pending_question = text[len(question_prefix):].strip()
                        answer_lines = []
                        continue

                    if text.startswith(answer_prefix):
                        answer_lines.append(text[len(answer_prefix):].strip())
                        continue

                    # Continuation lines only count once an answer has started
                    if answer_lines:
                        answer_lines.append(text)

                # Emit whatever was still open at end of file
                if pending_question is not None:
                    parsed.append(build_pair(pending_question, answer_lines))
        except Exception as e:
            print(f"Error processing file {file_path}: {str(e)}")
            return []

        return parsed

    @staticmethod
    def chunk_list(lst: List, chunk_size: int):
        """Yield consecutive slices of ``lst`` holding at most ``chunk_size`` items."""
        yield from (
            lst[start:start + chunk_size]
            for start in range(0, len(lst), chunk_size)
        )

In [10]:
def main():
    """End-to-end demo: rebuild the database, load data, run searches, export.

    Steps:
      1. remove any previous database file (and its WAL side files);
      2. insert two sample Q&A pairs, auto-categorized;
      3. if the external text file exists, parse and insert it in chunks;
      4. run semantic and keyword search for a few test questions;
      5. print a sample of the stored rows and the category distribution;
      6. run database maintenance.

    The database connection is always closed, even if a step fails.
    """
    # Start from a clean slate. In WAL mode SQLite keeps "-wal"/"-shm" files
    # next to the database; leftovers from an unclean shutdown must not be
    # paired with a freshly created database file.
    for suffix in ("", "-wal", "-shm"):
        stale_path = Config.DB_PATH + suffix
        if os.path.exists(stale_path):
            os.remove(stale_path)
    db = QnADatabase()

    try:
        # Sample data for demonstration
        sample_data = [
            {
                "question": "What is P2P? ",
                "answer": "A Peer-to-Peer (P2P) payment system, seamlessly integrated with blockchain technology, a decentralized application (DApp), and MetaMask wallet, orchestrates a streamlined and secure process for transparent transactions among users."
            },
            {
                "question": "Compare and Contrast Private and Public Key: ",
                "answer": "The private key allows you to have access to your funds through the crypto wallet. it is used to send Bitcoin and must be protected and secured. As for the public key, it is used to receive Bitcoin and can be published anywhere safely."
            }
        ]

        def detect_category(question: str) -> str:
            """Map a question to a coarse category using keyword rules (first match wins)."""
            question_lower = question.lower()
            if 'p2p' in question_lower or 'peer-to-peer' in question_lower:
                return "networking"
            elif 'private key' in question_lower or 'public key' in question_lower:
                return "security"
            elif 'blockchain' in question_lower:
                return "fundamentals"
            # "pow" is matched as a whole word so "power"/"powerful" do not count
            elif 'proof of work' in question_lower or re.search(r'\bpow\b', question_lower):
                return "consensus"
            elif 'wallet' in question_lower:
                return "wallets"
            elif 'smart contract' in question_lower:
                return "development"
            else:
                return "general"

        # Add categories to sample data
        for item in sample_data:
            item["category"] = detect_category(item["question"])

        # Process and insert data
        print("Inserting sample data...")
        db.batch_insert(sample_data)

        # For large files (best effort: failures are reported, not fatal)
        try:
            file_path = "/kaggle/input/db-19-txt"
            if os.path.exists(file_path):
                print("Processing large file...")
                qna_pairs = DataProcessor.parse_text_file(file_path)

                # Auto-categorize parsed questions
                for item in qna_pairs:
                    item["category"] = detect_category(item["question"])

                print(f"Processing {len(qna_pairs)} Q&A pairs...")
                for chunk in DataProcessor.chunk_list(qna_pairs, Config.CHUNK_SIZE):
                    db.batch_insert(chunk)
        except Exception as e:
            print(f"Error processing large file: {str(e)}")
            import traceback
            traceback.print_exc()

        # Test with new questions not in the sample data
        test_questions = [
            "What are the advantages of P2P networks?",
            "Explain the difference between hot and cold wallets",
            "What is the role of miners in blockchain?"
        ]

        print("\n" + "="*50)
        print("Testing with new questions not in sample data")
        print("="*50)

        for question in test_questions:
            print(f"\nQuestion: '{question}'")

            # Semantic search returns (question, answer, score, id) tuples
            print("\nSemantic search results:")
            semantic_results = db.semantic_search(question)
            if semantic_results:
                for i, (q, a, score, _) in enumerate(semantic_results, 1):
                    print(f"{i}. Question: {q}")
                    print(f"   Answer: {a}")
                    print(f"   Score: {score:.3f}")
                    print(f"   {'-'*50}")
            else:
                print("No semantic matches found")

            # Keyword search returns (question, answer) tuples
            print("\nKeyword search results:")
            keyword_results = db.keyword_search(question)
            if keyword_results:
                for i, (q, a) in enumerate(keyword_results, 1):
                    print(f"{i}. Question: {q}")
                    print(f"   Answer: {a}")
                    print(f"   {'-'*50}")
            else:
                print("No keyword matches found")

        # Export data with categories
        df = db.get_all_data(limit=10)
        print("\nSample data from database:")
        print(df[['question', 'category']].head())

        # Show category distribution
        print("\nCategory distribution:")
        print(df['category'].value_counts())

        # Maintenance
        db.optimize()
    finally:
        # Release the connection and model no matter how the demo ended
        db.close()

# Entry point: run the demo when this code is executed directly rather than imported.
if __name__ == "__main__":
    main()

Inserting sample data...


Generating embeddings:   0%|          | 0/2 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Testing with new questions not in sample data

Question: 'What are the advantages of P2P networks?'

Semantic search results:


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

1. Question: What is P2P? 
   Answer: A Peer-to-Peer (P2P) payment system, seamlessly integrated with blockchain technology, a decentralized application (DApp), and MetaMask wallet, orchestrates a streamlined and secure process for transparent transactions among users.
   Score: 0.366
   --------------------------------------------------
2. Question: Compare and Contrast Private and Public Key: 
   Answer: The private key allows you to have access to your funds through the crypto wallet. it is used to send Bitcoin and must be protected and secured. As for the public key, it is used to receive Bitcoin and can be published anywhere safely.
   Score: 0.176
   --------------------------------------------------

Keyword search results:
1. Question: What is P2P? 
   Answer: A Peer-to-Peer (P2P) payment system, seamlessly integrated with blockchain technology, a decentralized application (DApp), and MetaMask wallet, orchestrates a streamlined and secure process for transparent transactions am

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

1. Question: Compare and Contrast Private and Public Key: 
   Answer: The private key allows you to have access to your funds through the crypto wallet. it is used to send Bitcoin and must be protected and secured. As for the public key, it is used to receive Bitcoin and can be published anywhere safely.
   Score: 0.222
   --------------------------------------------------
2. Question: What is P2P? 
   Answer: A Peer-to-Peer (P2P) payment system, seamlessly integrated with blockchain technology, a decentralized application (DApp), and MetaMask wallet, orchestrates a streamlined and secure process for transparent transactions among users.
   Score: 0.153
   --------------------------------------------------

Keyword search results:
No keyword matches found

Question: 'What is the role of miners in blockchain?'

Semantic search results:


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

1. Question: What is P2P? 
   Answer: A Peer-to-Peer (P2P) payment system, seamlessly integrated with blockchain technology, a decentralized application (DApp), and MetaMask wallet, orchestrates a streamlined and secure process for transparent transactions among users.
   Score: 0.309
   --------------------------------------------------

Keyword search results:
1. Question: What is P2P? 
   Answer: A Peer-to-Peer (P2P) payment system, seamlessly integrated with blockchain technology, a decentralized application (DApp), and MetaMask wallet, orchestrates a streamlined and secure process for transparent transactions among users.
   --------------------------------------------------

Sample data from database:
                                        question    category
0                                  What is P2P?   networking
1  Compare and Contrast Private and Public Key:     security

Category distribution:
category
networking    1
security      1
Name: count, dtype: int64
Optimizin