In [1]:
import sqlite3
import faiss
import numpy as np
from sentence_transformers import SentenceTransformer
import torch

# Prefer the GPU when PyTorch reports CUDA as available; otherwise use the CPU
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# Instantiate the Sentence Transformer model and move it onto the chosen device
model = SentenceTransformer('sentence-transformers/all-roberta-large-v1').to(device)

# Load FAISS index
def load_faiss_index(index_path='faiss_index.index'):
    """Read a FAISS index from disk.

    Args:
        index_path: Path of the index file passed to ``faiss.read_index``.

    Returns:
        The index object produced by ``faiss.read_index``.
    """
    return faiss.read_index(index_path)

# Load all documents from SQLite database
def load_documents_from_db(db_name='rag_db.sqlite'):
    """Fetch every row of the ``documents`` table from a SQLite database.

    Args:
        db_name: Path of the SQLite database file to open.

    Returns:
        A list of ``(id, document_text, reference_text)`` tuples, one per row,
        in the order SQLite returns them (the query has no ORDER BY).

    Raises:
        sqlite3.Error: If the query fails, e.g. the ``documents`` table does
            not exist. The connection is closed before the error propagates.
    """
    conn = sqlite3.connect(db_name)
    try:
        cursor = conn.cursor()
        cursor.execute("SELECT id, document_text, reference_text FROM documents")
        return cursor.fetchall()
    finally:
        # Close on every path so a failing query does not leak the connection.
        conn.close()

# Query function: input a sentence and return the most similar reference
def query_most_similar_document(query, top_k=1):
    """Return the reference text of the documents most similar to ``query``.

    The query is embedded with the module-level ``model``, the FAISS index
    from ``load_faiss_index()`` is searched for its ``top_k`` nearest
    neighbours, and each returned label is used as a position into the rows
    returned by ``load_documents_from_db()``.

    Args:
        query: Sentence to search for.
        top_k: Number of nearest neighbours requested from the index.

    Returns:
        A list with the third column (``reference_text``) of each matched
        row, in the order FAISS returned them. The list has fewer than
        ``top_k`` entries, possibly none, when the index cannot supply that
        many neighbours.

    Raises:
        IndexError: If FAISS returns a label beyond the rows loaded from the
            database.
    """
    # The index is re-read from disk on every call
    index = load_faiss_index()

    # Encode the query as a one-row batch and hand FAISS a NumPy array
    query_vector = model.encode([query], convert_to_tensor=True, device=device).cpu().numpy()

    # Distances are not needed, only the neighbour labels
    _, indices = index.search(query_vector, top_k)

    # NOTE(review): assumes FAISS label i corresponds to the i-th row returned
    # by the SELECT in load_documents_from_db -- confirm against the code that
    # built the index.
    documents = load_documents_from_db()

    # FAISS pads the result with -1 when it finds fewer than top_k neighbours.
    # Skip those labels: documents[-1] would silently return the last row.
    return [documents[i][2] for i in indices[0] if i >= 0]

# Sentence to look up, hard-coded here
query = "direction of the mouthpiece. 'WHO ARE YOU?' 'RON WEASLEY!' Ron bellowed back, as though he and Uncle Vernon were speaking"

# Request only the single closest match
top_k = 1

# Run the retrieval
most_similar_references = query_most_similar_document(query, top_k=top_k)

# Report the outcome, one reference per line
if not most_similar_references:
    print("No similar document found.")
else:
    print("The most similar reference_text is:")
    print(*most_similar_references, sep="\n")


  from .autonotebook import tqdm as notebook_tqdm


The most similar reference_text is:
from opposite ends of a football pitch. 'I'M A FRIEND OF HARRY'S FROM SCHOOL –' Uncle Vernon's small eyes swivelled around to Harry, who was rooted to the spot. 'THERE
