In [4]:
import sqlite3
import faiss
import numpy as np
from sentence_transformers import SentenceTransformer
import torch

# Pick the compute device: use the GPU when torch reports that CUDA is
# available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# Instantiate the Sentence Transformer checkpoint and move it onto the
# selected device; `model` is bound to whatever `.to(device)` returns.
model = SentenceTransformer('sentence-transformers/all-roberta-large-v1').to(device)

# Load FAISS index
def load_faiss_index(index_path='faiss_index_quantized.index'):
    """Read a serialized FAISS index from disk.

    Args:
        index_path: Path of the index file passed to ``faiss.read_index``.

    Returns:
        The index object produced by ``faiss.read_index``.
    """
    return faiss.read_index(index_path)

# Load all documents (id, document text, reference text) from the SQLite database
def load_documents_from_db(db_name='rag_db.sqlite'):
    """Fetch every row of the ``documents`` table from a SQLite database.

    Args:
        db_name: Path of the SQLite database file to open.

    Returns:
        A list of ``(id, document_text, reference_text)`` tuples, in the
        order SQLite returns them (the query has no ``ORDER BY``).

    Raises:
        sqlite3.Error: If the query fails (for example, when the
            ``documents`` table does not exist). The connection is closed
            before the error propagates.
    """
    conn = sqlite3.connect(db_name)
    try:
        # Retrieve document ID, text, and reference text for every row.
        return conn.execute(
            "SELECT id, document_text, reference_text FROM documents"
        ).fetchall()
    finally:
        # Always release the connection, even when the query raises.
        conn.close()

# Query function: input a sentence and return the most similar reference
def query_most_similar_document(query, top_k=1):
    """Return the ``reference_text`` of the documents most similar to *query*.

    The query is embedded with the module-level ``model``, searched against
    the index returned by ``load_faiss_index()``, and each resulting index
    position is used to pick a row from ``load_documents_from_db()``.

    Args:
        query: Sentence (string) to search for.
        top_k: Number of nearest neighbours to request from the index.

    Returns:
        A list of ``reference_text`` values in the order the index returned
        its hits. It holds fewer than ``top_k`` entries (possibly none) when
        the index reports fewer valid hits.

    Raises:
        IndexError: If the index returns a position beyond the number of
            rows loaded from the database (index and database out of sync).
    """
    # Load FAISS index
    index = load_faiss_index()

    # Convert query input to vector (embedding)
    query_vector = model.encode([query], convert_to_tensor=True, device=device).cpu().numpy()

    # Perform similarity search in FAISS index to find the most similar embeddings
    _, indices = index.search(query_vector, top_k)

    # Load all documents from the database
    documents = load_documents_from_db()

    # FAISS fills unused result slots with the label -1 when it finds fewer
    # than top_k neighbours. Indexing the list with -1 would silently return
    # the LAST document as a bogus match, so negative labels are dropped.
    # NOTE(review): assumes FAISS position i corresponds to documents[i],
    # i.e. the index was built in the same row order the query returns —
    # confirm against the indexing script.
    hit_positions = [int(i) for i in indices[0] if i >= 0]

    # Extract and return the corresponding reference_text(s)
    return [documents[position][2] for position in hit_positions]

# Hard-coded query passage used for this lookup
query = "fire, blinking at the light. \"He sent the girl away?\" \"He did better than that,\" Tyrion said. \"First he made my brother tell me the truth. The girl was a whore, you see. Jaime arranged the whole affair, the road, the outlaws, all of it. He thought it was time I had a woman. He paid double for a maiden, knowing it would be my first time. \"After Jaime had made his confession, to drive home the lesson, Lord Tywin brought my wife in and gave her to his guards. They paid her fair enough. A silver for each man, how many whores command that high a price? He sat me down in the corner of the barracks and bade me watch, and at the end she had so many silvers the coins were slipping through her fingers and rolling on the floor, she . . .\" The smoke was stinging his eyes. Tyrion cleared his throat and turned away from the fire, to gaze out into darkness"

# Only the single best match is requested
top_k = 1

# Run the similarity lookup for the query above
most_similar_references = query_most_similar_document(query, top_k=top_k)

# Report the outcome: handle the empty result first, otherwise print a
# header followed by each returned reference text on its own line
if not most_similar_references:
    print("No similar document found.")
else:
    print("The most similar reference_text is:")
    for ref in most_similar_references:
        print(ref)


The most similar reference_text is:
. "Lord Tywin had me go last," 404 GEORGE R.R. MARTIN he said in a quiet voice. "And he gave me a gold coin to pay her, because I was a Lannister, and worth more." After a time he heard the noise again, the rasp of steel on stone as
