In [None]:
import sqlite3
import numpy as np
from sentence_transformers import SentenceTransformer
import faiss
import torch

# Pick the compute device: use the GPU when PyTorch reports CUDA, else the CPU.
device = 'cpu'
if torch.cuda.is_available():
    device = 'cuda'

# Instantiate the sentence-embedding model and place it on the chosen device.
model = SentenceTransformer('sentence-transformers/all-roberta-large-v1').to(device)

# Function to retrieve the most similar record's reference embedding using input text
def get_similar_reference(input_text, db_name='rag_db.sqlite', faiss_index_path='faiss_index.index'):
    """
    Retrieve the reference embedding stored for the record most similar to the input text.

    The input text is encoded with the module-level ``model``, the nearest
    vector is looked up in the FAISS index read from ``faiss_index_path``, and
    the resulting position is used as a row offset into the ``documents``
    table of the SQLite database.

    Parameters:
    - input_text: The input text to search for in the database.
    - db_name: The name of the SQLite database to retrieve embeddings from.
    - faiss_index_path: Path to the saved FAISS index for fast similarity search.

    Returns:
    - reference_embedding: 1-D float32 numpy array decoded from the stored
      BLOB, or None when FAISS finds no neighbour, the row does not exist,
      or the stored value is NULL.
    """
    # Step 1: Convert the input text to its embedding
    input_vector = model.encode([input_text], convert_to_tensor=True, device=device)
    input_vector = input_vector.cpu().numpy()  # FAISS works on numpy arrays, not tensors

    # Step 2: Load the FAISS index
    faiss_index = faiss.read_index(faiss_index_path)

    # Step 3: Search the FAISS index for the most similar input embedding
    D, I = faiss_index.search(input_vector, k=1)  # k=1 to get the top 1 most similar vector
    print(f"Distances: {D}, Indices: {I}")

    # Step 4: Retrieve the index of the most similar vector (convert numpy.int64 to int)
    most_similar_index = int(I[0][0])  # sqlite3 cannot bind numpy.int64 directly

    # FAISS reports -1 when it has no neighbour to return (e.g. an empty
    # index). SQLite treats a negative OFFSET as 0, which would silently hand
    # back the first row, so stop here instead.
    if most_similar_index < 0:
        return None

    # Step 5: Retrieve the corresponding reference embedding from the database
    # NOTE(review): the lookup assumes the N-th row returned by this query
    # corresponds to FAISS id N; the query has no ORDER BY, so confirm the
    # table's row order matches the order vectors were added to the index.
    conn = sqlite3.connect(db_name)
    try:
        cursor = conn.cursor()
        cursor.execute("SELECT reference_embedding FROM documents LIMIT 1 OFFSET ?", (most_similar_index,))
        result = cursor.fetchone()
    finally:
        # Always release the connection, even if the query raises.
        conn.close()

    # No row at that offset, or the column is NULL: nothing to decode.
    if result is None or result[0] is None:
        return None

    # Convert the BLOB data (reference embedding) back to numpy array
    return np.frombuffer(result[0], dtype=np.float32)

# Demo: look up the stored reference embedding for a sample passage.
input_text = "fire, blinking at the light. \"He sent the girl away?\" \"He did better than that,\" Tyrion said. \"First he made my brother tell me the truth. The girl was a whore, you see. Jaime arranged the whole affair, the road, the outlaws, all of it. He thought it was time I had a woman. He paid double for a maiden, knowing it would be my first time. \"After Jaime had made his confession, to drive home the lesson, Lord Tywin brought my wife in and gave her to his guards. They paid her fair enough. A silver for each man, how many whores command that high a price? He sat me down in the corner of the barracks and bade me watch, and at the end she had so many silvers the coins were slipping through her fingers and rolling on the floor, she . . .\" The smoke was stinging his eyes. Tyrion cleared his throat and turned away from the fire, to gaze out into darkness"
reference_embedding = get_similar_reference(input_text)

# Report the outcome; None signals that the lookup found nothing.
if reference_embedding is None:
    print("No matching reference found.")
else:
    print("Reference embedding:", reference_embedding)


Distances: [[1.5071963e-12]], Indices: [[1]]
Reference embedding: [ 0.00360759  0.01398184  0.00724497 ...  0.00887039  0.00590016
 -0.01719082]


: 