In [7]:
import pandas as pd
import numpy as np
from sentence_transformers import SentenceTransformer
import faiss
import textwrap

In [5]:
# Load the Q&A workbook. header=1 tells pandas to take the column names from
# the sheet's second row (row index 1) instead of the first.
try:
    df = pd.read_excel('Database Requirements.xlsx', header=1)
except FileNotFoundError:
    print("Error: The file was not found. Please check the file path.")
except Exception as e:
    print(f"An error occurred: {e}")
else:
    # Success reporting lives in `else` so that only failures of the read
    # itself are routed to the handlers above; a problem while displaying the
    # frame is no longer misreported as a load error.
    print("Data imported successfully!")
    print(df.head())  # Display the first 5 rows

Data imported successfully!
   No.                                           Question  \
0    1    How do I know if my tenancy agreement is valid?   
1    2  What should I do before signing a tenancy agre...   
2    3  Can my landlord enter my property without notice?   
3    4  What are the consequences of not paying rent o...   
4    5    How can I terminate my tenancy agreement early?   

                                Answer / Explanation Relevant Clause ID(s)  \
0  Must be in writing, signed by both parties, an...                  C001   
1  Review all terms carefully; ensure clarity; co...                  C001   
2  Landlord must give reasonable notice (usually ...                  C002   
3  Late payment may result in penalties, eviction...                  C003   
4  Early termination allowed only if clause exist...                  C004   

                                    Legal Commentary  \
0  Only agreements containing essential terms are...   
1  Legal review reduces 

In [10]:
# --- Configuration ---
# Excel workbook that holds the question/answer rows.
FILE_NAME = 'Database Requirements.xlsx'

# Name of the sentence-transformers model used to embed chunks and queries.
MODEL_NAME = 'all-MiniLM-L6-v2'

# How many of the closest chunks are fetched for each query.
K = 3

# --- 1. Data Preparation and Chunking ---
def load_and_chunk_data(file_path):
    """Load the Q&A workbook and turn every row into one labelled text chunk.

    Args:
        file_path: Path to the Excel workbook. Column names are read from the
            sheet's second row (``header=1``).

    Returns:
        list[str]: One chunk per row. Each chunk consists of four
        ``Label: value`` lines (question, answer, legal commentary,
        regulation), separated by newlines.

    Raises:
        KeyError: If a row is processed and one of the four expected columns
            is not present in the sheet.

    Note:
        When ``file_path`` does not exist, a message is printed and a single
        built-in demonstration row is used instead, so the rest of the
        pipeline can still be exercised.
    """
    try:
        # header=1: the column names live on the second row of the sheet.
        df = pd.read_excel(file_path, header=1)
    except FileNotFoundError:
        print(f"Error: File '{file_path}' not found. Please create a dummy file or ensure the path is correct.")
        # Create a dummy DataFrame if the file isn't found for demonstration
        df = pd.DataFrame({
            'Question': ["How do I know if my tenancy agreement is valid?"],
            'Answer / Explanation': ["Must be in writing, signed by both parties, and include essential terms."],
            'Legal Commentary': ["Only agreements containing essential terms are enforceable."],
            'Government Regulation / Explanation': ["N/A"]
        })

    # (label shown in the chunk, column it is read from)
    fields = (
        ("Question", 'Question'),
        ("Answer/Explanation", 'Answer / Explanation'),
        ("Legal Context", 'Legal Commentary'),
        ("Regulation/Source", 'Government Regulation / Explanation'),
    )

    def _as_text(value):
        # Empty Excel cells arrive as NaN; render them as "N/A" (the same
        # placeholder the demonstration row uses) instead of the text "nan".
        return "N/A" if pd.isna(value) else str(value)

    chunks = []
    for _, row in df.iterrows():
        # The lines are joined explicitly rather than dedenting an indented
        # template: dedenting after interpolation breaks as soon as a cell
        # value contains a newline, leaving the template's indentation inside
        # the chunk.
        chunk = "\n".join(
            f"{label}: {_as_text(row[column])}" for label, column in fields
        ).strip()
        chunks.append(chunk)

    return chunks

# --- 2. Indexing and Embedding ---
def create_faiss_index(chunks, model):
    """Embed every chunk and store the vectors in a flat L2 Faiss index.

    Args:
        chunks: Sequence of text chunks to index.
        model: Object exposing ``encode(list_of_texts)`` that returns a 2-D
            array-like with one embedding row per text.

    Returns:
        A ``faiss.IndexFlatL2`` containing one vector per chunk, in the same
        order as ``chunks``.

    Raises:
        ValueError: If ``chunks`` is empty; there would be no embedding
            dimension to size the index with.
    """
    if len(chunks) == 0:
        raise ValueError("Cannot build a Faiss index from an empty list of chunks.")

    print("Generating embeddings and creating Faiss index...")

    # Faiss expects a C-contiguous float32 matrix, so convert once up front
    # and use the same array for both sizing and filling the index.
    embeddings = np.ascontiguousarray(model.encode(chunks), dtype='float32')

    dimension = embeddings.shape[1]  # Length of each embedding vector
    index = faiss.IndexFlatL2(dimension)  # L2 is Euclidean distance

    index.add(embeddings)
    print(f"Index created with {index.ntotal} documents.")
    return index

# --- 3. Retrieval and Generation (Simulated) ---
def retrieve_context(query, index, chunks, model, k):
    """Return the text chunks closest to ``query``, best match first.

    Args:
        query: The user's question as a string.
        index: Search index exposing ``search(vectors, k)`` that returns a
            ``(distances, ids)`` pair, as a Faiss index does.
        chunks: The chunk list the index was built from; ids returned by the
            index are positions in this list.
        model: Object exposing ``encode(list_of_texts)``; the result is cast
            to float32 before searching.
        k: Number of neighbours to ask the index for.

    Returns:
        list: Up to ``k`` chunks. Fewer are returned when the index holds
        fewer than ``k`` vectors.
    """
    # Encode the user query
    query_vector = model.encode([query]).astype('float32')

    # Only the ids are needed; the distances are not used.
    _distances, neighbour_ids = index.search(query_vector, k)

    # Faiss fills missing neighbours with id -1 when fewer than k vectors are
    # available. Indexing `chunks` with -1 would silently return the LAST
    # chunk as a bogus hit, so those placeholders are dropped.
    return [chunks[i] for i in neighbour_ids[0] if i >= 0]

def generate_response_rag(query, retrieved_context):
    """
    Stand-in for the LLM generation step: prints the query and the retrieved chunks.

    No model is called. The function writes a banner, the user query, a
    30-dash rule, every retrieved chunk under a 1-based "[Chunk n]" heading,
    and a closing note to stdout. A real RAG system would call an LLM API at
    this point and synthesize one answer from the chunks. Returns None.
    """
    rule = "-" * 30

    print("\n--- RAG Output (Simulated LLM) ---")
    print(f"User Query: {query}")
    print(rule)

    # The chunks are shown verbatim to demonstrate that retrieval worked.
    print("Retrieved Context Chunks (LLM Input):")
    for number, passage in enumerate(retrieved_context, start=1):
        print(f"\n[Chunk {number}]", passage, sep="\n")

    print("\n\n*Note: A real LLM would now synthesize these chunks into a single, cohesive answer.*")

In [11]:
# --- Main RAG Pipeline Execution ---
# FILE_NAME, MODEL_NAME and K come from the configuration block defined
# alongside the pipeline functions. They are deliberately not repeated here:
# this cell already needs that earlier cell for the function definitions, and
# a second copy of the constants would let the two drift apart.
if __name__ == '__main__':
    # 1. Load Data and create Chunks
    data_chunks = load_and_chunk_data(FILE_NAME)

    # 2. Initialize Model and Create Index
    embedding_model = SentenceTransformer(MODEL_NAME)
    faiss_index = create_faiss_index(data_chunks, embedding_model)

    # 3. Test the RAG system with a query
    user_query = "What rules apply to my landlord entering my rental?"

    # 3.1 Retrieval: the K chunks nearest to the query
    context = retrieve_context(user_query, faiss_index, data_chunks, embedding_model, K)

    # 3.2 Generation (Simulated): prints the retrieved chunks
    generate_response_rag(user_query, context)

Generating embeddings and creating Faiss index...
Index created with 53 documents.

--- RAG Output (Simulated LLM) ---
User Query: What rules apply to my landlord entering my rental?
------------------------------
Retrieved Context Chunks (LLM Input):

[Chunk 1]
Question: Can my landlord enter my property, what is considered reasonable notice, and can I refuse entry?
Answer/Explanation: Landlord must give reasonable notice, typically 24 hours. Entry without notice is allowed only in emergencies. Tenants generally cannot refuse entry if notice is valid and purpose is legitimate.
Legal Context: Court balances landlord's access right with tenant’s quiet enjoyment.
Regulation/Source: HDB/URA – Entry must respect tenant's occupancy rights; emergency access allowed

[Chunk 2]
Question: Can my landlord enter my property without notice?
Answer/Explanation: Landlord must give reasonable notice (usually 24h), except emergencies. It is recommended to document the notice period in the agreement.
L