In [73]:
from sentence_transformers import SentenceTransformer
import faiss
import pandas as pd
import logging
import os
from datetime import datetime


In [74]:
def setup_logging():
    """Configure root logging to write to a timestamped file and to the console.

    Creates the ``../logs`` directory (relative to the current working
    directory) when it is missing, then installs a file handler and a console
    handler on the root logger at INFO level.

    Returns:
        str: Path of the log file opened for this call.
    """
    log_dir = "../logs"
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(log_dir, exist_ok=True)

    # One log file per call, named after the second in which it was made.
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    log_file = os.path.join(log_dir, f"rag_system_{timestamp}.log")

    # basicConfig() does nothing when the root logger already has handlers
    # (for example when this function is called a second time in the same
    # kernel), which would leave the returned log file empty. force=True
    # (Python 3.8+) removes and closes the old handlers first.
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s - %(levelname)s - %(message)s',
        handlers=[
            logging.FileHandler(log_file),
            logging.StreamHandler(),  # Also print to console
        ],
        force=True,
    )

    logging.info(f"Logging initialized. Log file: {log_file}")
    return log_file

def load_training_data(file_path):
    """Read the training CSV and return its ``passage`` column as a list.

    Args:
        file_path: Location of the CSV file; it must contain a ``passage``
            column.

    Returns:
        list: Values of the ``passage`` column, in file order.
    """
    frame = pd.read_csv(file_path)
    passages = frame["passage"]
    return passages.tolist()

def load_test_data(file_path):
    """Read the test CSV and map each question to its answer.

    Args:
        file_path: Location of the CSV file; it must contain ``question`` and
            ``answer`` columns.

    Returns:
        dict: ``{question: answer}`` in file order. When a question occurs
        more than once, the answer from its last row is the one kept.
    """
    frame = pd.read_csv(file_path)
    questions = frame["question"].tolist()
    answers = frame["answer"].tolist()
    return {question: answer for question, answer in zip(questions, answers)}


In [75]:
def initialize_model():
    """Build and return the SentenceTransformer encoder used by this notebook."""
    model_name = 'sentence-transformers/all-MiniLM-L6-v2'
    encoder = SentenceTransformer(model_name)
    return encoder

def get_embeddings(text, model):
    """Encode ``text`` with ``model`` and return the encoder's result unchanged.

    Args:
        text: Input handed straight to ``model.encode`` (callers in this file
            pass a list of strings).
        model: Object exposing an ``encode`` method.

    Returns:
        Whatever ``model.encode(text)`` returns.
    """
    vectors = model.encode(text)
    return vectors

In [76]:
def create_index(embeddings):
    """Build a flat L2 FAISS index holding every row of ``embeddings``.

    Args:
        embeddings: Two-dimensional array whose second axis length is used as
            the index dimensionality.
            NOTE(review): presumably float32, as produced by the encoder —
            confirm before passing arrays from another source.

    Returns:
        The populated ``faiss.IndexFlatL2`` instance.
    """
    dimension = embeddings.shape[1]
    flat_index = faiss.IndexFlatL2(dimension)
    flat_index.add(embeddings)
    return flat_index

def search_similar_passages(query, index, training_data, model, k=1):
    """Return the passages whose embeddings are closest to ``query``.

    Args:
        query: Text of the question to look up.
        index: Search index exposing ``search(vectors, k)`` and returning a
            ``(distances, indices)`` pair with one row per query vector.
        training_data: Sequence of passages; assumed to be positionally
            aligned with the vectors stored in ``index``.
        model: Embedding model forwarded to ``get_embeddings``.
        k: Number of neighbours to request from the index.

    Returns:
        list: At most ``k`` passages, in the order the index returned them.
    """
    # The query is wrapped in a list so a single-row batch is encoded.
    query_embedding = get_embeddings([query], model)
    _distances, indices = index.search(query_embedding, k)
    # FAISS pads the result with -1 when it has fewer than k vectors to
    # return. Without this filter, -1 would index the *last* passage and be
    # reported as a genuine hit.
    return [training_data[i] for i in indices[0] if i >= 0]


In [77]:
def evaluate_retrieval_accuracy(test_data, index, training_data, model, num_samples=10):
    """Measure how often the top retrieved passage contains the expected answer.

    Only the first ``num_samples`` entries of ``test_data`` are evaluated. A
    question counts as correct when its answer, lower-cased, occurs as a
    substring of the lower-cased top-1 passage.

    Args:
        test_data: Mapping of question to expected answer.
        index: Search index forwarded to ``search_similar_passages``.
        training_data: Passages forwarded to ``search_similar_passages``.
        model: Embedding model forwarded to ``search_similar_passages``.
        num_samples: Maximum number of questions to evaluate.

    Returns:
        tuple: ``(accuracy, correct, total)``. When no question is evaluated
        the result is ``(0.0, 0, 0)`` rather than a division-by-zero error.
    """
    test_subset = list(test_data.items())[:num_samples]
    total = len(test_subset)
    if total == 0:
        # Empty test set or num_samples == 0: nothing to score.
        return 0.0, 0, 0

    correct = 0
    for question, answer in test_subset:
        results = search_similar_passages(question, index, training_data, model, k=1)
        retrieved_text = " ".join(results).lower()

        # Answers read from CSV are not guaranteed to be strings (numeric or
        # missing cells), so coerce before lower-casing.
        if str(answer).lower() in retrieved_text:
            correct += 1

    return correct / total, correct, total




In [78]:

def test_sample_questions(test_data, index, training_data, model, num_samples=3):
    """Test the RAG system with sample questions"""
    print("=== RAG System Test ===")
    print(f"Training passages: {len(training_data)}")
    print(f"Test questions: {len(test_data)}")
    print("="*50)
    
    for i, (question, answer) in enumerate(list(test_data.items())[:num_samples]):
        print(f"\nTest {i+1}:")
        print(f"Question: {question}")
        print(f"Expected Answer: {answer}")
        
        results = search_similar_passages(question, index, training_data, model, k=1)
        print("Retrieved passages:")
        for j, passage in enumerate(results, 1):
            print(f"  {j}. {passage[:100]}...")
        
        print("-" * 50)

In [79]:


def print_system_summary(training_data, test_data, embeddings, top_k=1):
    """Print a short description of the retrieval setup.

    Args:
        training_data: Collection of passages; only its length is reported.
        test_data: Collection of test questions; only its length is reported.
        embeddings: Two-dimensional array; the length of its second axis is
            reported as the embedding dimension.
        top_k: Number of passages retrieved per query, shown in the summary.
            Defaults to 1, the ``k`` used by every retrieval call in this
            notebook (the summary previously claimed "Top-5").

    Returns:
        None. The summary goes to standard output; start and end are logged.
    """
    logging.info("Printing system summary...")
    print("=== System Summary ===")
    print("Model: sentence-transformers/all-MiniLM-L6-v2")
    print(f"Training passages: {len(training_data)}")
    print(f"Test questions: {len(test_data)}")
    print(f"Embedding dimension: {embeddings.shape[1]}")
    print("✓ Semantic search with FAISS")
    print(f"✓ Top-{top_k} passage retrieval")
    print("• No answer generation (retrieval only)")
    logging.info("System summary printed")


In [80]:
# Main execution function
def main():
    """Run the retrieval pipeline end to end.

    Steps: configure logging, load the training passages and test questions
    from ``../data``, embed the passages, build the search index, print a
    system summary, show a few sample retrievals and print the retrieval
    accuracy.

    Raises:
        Exception: Any error raised by a pipeline step is logged together with
            its traceback and then re-raised unchanged.
    """
    # Setup logging
    log_file = setup_logging()
    logging.info("Starting RAG system execution...")

    try:
        # Load data
        training_data = load_training_data("../data/training-rag-mini-wikipedia_question-answer.csv")
        test_data = load_test_data("../data/test-rag-mini-wikipedia_question-answer.csv")

        # Initialize model and create embeddings
        model = initialize_model()
        embeddings = get_embeddings(training_data, model)
        index = create_index(embeddings)

        # Print system summary
        print_system_summary(training_data, test_data, embeddings)
        print("\n" + "="*50)

        # Test sample questions
        print("\n=== Sample Questions Test ===")
        test_sample_questions(test_data, index, training_data, model)

        # Evaluate accuracy
        print("\n=== Evaluation ===")
        accuracy, correct, total = evaluate_retrieval_accuracy(test_data, index, training_data, model)
        print(f"Accuracy: {accuracy:.1%} ({correct}/{total})")

        logging.info("RAG system execution completed successfully")
        logging.info(f"Log file saved at: {log_file}")

    except Exception as e:
        # logging.exception logs at ERROR level and appends the traceback, so
        # the log file records where the failure happened, not just its text.
        logging.exception(f"Error during execution: {str(e)}")
        raise

# Run the pipeline when this code executes as the main program, which includes
# executing this cell in a notebook kernel. Importing the code as a module
# does not trigger the run.
if __name__ == "__main__":
    main()


2025-09-26 13:41:59,626 - INFO - Logging initialized. Log file: ../logs/rag_system_20250926_134159.log
2025-09-26 13:41:59,626 - INFO - Starting RAG system execution...
2025-09-26 13:41:59,644 - INFO - Use pytorch device_name: mps
2025-09-26 13:41:59,644 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2
Batches: 100%|██████████| 100/100 [00:02<00:00, 36.49it/s]
2025-09-26 13:42:03,359 - INFO - Printing system summary...
2025-09-26 13:42:03,359 - INFO - System summary printed


=== System Summary ===
Model: sentence-transformers/all-MiniLM-L6-v2
Training passages: 3200
Test questions: 918
Embedding dimension: 384
✓ Semantic search with FAISS
✓ Top-5 passage retrieval
• No answer generation (retrieval only)


=== Sample Questions Test ===
=== RAG System Test ===
Training passages: 3200
Test questions: 918

Test 1:
Question: Was Abraham Lincoln the sixteenth President of the United States?
Expected Answer: yes


Batches: 100%|██████████| 1/1 [00:00<00:00, 199.40it/s]


Retrieved passages:
  1. Young Abraham Lincoln...
--------------------------------------------------

Test 2:
Question: Did Lincoln sign the National Banking Act of 1863?
Expected Answer: yes


Batches: 100%|██████████| 1/1 [00:00<00:00, 211.25it/s]


Retrieved passages:
  1. Lincoln believed in the Whig theory of the presidency, which left Congress to write the laws while h...
--------------------------------------------------

Test 3:
Question: Did his mother die of pneumonia?
Expected Answer: no


Batches: 100%|██████████| 1/1 [00:00<00:00, 207.17it/s]


Retrieved passages:
  1. An autopsy performed after his death revealed grave problems with his stomach and other organs of hi...
--------------------------------------------------

=== Evaluation ===


Batches: 100%|██████████| 1/1 [00:00<00:00, 164.48it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00, 164.53it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00, 214.13it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00, 204.87it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00, 221.14it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00, 200.07it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00, 205.10it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00, 227.53it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00, 198.26it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00, 221.83it/s]
2025-09-26 13:42:03,448 - INFO - RAG system execution completed successfully
2025-09-26 13:42:03,448 - INFO - Log file saved at: ../logs/rag_system_20250926_134159.log


Accuracy: 50.0% (5/10)
