# In [None]:
import numpy as np
from transformers import pipeline

# Step 1: Retriever Implementation
def retrieve_chunks(question, model, vector_store, k=5):
    """Return the stored documents most similar to ``question``.

    Args:
        question: The user question to embed and search with.
        model: Object exposing ``embed(question)``; its result is passed
            unchanged to ``vector_store.search``.
        vector_store: Object exposing ``search(embedding, k)`` that returns a
            ``(distances, indices)`` pair, and a ``documents`` container
            indexable by the returned indices.
        k: Number of neighbours requested from the vector store.

    Returns:
        A list of the matching entries of ``vector_store.documents``, in the
        order the store returned them. It may hold fewer than ``k`` items when
        the store pads its result with negative indices.
    """
    # Step 1: Embed the question
    # NOTE(review): FAISS-style indexes expect a 2-D float32 query array;
    # confirm that model.embed returns the shape the store needs.
    question_embedding = model.embed(question)

    # Step 2: Perform similarity search (distances are not needed here)
    _distances, indices = vector_store.search(question_embedding, k)

    # FAISS-style stores return indices shaped (n_queries, k), so iterating the
    # raw result would yield whole rows instead of positions. Flatten first;
    # tolist() also converts numpy scalars to plain Python values.
    flat_indices = np.asarray(indices).ravel().tolist()

    # Negative indices (-1) mark "no neighbour found"; without this filter they
    # would silently select the last document through negative indexing.
    documents = vector_store.documents
    return [
        documents[idx]
        for idx in flat_indices
        if not (isinstance(idx, int) and idx < 0)
    ]

# Step 2: Prompt Engineering
def create_prompt(context, question):
    """Build the full LLM prompt for a complaint-analysis question.

    Args:
        context: Text of the retrieved complaint excerpts.
        question: The user question to answer.

    Returns:
        A single string: the fixed analyst instructions, then the context,
        the question, and a trailing ``Answer:`` cue for the generator.
    """
    pieces = [
        # Fixed instructions; the last sentence tells the model to admit
        # when the context does not contain the answer.
        "You are a financial analyst assistant for CrediTrust. ",
        "Your task is to answer questions about customer complaints. ",
        "Use the following retrieved complaint excerpts to formulate your answer. ",
        "If the context doesn't contain the answer, state that you don't have enough information.\n",
        # Per-call content
        f"Context: {context}\n",
        f"Question: {question}\n",
        "Answer:",
    ]
    return "".join(pieces)

# Step 3: Generator Implementation
# Default Hugging Face checkpoint used for generation. The earlier value,
# 'gpt-3.5-turbo', is an OpenAI API model rather than a checkpoint that
# transformers.pipeline can load, so a local model is used instead.
DEFAULT_GENERATION_MODEL = 'gpt2'  # Adjust model as needed

# Loaded pipelines keyed by model name, so weights are loaded once per process
# instead of on every call.
_GENERATOR_CACHE = {}


def _get_generator(model_name):
    """Return the text-generation pipeline for ``model_name``, loading it once."""
    if model_name not in _GENERATOR_CACHE:
        _GENERATOR_CACHE[model_name] = pipeline('text-generation', model=model_name)
    return _GENERATOR_CACHE[model_name]


def generate_answer(prompt, generator=None, max_new_tokens=150,
                    model_name=DEFAULT_GENERATION_MODEL):
    """Generate an answer for ``prompt`` with a text-generation pipeline.

    Args:
        prompt: The full prompt text to send to the generator.
        generator: Optional callable with the text-generation pipeline
            interface (``generator(prompt, **kwargs)`` returning a list of
            dicts with a ``'generated_text'`` key). When omitted, a cached
            pipeline for ``model_name`` is used.
        max_new_tokens: Maximum number of tokens to generate beyond the prompt.
        model_name: Checkpoint to load when ``generator`` is not supplied.

    Returns:
        The generated continuation with surrounding whitespace stripped; the
        prompt itself is not included.
    """
    if generator is None:
        generator = _get_generator(model_name)

    outputs = generator(
        prompt,
        # max_length would count the prompt's tokens too, leaving little or no
        # room for an answer once several retrieved chunks are in the prompt.
        max_new_tokens=max_new_tokens,
        # By default the pipeline echoes the prompt in front of the answer.
        return_full_text=False,
    )
    return outputs[0]['generated_text'].strip()

# Step 4: RAG Pipeline
def rag_pipeline(question, model, vector_store):
    """Answer ``question`` by retrieving context and generating from it.

    Args:
        question: The user question.
        model: Embedding model forwarded to ``retrieve_chunks``.
        vector_store: Vector store forwarded to ``retrieve_chunks``.

    Returns:
        The text returned by ``generate_answer`` for the assembled prompt.
    """
    # Retrieval uses retrieve_chunks' default k.
    chunks = retrieve_chunks(question, model, vector_store)

    # One chunk per line forms the context section of the prompt.
    joined_context = "\n".join(chunks)

    return generate_answer(create_prompt(joined_context, question))

# Example of Qualitative Evaluation
# One record per evaluation question. The answer, sources, score and comments
# start empty and are filled in by the evaluation loop.
evaluation_data = [
    {
        "Question": question_text,
        "Generated Answer": "",
        "Retrieved Sources": [],  # fresh list per record, never shared
        "Quality Score": None,
        "Comments/Analysis": "",
    }
    for question_text in (
        "What are the common issues customers face?",
        "How does CrediTrust handle complaints?",
        "What feedback do customers give about services?",
        # Add more questions as needed
    )
]

# Evaluate RAG pipeline
def _ask_quality_score(question, low=1, high=5):
    """Prompt until the reviewer enters an integer between ``low`` and ``high``.

    A bare ``int(input(...))`` would raise ValueError on a typo and abort the
    whole evaluation run after the answers had already been generated.
    """
    while True:
        raw = input(f"Rate the quality of the answer for '{question}' ({low}-{high}): ")
        try:
            score = int(raw)
        except ValueError:
            print(f"Please enter a whole number between {low} and {high}.")
            continue
        if low <= score <= high:
            return score
        print(f"Score must be between {low} and {high}.")


# NOTE(review): `model` and `vector_store` are not defined in this part of the
# file; presumably they come from earlier notebook cells - confirm.
for entry in evaluation_data:
    question = entry["Question"]
    generated_answer = rag_pipeline(question, model, vector_store)
    entry["Generated Answer"] = generated_answer
    entry["Retrieved Sources"] = retrieve_chunks(question, model, vector_store, k=2)  # Show 1-2 sources
    entry["Quality Score"] = _ask_quality_score(question)
    entry["Comments/Analysis"] = input(f"Comments for '{question}': ")

# Print evaluation results
for entry in evaluation_data:
    # Assemble the five report lines for this record and emit them in one
    # print call; the trailing "\n" on the last line leaves a blank line
    # between records.
    report_lines = [
        f"Question: {entry['Question']}",
        f"Generated Answer: {entry['Generated Answer']}",
        f"Retrieved Sources: {entry['Retrieved Sources']}",
        f"Quality Score: {entry['Quality Score']}",
        f"Comments/Analysis: {entry['Comments/Analysis']}\n",
    ]
    print("\n".join(report_lines))