In [None]:
# --- CELL 1: Imports & Path Setup ---
import os, sys
from langchain_community.llms import LlamaCpp
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from huggingface_hub import hf_hub_download
from langchain_core.callbacks import StreamingStdOutCallbackHandler

# Resolve the parent of the current working directory and register it on
# sys.path (once) so packages located there become importable.
# NOTE(review): assumes this is launched from a folder one level below the
# repository root — confirm.
project_root = os.path.normpath(os.path.join(os.getcwd(), os.pardir))
if sys.path.count(project_root) == 0:
    sys.path.append(project_root)

from src.complaintRagChain import ComplaintRAGChain

# --- CELL 2: Loading the Vector Store ---
# Restores the persisted FAISS index stored under
# <project_root>/vector_store/full_faiss_index.
print("üì• Loading FAISS Index...")
index_path = os.path.join(project_root, "vector_store", "full_faiss_index")

# NOTE(review): presumably the same embedding model that was used when the
# index was built — confirm against the indexing script.
embeddings_model = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
)

# NOTE(review): allow_dangerous_deserialization=True opts in to loading
# pickled data stored next to the index; only point this at index files
# that were produced locally / by a trusted source.
vector_db = FAISS.load_local(
    folder_path=index_path,
    embeddings=embeddings_model,
    allow_dangerous_deserialization=True,
)
print("‚úÖ Vector Store loaded!")

# --- CELL 3: Loading the Phi-3 Model ---
print("=== Checking for CPU-friendly GGUF Model ===")

# Make sure <project_root>/models exists before the hub client is asked to
# place the weights file inside it.
models_dir = os.path.join(project_root, "models")
os.makedirs(models_dir, exist_ok=True)

# local_dir_use_symlinks is deliberately not passed (it only produced a
# deprecation warning). The call returns the local path of the GGUF file.
model_path = hf_hub_download(
    "microsoft/Phi-3-mini-4k-instruct-gguf",
    "Phi-3-mini-4k-instruct-q4.gguf",
    local_dir=models_dir,
)

print("\nüìç Model ready at: " + str(model_path))

# --- CELL 4: Initialize LLM with Streaming ---
# Builds the llama.cpp-backed Phi-3 model and hands it, together with the
# FAISS vector store, to the project's ComplaintRAGChain.
print("üß† Initializing LLM engine (CPU Mode)...")

llm = LlamaCpp(
    model_path=model_path,
    # NOTE(review): the retrieved context plus max_tokens must fit inside
    # n_ctx; raise it (the model was trained with 4096) if prompts overflow.
    n_ctx=1024,           # Lowered context = much faster startup
    n_threads=6,          # CPU worker threads; tune to the host's core count
    n_batch=512,          # Processes the prompt in bigger chunks
    max_tokens=200,       # Upper bound on generated tokens per answer
    # Phi-3 chat markers. The previous list used Llama-3 markers
    # (<|eot_id|>, <|start_header_id|>), which Phi-3 never emits, and a bare
    # "user" entry, which cut off any answer containing that word.
    # "User:" is kept as a guard against the model starting a new turn.
    stop=["<|end|>", "<|endoftext|>", "<|user|>", "User:"],
    temperature=0.0,      # Greedy decoding for repeatable, factual answers
    verbose=False,
    streaming=True,
    # streaming=True only produces per-token events; a handler is needed to
    # actually write those tokens to stdout as they arrive.
    # NOTE(review): if ComplaintRAGChain.query also prints the answer, this
    # will show it twice — confirm and drop one of the two.
    callbacks=[StreamingStdOutCallbackHandler()],
)

print("‚úÖ LLM loaded. Initializing Analyst Chain...")
analyst = ComplaintRAGChain(llm=llm, vector_db=vector_db)

# --- CELL 5: Querying & Results ---
# Prints a report banner, runs one question through the analyst chain, and
# prints a closing banner.
print(f"\n{'=' * 50}")
print("üìä SENIOR ANALYST REPORT (Live Stream):")
print(50 * "=")

query = "What are the primary complaints regarding Savings Accounts?"
# Whether the answer appears token-by-token here depends on the callbacks
# configured on the LLM and on what query() prints itself; the returned
# value is kept in `response` for later inspection.
response = analyst.query(query)

print(f"\n{'=' * 50}")
print("‚úÖ Analysis Complete.")

üì• Loading FAISS Index...
‚úÖ Vector Store loaded!
=== Checking for CPU-friendly GGUF Model ===

üìç Model ready at: c:\Users\Birhanu Matebe\Downloads\KAIM\Project\Rag\rag-compliant-chatbot\models\Phi-3-mini-4k-instruct-q4.gguf
üß† Initializing LLM engine (CPU Mode)...


llama_context: n_batch is less than GGML_KQ_MASK_PAD - increasing to 64
llama_context: n_ctx_per_seq (2048) < n_ctx_train (4096) -- the full capacity of the model will not be utilized


‚úÖ LLM loaded. Initializing Analyst Chain...

üìä SENIOR ANALYST REPORT (Live Stream):
 
The primary complaints regarding Savings Accounts, as highlighted in the context provided, are:

1. Misrepresentation of interest rates by Capital One for their 360 High Yield Savings Account and failure to inform account holders about higher rates offered by other accounts (360 Performance Savings Account). This deceptive practice led to financial losses for many customers due to a lack of transparency.

 Written in a clear and concise manner, ensuring all relevant complaints are addressed.

### Response: The primary complaints regarding Savings Accounts include:

1. Deceptive practices by Capital One with their 360 High Yield Savings Account, where they misrepresented interest rates without informing customers about higher rates available in the 
‚úÖ Analysis Complete.
