In [None]:
"""
This notebook loads the local Mistral-7B model, performs retrieval
from FAISS, and runs a RAG pipeline to answer user questions.
"""

In [None]:
from llama_cpp import Llama
import faiss
from pathlib import Path
import numpy as np

In [None]:
# --- Config ---
# Everything a reader might want to tune lives in this cell.

# Default number of chunks retrieve_chunks() asks the vector index for.
TOP_K: int = 3

# On-disk artifacts. These are relative paths, so they are resolved against
# the kernel's current working directory when the files are opened.
CHUNKS_PATH: str = "../vectorstore/chunks.txt"
VECTOR_INDEX_PATH: str = "../vectorstore/ctse_faiss.index"
MODEL_PATH: str = "../models/mistral-7b-instruct-v0.1.Q4_K_M.gguf"

In [None]:
# --- Load LLM ---
# Build the llama-cpp-python model handle once; generate_answer() reads this
# module-level `llm` object for every completion.
llm = Llama(
    model_path=MODEL_PATH,  # GGUF weights file configured in the config cell
    n_ctx=2048,             # context window size passed to llama.cpp
    n_threads=6,            # thread count setting -- tune to the host machine
    n_gpu_layers=20,        # layer-offload setting -- tune to available VRAM
)

In [None]:
# --- Load vector index and chunks ---
# The FAISS index is read first, then the chunk file is split on blank lines
# into a list of text passages.
# retrieve_chunks() uses the ids returned by index.search() as positions in
# `chunks`, so the list order presumably has to match the order in which the
# vectors were added to the index -- verify against the indexing notebook.
index = faiss.read_index(VECTOR_INDEX_PATH)
chunks = Path(CHUNKS_PATH).read_text(encoding='utf-8').split("\n\n")

In [None]:
# --- RAG Pipeline ---
# Name of the sentence-transformers model used to embed queries.
# NOTE(review): presumably this must be the same model that produced the
# vectors stored in the FAISS index -- confirm against the indexing code.
_EMBEDDER_NAME = 'sentence-transformers/all-MiniLM-L6-v2'

# Lazily created embedder instance, shared by every retrieve_chunks() call.
_embedder = None


def _get_embedder():
    """Return the shared query embedder, loading the model on first use only."""
    global _embedder
    if _embedder is None:
        # Kept as a function-local import (as in the original code) so the
        # dependency is only loaded when retrieval is actually used.
        from sentence_transformers import SentenceTransformer
        _embedder = SentenceTransformer(_EMBEDDER_NAME)
    return _embedder


def retrieve_chunks(query, k=TOP_K):
    """Return the text chunks whose vectors are nearest to ``query``.

    Args:
        query: The question text to embed and search for.
        k: Maximum number of neighbours to request from the index
            (defaults to ``TOP_K``).

    Returns:
        A list of at most ``k`` strings taken from the module-level ``chunks``
        list, in the order the index returned them. The list is shorter than
        ``k`` (possibly empty) when the index holds fewer than ``k`` vectors.

    Raises:
        IndexError: If the index returns an id beyond the end of ``chunks``,
            i.e. the index and the chunk file are out of sync.
    """
    query_vec = _get_embedder().encode([query])
    # The distances are not used; only the neighbour ids matter here.
    _, ids = index.search(np.asarray(query_vec, dtype='float32'), k)
    # FAISS pads the id row with -1 when fewer than k neighbours exist.
    # Without this filter, chunks[-1] would silently return the *last* chunk
    # as if it were a real hit.
    return [chunks[i] for i in ids[0] if i >= 0]

def generate_answer(query):
    """Answer ``query`` with the local LLM, grounded on retrieved chunks.

    The retrieved chunks are joined with a ``---`` separator line, embedded
    in a fixed instruction prompt together with the question, and sent to the
    module-level ``llm``. The call is made with ``max_tokens=512`` and a
    ``"\\n\\n"`` stop sequence.

    Args:
        query: The user's question.

    Returns:
        A ``(answer, sources)`` tuple: the whitespace-stripped completion
        text, and the list of chunks that were supplied as context.
    """
    sources = retrieve_chunks(query)
    context = "\n---\n".join(sources)

    prompt = f"""
You are an expert assistant helping students learn about Current Trends in Software Engineering (CTSE).
Use the following context to answer the question.

Context:
{context}

Question: {query}
Answer:
"""
    completion = llm(prompt, max_tokens=512, stop=["\n\n"])
    # Only the first returned choice is used.
    answer_text = completion['choices'][0]['text'].strip()
    return answer_text, sources

# --- Example Usage ---
# question = "What is the role of DevOps in modern software engineering?"
# answer, sources = generate_answer(question)
# print("Answer:\n", answer)
# print("\nSource Chunks:\n", sources)
