In [None]:
"""
This notebook loads the local Mistral-7B model, performs retrieval
from FAISS, and runs a RAG pipeline to answer user questions.
"""

In [2]:
from llama_cpp import Llama
import faiss
from pathlib import Path
import numpy as np

In [3]:
# --- Config ---
MODEL_PATH = "../models/mistral-7b-instruct-v0.1.Q4_K_M.gguf"
VECTOR_INDEX_PATH = "../vectorstore/ctse_faiss.index"
CHUNKS_PATH = "../vectorstore/chunks.txt"
TOP_K = 3

In [4]:
# --- Load LLM ---
llm = Llama(model_path=MODEL_PATH, n_ctx=2048, n_threads=6, n_gpu_layers=20)

llama_model_loader: loaded meta data with 20 key-value pairs and 291 tensors from ../models/mistral-7b-instruct-v0.1.Q4_K_M.gguf (version GGUF V2)
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = mistralai_mistral-7b-instruct-v0.1
llama_model_loader: - kv   2:                       llama.context_length u32              = 32768
llama_model_loader: - kv   3:                     llama.embedding_length u32              = 4096
llama_model_loader: - kv   4:                          llama.block_count u32              = 32
llama_model_loader: - kv   5:                  llama.feed_forward_length u32              = 14336
llama_model_loader: - kv   6:                 llama.rope.dimension_count u32              = 128
llama_model_loader: - kv   7:                 

In [5]:
# --- Load vector index and chunks ---
index = faiss.read_index(VECTOR_INDEX_PATH)
with open(CHUNKS_PATH, 'r', encoding='utf-8') as f:
    chunks = f.read().split("\n\n")

In [6]:
# --- RAG Pipeline ---
def retrieve_chunks(query, k=TOP_K):
    from sentence_transformers import SentenceTransformer
    embedder = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
    query_vec = embedder.encode([query])
    D, I = index.search(np.array(query_vec).astype('float32'), k)
    print("Retrieved indices:", I)
    print("Retrieved distances:", D)
    return [chunks[i] for i in I[0]]

def generate_answer(query):
    context_chunks = retrieve_chunks(query)
    context = "\n---\n".join(context_chunks)

    prompt = f"""
You are an expert assistant helping students learn about Current Trends in Software Engineering (CTSE).
Use the following context to answer the question.

Context:
{context}

Question: {query}
Answer:
"""
    response = llm(prompt, max_tokens=512, stop=["\n\n"])
    return response['choices'][0]['text'].strip(), context_chunks


In [7]:
print("Total Chunks:", len(chunks))
print("FAISS index dimensions:", index.d)

Total Chunks: 2
FAISS index dimensions: 384


In [8]:
# --- Example Usage ---
question = "What is DevOps?"
answer, sources = generate_answer(question)

print("Answer:\n", answer)
print("\nSource Chunks:\n", sources)

  from .autonotebook import tqdm as notebook_tqdm


Retrieved indices: [[ 1  0 -1]]
Retrieved distances: [[6.1679852e-01 6.3236934e-01 3.4028235e+38]]


llama_perf_context_print:        load time = 1103116.47 ms
llama_perf_context_print: prompt eval time = 1103078.44 ms /  1779 tokens (  620.06 ms per token,     1.61 tokens per second)
llama_perf_context_print:        eval time =   22290.75 ms /    32 runs   (  696.59 ms per token,     1.44 tokens per second)
llama_perf_context_print:       total time = 1127703.61 ms /  1811 tokens


Answer:
 DevOps is the combination of cultural philosophies, practices, and tools that increases an organization’s ability to deliver applications and services at high velocity.

Source Chunks:
 ['requests (MRs) and CI/CD to further unify software development and infrastructure operations. GitOps incorporates managing both infrastructure and applications as code. • Cloud Infrastructure - Cloud provides more flexibility, scalability and toolsets for organizations to implement DevOps culture and practices. Serverless architecture in cloud brings down the efforts of DevOps teams as it eliminates server management operations. • Continuous Monitoring, Logging and Alerting - Organizations monitor metrics and logs to see how application and infrastructure performance impacts the experience of their product’s end user. Combined with real time alerts organizations can do a real time analysis on the application status. DevOps Tools and Technologies Beyond DevOps "the practice of integrating secu