In [6]:
# 1. Install required packages (run once)
!pip install langchain llama-cpp-python sentence-transformers faiss-cpu pypdf



In [7]:
# 2. Import libraries
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.llms import LlamaCpp
from langchain.chains import RetrievalQA

In [8]:
# Load Lecture Notes
from langchain.document_loaders import PyPDFLoader

# Load a single PDF
loader = PyPDFLoader("D:/SLIIT/Y4_S2/CTSE/lec notes/CTSE_Lecture_Notes.pdf")
documents = loader.load()

In [9]:
# Process Documents
from langchain.text_splitter import RecursiveCharacterTextSplitter
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
texts = text_splitter.split_documents(documents)

In [10]:
# Create Vector Store
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS

embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
db = FAISS.from_documents(texts, embeddings)

  embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
  from tqdm.autonotebook import tqdm, trange


In [11]:
# Set Up LLM
from langchain.llms import LlamaCpp

llm = LlamaCpp(
    model_path = r"D:\llama-2-7b-chat\llama-2-7b-chat.Q4_K_M.gguf",
    temperature=0.5,
    max_tokens=2000,
    n_ctx=2048
)

llama_model_loader: loaded meta data with 19 key-value pairs and 291 tensors from D:\llama-2-7b-chat\llama-2-7b-chat.Q4_K_M.gguf (version GGUF V2)
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = LLaMA v2
llama_model_loader: - kv   2:                       llama.context_length u32              = 4096
llama_model_loader: - kv   3:                     llama.embedding_length u32              = 4096
llama_model_loader: - kv   4:                          llama.block_count u32              = 32
llama_model_loader: - kv   5:                  llama.feed_forward_length u32              = 11008
llama_model_loader: - kv   6:                 llama.rope.dimension_count u32              = 128
llama_model_loader: - kv   7:                 llama.attention.head_count 

In [12]:
import os
print(os.path.exists(r"D:\llama-2-7b-chat\llama-2-7b-chat.Q4_K_M.gguf"))

True


In [13]:
# Create QA Chain
from langchain.chains import RetrievalQA

qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=db.as_retriever(),
    return_source_documents=True
)

In [None]:
# Create Chat Interface
def ask_question(question, show_sources=True, max_answer_length=1000):
    """
    Enhanced question-answering function for CTSE Lecture Notes
    
    Parameters:
        question (str): Your question about the lecture content
        show_sources (bool): Whether to display source documents (default: True)
        max_answer_length (int): Truncate answer if exceeds this length (default: 1000)
    """
    try:
        print(f"\n❓ Question: {question}\n")
        
        # Get the answer
        result = qa_chain({"query": question})
        
        # Process the answer
        answer = result["result"].strip()
        if len(answer) > max_answer_length:
            answer = answer[:max_answer_length] + "... [answer truncated]"
        
        print(f"💡 Answer:\n{answer}\n")
        
        # Show sources if available and requested
        if show_sources and result["source_documents"]:
            print("📚 Source Documents:")
            for i, doc in enumerate(result["source_documents"], 1):
                source = doc.metadata.get("source", "Unknown document")
                page = doc.metadata.get("page", "N/A")
                print(f"  {i}. {source} (Page {page})")
                # Optional: Show brief excerpt
                # print(f"     Excerpt: {doc.page_content[:100]}...")
        elif show_sources:
            print("⚠️ No specific sources found for this answer")
            
    except Exception as e:
        print(f"❌ Error processing your question: {str(e)}")
        if "maximum context length" in str(e):
            print("Try asking a shorter question or reducing max_tokens in your LLM config.")
    
    print("\n" + "━" * 50)  # Visual separator

# Example usage:
ask_question("What are the key characteristics of microservices architecture?")


❓ Question: What are the key characteristics of microservices architecture?



  result = qa_chain({"query": question})
