1. Install Libraries

In [6]:
!pip install langchain langchain-huggingface langchain-community fastembed chromadb transformers torch


Defaulting to user installation because normal site-packages is not writeable



[notice] A new release of pip is available: 25.0.1 -> 25.1
[notice] To update, run: python.exe -m pip install --upgrade pip


2. Import Libraries

In [7]:
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import FastEmbedEmbeddings
from langchain.vectorstores import Chroma
from langchain.chains import RetrievalQA
from langchain.llms import HuggingFacePipeline
from langchain_community.chat_models import ChatOllama
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
import os, getpass

3. Authenticate Hugging Face

In [8]:
# Reuse the Hugging Face API key from the environment; prompt for it only when
# the variable is unset or empty.
_TOKEN_VAR = "HUGGINGFACEHUB_API_TOKEN"
if os.environ.get(_TOKEN_VAR, "") == "":
    os.environ[_TOKEN_VAR] = getpass.getpass("Enter your Hugging Face API key: ")


4. Load and Split CTSE Lecture Notes

In [9]:
# Load your lecture notes PDF file.
# load() is used instead of load_and_split(): the latter already chunks the
# text with a default splitter, so its result is not a page list and the
# splitter below would split the text a second time.
loader = PyPDFLoader("CTSE_Lecture_Notes.pdf")  # Replace with your file name
pages = loader.load()

# Split into chunks (important for context-aware retrieval)
splitter = RecursiveCharacterTextSplitter(
    chunk_size=1024,
    chunk_overlap=100,
    length_function=len,
    add_start_index=True,
)
docs = splitter.split_documents(pages)
print(f"Split {len(pages)} pages into {len(docs)} chunks.")

Split 376 pages into 382 chunks.


5. Create Embeddings & Vector Store

In [10]:
persist_directory = "./chroma_langchain_db"

# Use FastEmbed to convert text into vectors
embeddings = FastEmbedEmbeddings()

# Only reuse the directory when it actually contains files. A directory that
# exists but is empty (e.g. left behind by an interrupted run) would otherwise
# be opened as an empty store instead of being rebuilt from `docs`.
if os.path.isdir(persist_directory) and os.listdir(persist_directory):
    vector_store = Chroma(persist_directory=persist_directory, embedding_function=embeddings)
    print("Loaded existing vector store.")

else:
    # Otherwise, create and save
    vector_store = Chroma.from_documents(
        documents=docs,
        embedding=embeddings,
        persist_directory=persist_directory,
    )
    # NOTE(review): recent Chroma releases are understood to persist
    # automatically; persist() is called only when the wrapper still offers it.
    persist = getattr(vector_store, "persist", None)
    if callable(persist):
        persist()
    print("Created and saved new vector store.")


  vector_store = Chroma(persist_directory=persist_directory, embedding_function=embeddings)


Loaded existing vector store.


6. Set Up Retriever

In [11]:
# Retriever with threshold filtering: k and score_threshold are handed to the
# vector store as search keyword arguments.
threshold_search_kwargs = dict(k=3, score_threshold=0.5)
retriever = vector_store.as_retriever(
    search_type="similarity_score_threshold",
    search_kwargs=threshold_search_kwargs,
)

7. Initialize LLaMA 3 via Ollama

In [12]:
# Initialize the chat model served by Ollama (LLaMA 3 by default).
# The model name can be overridden with the OLLAMA_MODEL environment variable,
# so a smaller model can be selected on hosts that lack the memory to load
# "llama3". An unset or empty variable falls back to "llama3".
llm = ChatOllama(model=os.getenv("OLLAMA_MODEL") or "llama3")

  llm = ChatOllama(model="llama3")


8. Build Retrieval QA Chain

In [13]:
# return_source_documents=True makes the chain include the retrieved chunks in
# its output under "source_documents"; without it, code that reads that key
# from the result (such as ask()) never has any sources to list.
qa_chain = RetrievalQA.from_llm(
    llm=llm,
    retriever=retriever,
    return_source_documents=True,
)

9. Define Chatbot Function

In [14]:
def ask(query: str):
    """Send ``query`` through ``qa_chain`` and print the answer and its sources.

    Args:
        query: Question text, passed to the chain under the ``"query"`` key.

    Prints the chain's ``"result"`` value, then one ``Source N: ...`` line for
    each entry found under ``"source_documents"`` (none if the key is absent).
    Returns nothing.
    """
    response = qa_chain.invoke({"query": query})

    # Show the generated answer, surrounded by blank lines
    reply_text = response["result"]
    print(f"\nAnswer:\n{reply_text}\n")

    # List where each retrieved document came from, numbered from 1
    cited_docs = response.get("source_documents", [])
    for position, document in enumerate(cited_docs, start=1):
        origin = document.metadata.get("source", "N/A")
        print(f"Source {position}: {origin}")


10. Batch Querying – Test Multiple Questions

In [20]:
# Questions to run through the chatbot, asked one after another
questions = ["What is software engineering?"]
for question in questions:
    ask(question)

ValueError: Ollama call failed with status code 500. Details: {"error":"model requires more system memory (5.9 GiB) than is available (3.8 GiB)"}