In [14]:
!pip install openai langchain-community langchain-chroma langchain langchain-text-splitters langchain_core langchain_huggingface pypdf

In [2]:
import os
from openai import OpenAI

from langchain_community.document_loaders import PyPDFLoader
from langchain_huggingface.embeddings import HuggingFaceEmbeddings
from langchain_core.prompts import PromptTemplate
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_chroma import Chroma


In [3]:
# CONFIGURATION
# OpenRouter exposes an OpenAI-compatible API, so the stock OpenAI client is used.
BASE_URL = "https://openrouter.ai/api/v1"

# Secrets must never live in the notebook: read the key from the environment.
# NOTE(review): a literal key was previously committed in this cell — treat it
# as leaked and revoke/rotate it in the OpenRouter dashboard.
OPEN_API_KEY = os.environ.get("OPENROUTER_API_KEY")
if not OPEN_API_KEY:
    raise RuntimeError(
        "OPENROUTER_API_KEY is not set. Define it in the environment "
        "(e.g. os.environ['OPENROUTER_API_KEY'] = getpass.getpass()) "
        "before running this cell."
    )
MODEL = "nvidia/nemotron-nano-9b-v2:free"

client = OpenAI(
    base_url=BASE_URL,
    api_key=OPEN_API_KEY,
)

# Document path
PDF_PATH = "/content/ML_BOOK.pdf"

# Chunking (sizes are measured in characters, see `length_function=len` in text_chunking)
CHUNK_SIZE = 150
CHUNK_OVERLAP = 40

# Dense embedding model
EMBEDDING_MODEL = "BAAI/bge-base-en-v1.5"

# Retrieval strategy
# NOTE(review): SEARCH_TYPE is not read by any code visible in this notebook;
# retrieval below is a plain dense similarity search.
SEARCH_TYPE = "hybrid"
TOP_K = 5

# Vector DB
PERSIST_DIRECTORY = "/content/chroma_db"


In [4]:
# DATA LOADING

def pdf_loading(pdf_path):
    """Read a PDF from disk via ``PyPDFLoader``.

    Args:
        pdf_path: Location of the PDF file to load.

    Returns:
        Whatever ``PyPDFLoader(pdf_path).load()`` returns (presumably a list of
        LangChain documents, one per page — verify against the loader docs).
    """
    return PyPDFLoader(pdf_path).load()



In [5]:
# CHUNKING OF DOCUMENT
def text_chunking(documents):
    """Split loaded documents into overlapping chunks.

    Chunk length and overlap come from the module-level ``CHUNK_SIZE`` and
    ``CHUNK_OVERLAP`` settings and are measured with ``len`` (characters).

    Args:
        documents: Documents to split, as produced by ``pdf_loading``.

    Returns:
        The chunks returned by ``RecursiveCharacterTextSplitter.split_documents``.
    """
    splitter_options = {
        "chunk_size": CHUNK_SIZE,
        "chunk_overlap": CHUNK_OVERLAP,
        "length_function": len,
    }
    splitter = RecursiveCharacterTextSplitter(**splitter_options)
    return splitter.split_documents(documents)


In [6]:
# STORE CHUNKS EMBEDDINGS IN VECTORSTORE

def store_embedding(chunks, model_name=None, device="cuda"):
    """Embed ``chunks`` and persist them in a Chroma vector store.

    Args:
        chunks: Document chunks to index, as produced by ``text_chunking``.
        model_name: Hugging Face embedding model to use. Defaults to the
            notebook-wide ``EMBEDDING_MODEL`` so indexing and querying cannot
            silently drift apart.
        device: Device passed to the embedding model. Defaults to ``"cuda"``
            (the previous hard-coded value); pass ``"cpu"`` on a runtime
            without a GPU.

    Returns:
        The Chroma vector store created by ``Chroma.from_documents``, persisted
        under ``PERSIST_DIRECTORY``. (Earlier versions returned ``None``;
        callers that ignore the return value are unaffected.)
    """
    embeddings = HuggingFaceEmbeddings(
        # Resolved at call time so a later change to EMBEDDING_MODEL is honoured.
        model_name=model_name or EMBEDDING_MODEL,
        model_kwargs={"device": device},
        encode_kwargs={"normalize_embeddings": True},
    )

    return Chroma.from_documents(
        documents=chunks,
        embedding=embeddings,
        persist_directory=PERSIST_DIRECTORY,
    )


In [7]:
# ((1))   VECTOR DATABASE BUILDING FROM RAW PDF TO VECTORSTORE

def chroma_db_building(pdf_path):
    """Run the indexing pipeline: load the PDF, chunk it, embed and store it.

    Args:
        pdf_path: Location of the PDF to index.

    Returns:
        None. The result of ``store_embedding`` is not propagated.

    NOTE(review): every call hands the chunks to ``store_embedding`` again, so
    re-running this presumably appends duplicates to the persisted collection —
    confirm before re-executing on an existing database.
    """
    pages = pdf_loading(pdf_path)
    pieces = text_chunking(pages)
    store_embedding(pieces)

In [8]:
# Build the Chroma vector database from the PDF configured in PDF_PATH
# (loads the PDF, chunks it, embeds the chunks and stores them).
chroma_db_building(PDF_PATH)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


In [12]:
# Initialize the query-time embedding model ONCE (outside the function) so
# every retrieval call reuses it. Queries must be embedded with the same model
# that indexed the chunks, so take it from EMBEDDING_MODEL rather than
# repeating the model name as a string literal.
embeddings = HuggingFaceEmbeddings(
    model_name=EMBEDDING_MODEL,
    model_kwargs={"device": "cuda"},  # pinned to GPU; switch to "cpu" on a runtime without CUDA
    encode_kwargs={"normalize_embeddings": True}
)

# Open the Chroma vector store persisted under PERSIST_DIRECTORY
vectorstore = Chroma(
    persist_directory=PERSIST_DIRECTORY,
    embedding_function=embeddings
)

# RETRIEVAL
def retrieve_context(query, k=TOP_K):
    """Fetch the ``k`` chunks most similar to ``query`` and merge them into one string.

    Args:
        query: Text to search the module-level ``vectorstore`` with.
        k: Number of results to request; defaults to ``TOP_K``.

    Returns:
        The ``page_content`` of each hit, joined by a blank line.
    """
    hits = vectorstore.similarity_search(query, k=k)
    passages = [hit.page_content for hit in hits]
    return "\n\n".join(passages)


In [13]:
def rag_chat(user_query, conversation_history=None):
    """Answer ``user_query`` with retrieval-augmented generation.

    Args:
        user_query: The user's question; also used as the retrieval query.
        conversation_history: Optional iterable of prior chat messages
            (``{"role": ..., "content": ...}`` dicts) inserted between the
            system prompt and the new user message. It is read, never mutated.
            Defaults to no history.

    Returns:
        A ``(response, context)`` tuple: the model's reply text and the
        retrieved context string that was placed in the system prompt.
    """
    # Use a None sentinel instead of a mutable `[]` default, which would be a
    # single list shared across every call of the function.
    if conversation_history is None:
        conversation_history = []

    # Retrieve relevant context
    context = retrieve_context(user_query)

    # Create prompt with context
    system_message = f"""You are a helpful assistant for a company.
Use the following context to answer the user's question.
If the answer is not in the context, say so politely.

Context:
{context}
"""

    # Prepare messages: system prompt, then prior turns, then the new question
    messages = [
        {"role": "system", "content": system_message},
        *conversation_history,
        {"role": "user", "content": user_query},
    ]

    # Get response from OpenRouter
    completion = client.chat.completions.create(
        model=MODEL,
        messages=messages,
        extra_headers={
            "HTTP-Referer": "https://company-chatbot.local",
            "X-Title": "Company RAG Chatbot",
        }
    )

    response = completion.choices[0].message.content
    return response, context

In [14]:
def run_chatbot():
    """Interactive read-eval-print loop around ``rag_chat``.

    Commands (case-insensitive): ``quit`` ends the session, ``context`` prints
    the context retrieved for the most recent successful answer. Blank input is
    ignored. Any exception raised while answering is printed and the loop
    continues; the failed turn is not added to the history. The history is
    capped at the 10 most recent messages.
    """
    print("Company RAG Chatbot Started!")
    print("Type 'quit' to exit, 'context' to see retrieved context\n")

    max_messages = 10  # history cap, to keep the prompt within the context window
    history = []
    previous_context = ""

    while True:
        message = input("You: ").strip()
        command = message.lower()

        if command == 'quit':
            print("Goodbye!")
            return

        if command == 'context':
            print(f"\nLast Retrieved Context:\n{previous_context}\n")
            continue

        if message == "":
            continue

        try:
            answer, previous_context = rag_chat(message, history)

            print(f"\nBot: {answer}\n")

            # Record both sides of the exchange, then drop the oldest messages
            history.extend([
                {"role": "user", "content": message},
                {"role": "assistant", "content": answer},
            ])
            history = history[-max_messages:]

        except Exception as e:
            print(f"Error: {e}\n")

# Run the chatbot: starts the interactive loop, which keeps prompting via
# input() until the user types 'quit'.
run_chatbot()

Company RAG Chatbot Started!
Type 'quit' to exit, 'context' to see retrieved context

You: hi

Bot: Hello! How can I assist you today? 😊


You: i need to know about supervised learning 

Bot: Supervised learning is a type of machine learning where the algorithm learns from a labeled dataset, meaning each training example is paired with an output label. The goal is for the model to learn the mapping from inputs to correct outputs, so it can make accurate predictions on new, unseen data. Key steps include gathering labeled data, training a model (often model-based), and using this model to generalize patterns from the examples it was trained on. This contrasts with unsupervised learning, which uses unlabeled data to find hidden structures.


You: what is decision try give me information in bullet points 

Bot: - A decision tree is an **acyclic graph** (directed acyclic graph) used for decision-making.  
- It makes decisions by **splitting data** at each node based on feature values.  