In [None]:
# The four main .py files that together make up the whole RAG pipeline


# This is the Streamlit code - the main entry point of the app

# streamlit.py
# Streamlit UI for AlgoAssist - An Intelligent DSA Learning Companion

import sys
import os
import streamlit as st

# Make sure we can import from the 'app' folder
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
from app.chatbot import answer_query  # This handles the logic of answering user queries

# Page configuration: browser-tab title, robot icon, and a centered layout
st.set_page_config(page_title="AlgoAssist", page_icon="🤖", layout="centered")

# Custom CSS for the header/subheader classes used by the banner below
_custom_css = """
<style>
    .main-header {
        text-align: center;
        color: #4CAF50;
    }
    .subheader {
        text-align: center;
        font-size: 1.2em;
        margin-bottom: 20px;
    }
</style>
"""
st.markdown(_custom_css, unsafe_allow_html=True)

# Banner: title first, then the tagline, both emitted as raw HTML so the
# CSS classes above apply
for _banner_html in (
    "<h1 class='main-header'>AlgoAssist 🤖</h1>",
    "<p class='subheader'>Your intelligent DSA learning companion</p>",
):
    st.markdown(_banner_html, unsafe_allow_html=True)

# Start with an empty conversation the first time this session runs
if "messages" not in st.session_state:
    st.session_state["messages"] = []

# Replay the stored conversation, one chat bubble per entry (user and assistant)
for past_turn in st.session_state["messages"]:
    speaker, text = past_turn["role"], past_turn["content"]
    with st.chat_message(speaker):
        st.write(text)

# On the left: a list of clickable sample questions to get started quickly
with st.sidebar:
    st.header("Sample Questions")
    sample_questions = [
        "What is the time complexity of quicksort?",
        "Explain binary search trees",
        "How does a hash table work?",
        "Difference between BFS and DFS",
        "Explain dynamic programming"
    ]

    # A click is handled like a typed question: record it, answer it, then
    # rerun so the main chat area redraws with both new messages
    for question in sample_questions:
        if st.button(question):
            st.session_state.messages.append({"role": "user", "content": question})
            try:
                with st.spinner("Thinking..."):
                    answer = answer_query(question)
            except Exception as exc:
                # The user turn is already in the history; without a reply the
                # conversation would show a dangling question after the rerun.
                # Store a visible fallback message instead of crashing the page.
                answer = f"⚠️ Sorry, I couldn't generate an answer: {exc}"
            st.session_state.messages.append({"role": "assistant", "content": answer})
            # Kept outside the try block on purpose: st.rerun() interrupts the
            # script via Streamlit's own control flow and must not be caught here
            st.rerun()  # Refresh to update the chat interface with new messages

# Main input box where the user types their custom query
query = st.chat_input("Ask about Data Structures & Algorithms")

if query:
    # Store the user's question in chat history
    st.session_state.messages.append({"role": "user", "content": query})

    # Show the question in the chat bubble
    with st.chat_message("user"):
        st.write(query)

    # Generate the answer and show it
    with st.chat_message("assistant"):
        try:
            with st.spinner("Generating answer..."):
                answer = answer_query(query)
        except Exception as exc:
            # The question is already stored above; if the backend call fails
            # we still need an assistant entry so the history stays paired up
            # (question -> reply) on the next rerun.
            answer = f"⚠️ Sorry, I couldn't generate an answer: {exc}"
            st.error(answer)
        else:
            st.write(answer)

    # Save the assistant's reply (or the fallback error text) to chat history too
    st.session_state.messages.append({"role": "assistant", "content": answer})


In [None]:
# llm.py - this is the LLM code
# Backend logic for AlgoAssist - handles LLM, embeddings, and prompt creation

from langchain_groq import ChatGroq
from langchain_core.prompts import ChatPromptTemplate
from langchain_huggingface import HuggingFaceEmbeddings
from chromadb import PersistentClient
from config import GROQ_API_KEY, HF_TOKEN  # API keys for Groq and Hugging Face
import streamlit as st

# Build the Groq chat model once; st.cache_resource hands back the same
# instance on later calls instead of constructing a new one each time
@st.cache_resource
def get_llm():
    """Return the ChatGroq client used to generate answers."""
    groq_settings = dict(
        groq_api_key=GROQ_API_KEY,
        model_name="Llama3-8b-8192",  # Groq-hosted LLaMA3 model
        temperature=0.5,              # balanced creativity
        max_tokens=1024,              # cap on response length
        max_retries=3,                # retry when the API call fails
        request_timeout=30,           # seconds
    )
    return ChatGroq(**groq_settings)

# This sets up the vector DB retriever using Chroma and HuggingFace embeddings.
# Cached like get_llm(): the caller fetches the retriever on every question, and
# without caching the embedding model and Chroma client would be rebuilt each time.
@st.cache_resource
def get_retriever(persist_dir="chroma_db"):
    """Open the persisted Chroma collection and the matching embedding model.

    Args:
        persist_dir: Directory of the Chroma persistent store. A relative path
            is resolved against the current working directory.

    Returns:
        A ``(collection, embeddings)`` tuple: the Chroma collection to query
        and the HuggingFaceEmbeddings instance used to embed queries for it.

    Note:
        The result is cached per ``persist_dir`` for the lifetime of the
        Streamlit server process, so a database rebuilt while the app is
        running is presumably only picked up after a restart or cache clear.
    """
    # Same model name that build_chroma_db uses when indexing, so query
    # vectors live in the same embedding space as the stored chunks
    embeddings = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2"
    )

    # Connect to Chroma's persistent storage
    client = PersistentClient(path=persist_dir)
    # build_chroma_db passes no collection name, so this relies on the
    # LangChain Chroma wrapper's default collection name
    collection = client.get_collection(name="langchain")

    # Return both collection and embeddings so they can be used together
    return collection, embeddings

# Prompt template that frames the assistant as a DSA specialist
def create_prompt():
    """Return the ChatPromptTemplate with ``{context}`` and ``{query}`` slots.

    The wording tells the model to answer from the supplied context and to
    fall back on its own knowledge when that context is not relevant.
    """
    template_text = """You are a helpful assistant specialized in Data Structures and Algorithms.
Use the following context to answer the question.
If the context is not relevant, provide an accurate answer based on your knowledge.

Context:
{context}

Question: {query}
Answer:"""
    return ChatPromptTemplate.from_template(template_text)


In [None]:
from app.llm import get_llm, get_retriever, create_prompt

# chatbot.py - connects the pieces from llm.py into the chatbot's answer function
def answer_query(query):
    """Answer a DSA question using retrieved context plus the LLM.

    Embeds the query, fetches the 3 nearest chunks from the Chroma collection,
    fills the prompt template with them and asks the LLM.

    Args:
        query: The user's question as plain text.

    Returns:
        The model's reply text with surrounding whitespace stripped.
    """
    llm = get_llm()
    collection, embeddings = get_retriever()
    prompt = create_prompt()

    query_vector = embeddings.embed_query(query)
    results = collection.query(query_embeddings=[query_vector], n_results=3)

    # "documents" holds one list of texts per query embedding; we sent exactly
    # one embedding, so only index 0 matters. Guard against a missing list and
    # skip empty/None entries so the join cannot fail on them.
    matches = results.get("documents") or [[]]
    context = "\n".join(doc for doc in matches[0] if doc)

    # format_messages() keeps the prompt as chat messages. format() would
    # flatten it to a single string carrying a "Human: " role prefix, which
    # would then be sent to the model as part of the message text.
    messages = prompt.format_messages(context=context, query=query)
    response = llm.invoke(messages)
    return response.content.strip()

In [None]:
# build_chroma.py - builds the Chroma vector database
# Script to build Chroma vector database from a DSA PDF using HuggingFace embeddings

import os
import sys
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain_huggingface import HuggingFaceEmbeddings
# Add the project root (the parent of this script's folder) to sys.path BEFORE
# importing config. When the script is run directly, only its own folder is on
# sys.path, so the path tweak has to come first for the import below to resolve.
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from config import HF_TOKEN  # Hugging Face token (in case it's needed)

def build_chroma_db(pdf_paths, persist_dir="chroma_db"):
    """
    Loads PDFs, splits the text, and builds a Chroma vector store
    using HuggingFace sentence embeddings.

    Args:
        pdf_paths: An iterable of PDF file paths, or a single path
            (str / os.PathLike), which is treated as a one-element list.
        persist_dir: Directory where the Chroma database is written. A
            relative path is resolved against the current working directory.

    Raises:
        ValueError: If no text chunks could be produced from the given PDFs
            (for example when ``pdf_paths`` is empty).

    NOTE(review): running this again against an existing persist_dir appears
    to add the chunks a second time rather than replace them - confirm.
    """
    # A bare string would otherwise be iterated character by character
    if isinstance(pdf_paths, (str, os.PathLike)):
        pdf_paths = [pdf_paths]

    docs = []

    # Load documents from all provided PDF paths
    for path in pdf_paths:
        loader = PyPDFLoader(path)
        docs.extend(loader.load())

    # Split documents into manageable chunks for embedding
    splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    texts = splitter.split_documents(docs)

    # Fail early with a clear message instead of handing an empty batch to
    # the vector store and reporting a "successful" build of nothing
    if not texts:
        raise ValueError(
            "No text chunks were extracted from the given PDFs; nothing to index."
        )

    # Use HuggingFace transformer model to generate embeddings
    embeddings = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2"
    )

    # Create and persist the Chroma vector database
    vectordb = Chroma.from_documents(
        documents=texts,
        embedding=embeddings,
        persist_directory=persist_dir
    )
    # NOTE(review): newer Chroma releases reportedly persist automatically and
    # deprecate this call - confirm against the installed version
    vectordb.persist()

    print(f"✅ Chroma DB built successfully with {len(texts)} document chunks.")


# Script entry point: build the DB from the bundled PDF when run directly
if __name__ == "__main__":
    this_file = os.path.abspath(__file__)
    # The project root is the parent of the folder containing this script
    project_root = os.path.dirname(os.path.dirname(this_file))

    # Extend this list to index additional PDFs
    pdf_paths = [os.path.join(project_root, "data", "DSA BOOK.pdf")]
    build_chroma_db(pdf_paths)
