<a href="https://colab.research.google.com/github/aditya161205/NLP/blob/main/RAG_LangHug.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [10]:
# Install the packages used by app.py and by the ngrok launcher cell below;
# -q keeps pip's output quiet. No versions are pinned here.
!pip install -q streamlit langchain transformers sentence-transformers faiss-cpu pypdf accelerate langchain-community pyngrok

In [11]:
%%writefile app.py
import streamlit as st
import torch
import os
import tempfile
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.llms import HuggingFacePipeline
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
from langchain.chains import RetrievalQA

# Page configuration is issued before any other Streamlit call in this script.
st.set_page_config(
    page_title="RAG with LangChain & HuggingFace",
    layout="wide",
)
st.title("Upload and search from PDF")

# Markdown overview of the three pipeline stages, rendered under the title.
intro_markdown = """
This app implements a Retrieval-Augmented Generation (RAG) pipeline to answer questions about your PDF documents.
1. **Upload a PDF**: The document is loaded and split into smaller chunks.
2. **Create Embeddings**: Text chunks are converted into numerical vectors (embeddings) and stored in a searchable FAISS vector store.
3. **Ask a Question**: The app retrieves the most relevant chunks from the document and feeds them, along with your question, to a Large Language Model (LLM) to generate an answer.
"""
st.write(intro_markdown)

# --- Model and Tokenizer Caching ---
@st.cache_resource
def load_llm_and_tokenizer():
    """Build the LangChain LLM wrapper around a local FLAN-T5 pipeline.

    Loads the ``google/flan-t5-base`` tokenizer and seq2seq model, wraps them
    in a ``text2text-generation`` pipeline limited to 512 new tokens, and
    reports the selected device in the UI. Decorated with
    ``st.cache_resource`` so the loaded objects are reused instead of being
    rebuilt on each script run.

    Returns:
        A ``HuggingFacePipeline`` wrapping the transformers pipeline.
    """
    checkpoint = "google/flan-t5-base"

    has_gpu = torch.cuda.is_available()
    device = "cuda" if has_gpu else "cpu"
    st.info(f"Using device: {device.upper()}")

    hf_tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    hf_model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)

    # The pipeline takes a device index: 0 for the first GPU, -1 for CPU.
    device_index = 0 if has_gpu else -1
    generator = pipeline(
        "text2text-generation",
        model=hf_model,
        tokenizer=hf_tokenizer,
        max_new_tokens=512,
        device=device_index,
    )
    return HuggingFacePipeline(pipeline=generator)

llm = load_llm_and_tokenizer()

@st.cache_resource
def load_embedding_model():
    """Create the sentence-embedding model used to index document chunks.

    Uses ``sentence-transformers/all-mpnet-base-v2`` on CUDA when available,
    otherwise on CPU. Decorated with ``st.cache_resource`` so the model is
    reused instead of being reloaded on each script run.

    Returns:
        A ``HuggingFaceEmbeddings`` instance configured for the chosen device.
    """
    target_device = "cuda" if torch.cuda.is_available() else "cpu"
    return HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-mpnet-base-v2",
        model_kwargs={'device': target_device},
    )

embeddings = load_embedding_model()

# --- Main App Logic ---
st.sidebar.header("Controls")
uploaded_file = st.sidebar.file_uploader("Upload your PDF document", type="pdf")

# Per-session state: the FAISS index, and the identity of the upload it was
# built from so that a newly uploaded PDF triggers re-indexing.
if 'vector_store' not in st.session_state:
    st.session_state.vector_store = None
if 'indexed_file' not in st.session_state:
    st.session_state.indexed_file = None

if uploaded_file:
    # (name, size) is a cheap fingerprint of the upload. It will not notice a
    # replacement file with the same name and byte size, but it does catch
    # the common case of the user switching to a different document.
    file_key = (uploaded_file.name, uploaded_file.size)
    needs_indexing = (
        st.session_state.vector_store is None
        or st.session_state.indexed_file != file_key
    )

    if needs_indexing:
        with st.spinner("Processing PDF... This may take a moment."):
            tmp_file_path = None
            try:
                # PyPDFLoader is handed a filesystem path here, so the upload
                # is first written to a temporary file. The path is recorded
                # before writing so cleanup still runs if the write fails.
                with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
                    tmp_file_path = tmp_file.name
                    tmp_file.write(uploaded_file.getvalue())

                loader = PyPDFLoader(tmp_file_path)
                documents = loader.load()

                text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=150)
                docs = text_splitter.split_documents(documents)

                # Fail with a readable message instead of letting the vector
                # store choke on an empty chunk list (e.g. a scanned PDF).
                if not docs:
                    raise ValueError("No extractable text was found in the PDF.")

                st.session_state.vector_store = FAISS.from_documents(docs, embeddings)
                st.session_state.indexed_file = file_key

                st.sidebar.success(f"PDF processed successfully! Document split into {len(docs)} chunks.")

            except Exception as e:
                # Top-level UI boundary: report the failure and drop any
                # index so the Q&A section is not shown for a stale document.
                st.sidebar.error(f"An error occurred: {e}")
                st.session_state.vector_store = None
                st.session_state.indexed_file = None

            finally:
                # delete=False means the temp file must be removed by hand,
                # on both the success and the failure path.
                if tmp_file_path is not None and os.path.exists(tmp_file_path):
                    os.remove(tmp_file_path)

# Question/answer section: shown only once a vector store exists in the session.
if st.session_state.vector_store:
    st.subheader("Ask a question about the document")

    question = st.text_input("Enter your question:", placeholder="e.g., What is the main topic of the document?")

    if st.button("Get Answer"):
        # Treat whitespace-only input as empty so it is not sent through
        # retrieval and generation.
        query = question.strip()
        if query:
            with st.spinner("Searching for the answer..."):
                try:
                    # Retrieve chunks from the indexed document and pass them,
                    # together with the question, to the LLM ("stuff" chain).
                    retriever = st.session_state.vector_store.as_retriever()
                    chain = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=retriever)

                    result = chain.run(query)

                    st.success("Answer:")
                    st.write(result)
                except Exception as e:
                    # Top-level UI boundary: show the error rather than crash the app.
                    st.error(f"Failed to get answer: {e}")
        else:
            st.warning("Please enter a question.")
else:
    st.info("Please upload a PDF file using the sidebar to begin.")

Overwriting app.py


In [12]:
from pyngrok import ngrok
import os

ngrok.kill()

NGROK_AUTH_TOKEN = "2uwUMcpgBtZaJJ8WJUSR5RIgFuY_3sw1aKiFSaefXhso4yMsP" #-->paste token
if NGROK_AUTH_TOKEN:
    ngrok.set_auth_token(NGROK_AUTH_TOKEN)
else:
    print("Consider adding an ngrok authtoken for a more stable connection.")

!nohup streamlit run app.py --server.port 8501 &

public_url = ngrok.connect(8501)
print(f"Click this link to view your Streamlit app: {public_url}")

nohup: appending output to 'nohup.out'
Click this link to view your Streamlit app: NgrokTunnel: "https://ff6deabd4fe2.ngrok-free.app" -> "http://localhost:8501"
