In [None]:
# Install the notebook's dependencies. Every line is a separate, unpinned pip run,
# so the versions that get installed depend on when the cell is executed.
# The two mlflow installs are disabled (commented out).
# NOTE(review): if the first mlflow line is re-enabled, quote the requirement
# ('mlflow>=2.8.1'); unquoted, the shell treats `>` as an output redirect.
#!pip install mlflow>=2.8.1
!pip install chromadb
!pip install langchain
!pip install langchain-community
#!pip install 'mlflow[genai]'
# NOTE(review): keras-nlp, keras, faiss-gpu, unstructured and rank_bm25 are not
# referenced by the app.py written further down; confirm they are still needed.
!pip install keras-nlp
!pip install keras
!pip install selenium
!pip install faiss-gpu
!pip install unstructured
!pip install sentence-transformers
!pip install transformers
!pip install torch
!pip install torchvision
!pip install accelerate
!pip install bitsandbytes
!pip install pypdf
!pip install rank_bm25
!pip install ragas
!pip install streamlit
!pip install numpy
!pip install pandas

Collecting chromadb
  Downloading chromadb-0.6.3-py3-none-any.whl.metadata (6.8 kB)
Collecting build>=1.0.3 (from chromadb)
  Downloading build-1.2.2.post1-py3-none-any.whl.metadata (6.5 kB)
Collecting chroma-hnswlib==0.7.6 (from chromadb)
  Downloading chroma_hnswlib-0.7.6-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (252 bytes)
Collecting fastapi>=0.95.2 (from chromadb)
  Downloading fastapi-0.115.12-py3-none-any.whl.metadata (27 kB)
Collecting uvicorn>=0.18.3 (from uvicorn[standard]>=0.18.3->chromadb)
  Downloading uvicorn-0.34.0-py3-none-any.whl.metadata (6.5 kB)
Collecting posthog>=2.4.0 (from chromadb)
  Downloading posthog-3.23.0-py2.py3-none-any.whl.metadata (3.0 kB)
Collecting onnxruntime>=1.14.1 (from chromadb)
  Downloading onnxruntime-1.21.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (4.5 kB)
Collecting opentelemetry-exporter-otlp-proto-grpc>=1.2.0 (from chromadb)
  Downloading opentelemetry_exporter_otlp_proto_grpc-1.31.1-py

In [None]:
# Show the Hugging Face login widget so an access token can be entered.
# Presumably required because app.py later downloads meta-llama/Llama-2-7b-chat-hf
# with authentication enabled; confirm the account has access to that model.
from huggingface_hub import notebook_login
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

# **Streamlit**

In [None]:
%%writefile app.py

#Imports

from langchain.llms import HuggingFacePipeline
from transformers import pipeline
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
import streamlit as st

from langchain_community.document_loaders import WebBaseLoader,SeleniumURLLoader,PyPDFLoader
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.document_loaders import DataFrameLoader

from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.retrievers import BM25Retriever, EnsembleRetriever,ParentDocumentRetriever
from langchain.storage import InMemoryStore


from langchain_core.prompts import PromptTemplate
from langchain.chains import RetrievalQAWithSourcesChain


import warnings


warnings.filterwarnings("ignore")
from transformers import pipeline




from ragas.metrics import (
    answer_relevancy,
    faithfulness,
    context_recall,
    context_precision,
)

from ragas import evaluate



# Embedding model used to vectorise document chunks: runs on the CPU and
# returns normalised embedding vectors.
model_name = "BAAI/bge-small-en-v1.5"
model_kwargs = dict(device='cpu')
encode_kwargs = dict(normalize_embeddings=True)
hf = HuggingFaceEmbeddings(model_name=model_name, model_kwargs=model_kwargs, encode_kwargs=encode_kwargs)

# Chat LLM. `model_name` is rebound here: from this point on it names the
# Llama-2 checkpoint, not the embedding model above.
model_name = "meta-llama/Llama-2-7b-chat-hf"
tokenizer_llama_2 = AutoTokenizer.from_pretrained(model_name)


def load_quantized_model(model_name:str):
  """
  Load a causal language model with 4-bit (NF4) bitsandbytes quantization.

  model_name : identifier of the model passed to AutoModelForCausalLM.from_pretrained
  return : the loaded, quantized model (device placement is left to device_map="auto")
  """
  # 4-bit NF4 weights with double quantization; compute runs in bfloat16.
  bnb_config = BitsAndBytesConfig(
      load_in_4bit=True,
      bnb_4bit_use_double_quant=True,
      bnb_4bit_quant_type="nf4",
      bnb_4bit_compute_dtype=torch.bfloat16
  )
  model = AutoModelForCausalLM.from_pretrained(
      model_name,
      quantization_config=bnb_config,
      device_map="auto",
      # `use_auth_token` is deprecated in transformers; `token=True` is the
      # supported spelling and reuses the credentials saved by the HF login.
      token=True,
      torch_dtype=torch.bfloat16,
  )
  return model

# Streamlit re-executes this whole script on every widget interaction. Wrapping
# the loader with st.cache_resource keeps one quantized model per server process
# instead of reloading the 7B checkpoint (and re-allocating GPU memory) per rerun.
model_llama_2 = st.cache_resource(load_quantized_model)(model_name)


# Text-generation pipeline around the quantized model. Sampling is enabled
# (temperature / top_k / top_p) and only the newly generated text is returned
# (return_full_text=False), so the prompt is not echoed back in the answer.
pipe_llama_2 = pipeline(
    "text-generation",
    model=model_llama_2,
    tokenizer=tokenizer_llama_2,
    torch_dtype=torch.bfloat16,
    device_map="auto",
    do_sample=True,
    max_new_tokens=1000,
    temperature=0.7,
    top_k=10,
    top_p=0.60,
    num_return_sequences=1,
    add_special_tokens=True,
    eos_token_id=tokenizer_llama_2.eos_token_id,
    return_full_text=False,

)

# LangChain wrapper so the pipeline can be used as the `llm` of a chain.
llama_pipe = HuggingFacePipeline(pipeline=pipe_llama_2)


#Prompt

from langchain.prompts import PromptTemplate

# Instruction and system-prompt delimiters used to assemble the chat prompt.
B_INST, E_INST = "[INST]", "[/INST]"
B_SYS, E_SYS = "<<SYS>>\n", "\n<</SYS>>\n\n"


def get_prompt(instruction,new_system_prompt):
  """
  Assemble a single prompt string.

  instruction : text placed after the system section (may contain template placeholders)
  new_system_prompt : text wrapped between the B_SYS / E_SYS markers
  return : B_INST + system section + instruction + E_INST, concatenated without extra spacing
  """
  pieces = (B_INST, B_SYS, new_system_prompt, E_SYS, instruction, E_INST)
  return "".join(pieces)

# System message placed between the <<SYS>> markers of the prompt.
sys_prompt = """ You are helpful, respectful and honest assistant. Always answer as helpfully as possible using the context text provided. Your answers should only answer the question once and not have any other text after the answer is done.
if a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information. """

# User-turn template filled in by the chain with the retrieved context and the
# question. The separators are real newlines: the previous "/n/n" was a typo
# that put the literal characters "/n/n" into every prompt sent to the model.
instruction = """ CONTEXT:\n\n {context} \n\n
Question: {question}"""


# Full prompt text: system section + instruction wrapped in the [INST] markers.
prompt_template = get_prompt(instruction,sys_prompt)

# Template with the two variables the QA chain supplies.
llama_prompt = PromptTemplate(template=prompt_template, input_variables=["context", "question"])



# Directory that uploaded PDFs are written to.
PDF_STORAGE_PATH = '/content/drive/MyDrive/mlflow_rag/document_store/pdfs'

# Chroma collection (persisted on disk) that stores the embedded chunks,
# using the `hf` embedding function defined above.
CHROMA_VECTOR_DB = Chroma(
    collection_name="full_documents",
    embedding_function=hf,
    persist_directory="/content/drive/MyDrive/mlflow_rag/document_store/chroma",
)


def save_uploaded_file(uploaded_file):
    """
    Write an uploaded file into PDF_STORAGE_PATH and return the path written.

    uploaded_file : object exposing `.name` (file name) and `.getbuffer()` (file bytes)
    return : path of the saved file inside PDF_STORAGE_PATH
    """
    import os

    # Only the final path component of the client-supplied name is used, so the
    # file cannot be written outside the storage directory.
    safe_name = os.path.basename(uploaded_file.name)
    # The storage directory may not exist yet on a fresh machine.
    os.makedirs(PDF_STORAGE_PATH, exist_ok=True)
    # Join with a separator: plain string concatenation produced
    # ".../pdfs<name>" next to the directory instead of a file inside it.
    file_path = os.path.join(PDF_STORAGE_PATH, safe_name)
    with open(file_path, "wb") as file:
        file.write(uploaded_file.getbuffer())
    return file_path

def load_pdf_documents(file_path):
    """Load the PDF at `file_path` with PyPDFLoader and return what its `load()` call yields."""
    return PyPDFLoader(file_path).load()

# Parent chunks (2000 chars) are what the retriever returns; child chunks
# (400 chars) are what is embedded and searched in the vector store.
parent_splitter = RecursiveCharacterTextSplitter(separators=['\n\n','\n',','], chunk_size=2000,chunk_overlap=75)
child_splitter = RecursiveCharacterTextSplitter(separators=['\n\n','\n',','], chunk_size=400,chunk_overlap=75)


@st.cache_resource
def _get_retriever_state():
    """
    Build the parent-document retriever once per Streamlit server process.

    Streamlit re-executes this script on every interaction. Without caching, each
    rerun created an empty InMemoryStore while the persistent Chroma collection
    kept the child chunks of earlier runs, so searches could hit chunks whose
    parent documents no longer existed in the docstore.

    return : (retriever, set of upload keys that have already been indexed)
    """
    retriever = ParentDocumentRetriever(
        vectorstore=CHROMA_VECTOR_DB,
        docstore=InMemoryStore(),
        child_splitter=child_splitter,
        parent_splitter=parent_splitter,
    )
    return retriever, set()


full_doc_retriever, _indexed_uploads = _get_retriever_state()
# Kept under its original name: the docstore that holds the parent chunks.
store = full_doc_retriever.docstore


def generate_answer(user_query, context_documents):
    """
    Answer `user_query` with the Llama-2 pipeline over the indexed documents.

    user_query : question text
    context_documents : accepted for interface compatibility; currently unused,
        because the chain fetches its own context through `full_doc_retriever`
    return : the 'answer' field of the chain result
    """
    qa_pdf_llama_2 = RetrievalQAWithSourcesChain.from_chain_type(
        llm=llama_pipe,
        chain_type="stuff",
        retriever=full_doc_retriever,
        chain_type_kwargs={"prompt": llama_prompt, "document_variable_name": "context"},
    )
    return qa_pdf_llama_2.invoke({"question": user_query})['answer']


# UI Configuration


st.title("📘 Document Agent")
st.markdown("### Your Intelligent Document Agent")
st.markdown("---")

# File Upload Section
uploaded_pdf = st.file_uploader(
    "Upload Research Document (PDF)",
    type="pdf",
    help="Select a PDF document for analysis",
    accept_multiple_files=False

)

if uploaded_pdf:
    # Index each distinct upload only once. The script reruns for every chat
    # message, and re-adding the same PDF would duplicate its chunks in the
    # vector store on each rerun.
    upload_key = (uploaded_pdf.name, uploaded_pdf.size)
    if upload_key not in _indexed_uploads:
        saved_path = save_uploaded_file(uploaded_pdf)
        raw_docs = load_pdf_documents(saved_path)
        full_doc_retriever.add_documents(raw_docs)
        _indexed_uploads.add(upload_key)

    st.success("✅ Document processed successfully! Ask your questions below.")

    user_input = st.chat_input("Enter your question about the document...")

    if user_input:
        with st.chat_message("user"):
            st.write(user_input)

        with st.spinner("Analyzing document..."):
            # `invoke` replaces the deprecated `get_relevant_documents`.
            relevant_docs = full_doc_retriever.invoke(user_input)
            ai_response = generate_answer(user_input, relevant_docs)

        with st.chat_message("assistant", avatar="🤖"):
            st.write(ai_response)

Overwriting app.py


# **Streamlit Run**

In [None]:
# Install the localtunnel npm package, which the `npx localtunnel` cell below runs.
!npm install localtunnel

[1G[0K⠙[1G[0K⠹[1G[0K⠸[1G[0K⠼[1G[0K⠴[1G[0K⠦[1G[0K⠧[1G[0K⠇[1G[0K⠏[1G[0K⠋[1G[0K⠙[1G[0K⠹[1G[0K⠸[1G[0K⠼[1G[0K⠴[1G[0K⠦[1G[0K⠧[1G[0K⠇[1G[0K⠏[1G[0K⠋[1G[0K⠙[1G[0K⠹[1G[0K⠸[1G[0K⠼[1G[0K⠴[1G[0K⠦[1G[0K⠧[1G[0K⠇[1G[0K⠏[1G[0K⠋[1G[0K⠙[1G[0K⠹[1G[0K⠸[1G[0K⠼[1G[0K⠴[1G[0K⠦[1G[0K⠧[1G[0K
added 22 packages in 4s
[1G[0K⠧[1G[0K
[1G[0K⠧[1G[0K3 packages are looking for funding
[1G[0K⠧[1G[0K  run `npm fund` for details
[1G[0K⠧[1G[0K

In [None]:
# Start the Streamlit app in the background; its stdout and stderr go to
# /content/logs.txt, so check that file if the app does not come up.
!streamlit run /content/app.py &>/content/logs.txt &

In [None]:
# Start localtunnel for port 8501 in the background, then print this machine's
# public IPv4 address. Port 8501 is presumably where the Streamlit app listens,
# and the printed IP is presumably what the tunnel page asks for; confirm both.
!npx localtunnel --port 8501 & curl ipv4.icanhazip.com

34.87.66.148
[1G[0K⠙[1G[0K⠹[1G[0K⠸[1G[0K⠼[1G[0K⠴[1G[0Kyour url is: https://eager-ducks-eat.loca.lt
