<a href="https://colab.research.google.com/github/algo-maniac/pdf_query/blob/master/DataScienceProject.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install these libraries
!pip install sentence-transformers pypdf
!pip install bitsandbytes



In [None]:
# Install these libraries
!pip install langchain-community langchain accelerate faiss-gpu



In [None]:
# Install these libraries
!pip install streamlit transformers



In [None]:
# Before running this, run the above cells first
%%writefile app.py
import os
import tempfile

import streamlit as st
import torch
import transformers
from langchain.chains import ConversationalRetrievalChain
from langchain.document_loaders import PyPDFLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms import HuggingFacePipeline
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from torch import cuda, bfloat16
from transformers import AutoConfig, AutoModelForCausalLM,AutoTokenizer
from transformers import StoppingCriteria, StoppingCriteriaList

# Function to trim Unhelpful Answer
def trim_unhelpful_part(s):
    """Return the part of *s* that precedes the first "Unhelpful Answer" marker.

    Args:
        s: Text to trim.

    Returns:
        Everything before the first occurrence of ``"Unhelpful Answer"``.
        When the marker is absent, *s* is returned unchanged.
    """
    # str.partition splits on the first occurrence in a single pass, replacing
    # the character-by-character scan that rebuilt the prefix with ``+=``.
    return s.partition("Unhelpful Answer")[0]

# Function to load PDF documents
def load_documents(file):
    """Load an uploaded PDF through ``PyPDFLoader``.

    The upload is first copied to a temporary ``.pdf`` file because the loader
    is constructed from a file path. The temporary file is removed once
    loading has finished (or failed).

    Args:
        file: Binary file-like object exposing ``read()``, such as the value
            returned by ``st.file_uploader``.

    Returns:
        The result of ``PyPDFLoader(path).load()``.
    """
    # delete=False keeps the file on disk after it is closed so the loader can
    # reopen it by name; it is removed explicitly in the ``finally`` below.
    temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".pdf")
    try:
        with temp_file:
            temp_file.write(file.read())
        return PyPDFLoader(temp_file.name).load()
    finally:
        # Without this, every upload leaves a stray PDF in the temp directory.
        os.unlink(temp_file.name)

# Function to split documents into texts
def split_documents(documents, chunk_size=500, chunk_overlap=50):
    """Split *documents* into chunks with ``RecursiveCharacterTextSplitter``.

    Args:
        documents: Documents to split (e.g. the output of ``load_documents``).
        chunk_size: Value forwarded as the splitter's ``chunk_size``.
        chunk_overlap: Value forwarded as the splitter's ``chunk_overlap``.

    Returns:
        The result of the splitter's ``split_documents`` call.
    """
    splitter_options = {"chunk_size": chunk_size, "chunk_overlap": chunk_overlap}
    splitter = RecursiveCharacterTextSplitter(**splitter_options)
    chunks = splitter.split_documents(documents)
    return chunks

# Function to create embeddings
def create_embeddings(model_name, device='cuda'):
    """Create a ``HuggingFaceEmbeddings`` instance for *model_name*.

    Both arguments are now honoured. Previously they were overwritten inside
    the function, so every call used
    ``sentence-transformers/all-mpnet-base-v2`` on ``cuda`` regardless of
    what the caller passed.

    Args:
        model_name: Name of the embedding model to load.
        device: Device string placed in the embeddings' ``model_kwargs``.

    Returns:
        The configured ``HuggingFaceEmbeddings`` object.
    """
    model_kwargs = {"device": device}
    return HuggingFaceEmbeddings(model_name=model_name, model_kwargs=model_kwargs)

# Function to create vector store
def create_vector_store(texts, embeddings):
    """Build a FAISS vector store from *texts* using *embeddings*.

    Args:
        texts: Document chunks to index.
        embeddings: Embeddings object passed to ``FAISS.from_documents``.

    Returns:
        The store returned by ``FAISS.from_documents``.
    """
    vector_store = FAISS.from_documents(texts, embeddings)
    return vector_store

# Function to setup a QA chain
def setup_retrieval_qa_chain(llm, db):
    """Create a ``ConversationalRetrievalChain`` over the vector store *db*.

    Args:
        llm: Language model handed to ``ConversationalRetrievalChain.from_llm``.
        db: Vector store; its ``as_retriever()`` result is used as the retriever.

    Returns:
        The chain, configured with ``return_source_documents=False``.
    """
    retriever = db.as_retriever()
    return ConversationalRetrievalChain.from_llm(
        llm,
        retriever,
        return_source_documents=False,
    )

# Streamlit app: page title and PDF upload widget
st.title("PDF Question Answering App")

# Holds the uploaded PDF, or None until the user uploads one
uploaded_file = st.file_uploader("Upload a PDF file", type="pdf")

# Make sure the session-state slots used below exist, defaulting them to None
for state_key in ('processed_data', 'qa_chain'):
    if state_key not in st.session_state:
        st.session_state[state_key] = None

# Process the PDF file if uploaded.
# This runs only while st.session_state.processed_data is None, i.e. until one
# upload has been processed successfully in the current session.
if uploaded_file is not None and st.session_state.processed_data is None:
    # The model is downloaded with a Hugging Face access token. Read it from
    # the environment so no secret is embedded in the source file.
    hf_auth = os.environ.get('HF_TOKEN')
    if not hf_auth:
        st.error('Set the HF_TOKEN environment variable to a Hugging Face access token.')
        st.stop()

    with st.spinner('Processing the PDF...'):
        # Load and split documents
        documents = load_documents(uploaded_file)
        texts = split_documents(documents)

        # Create embeddings and vector store
        embeddings = create_embeddings('sentence-transformers/all-MiniLM-L6-v2')
        db = create_vector_store(texts, embeddings)

        # Initialize the LLM (Language Model)
        model_id = 'meta-llama/Llama-2-7b-chat-hf'
        # Select GPU device (cuda) when available, otherwise fall back to CPU
        device = f'cuda:{cuda.current_device()}' if cuda.is_available() else 'cpu'

        # Set quantization configuration to load a large model with less GPU
        # memory; this requires the `bitsandbytes` library.
        bnb_config = transformers.BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_quant_type='nf4',
            bnb_4bit_use_double_quant=True,
            bnb_4bit_compute_dtype=bfloat16
        )

        # Begin initializing HF items; each call authenticates with hf_auth.
        model_config = AutoConfig.from_pretrained(
            model_id,
            use_auth_token=hf_auth
        )

        model = AutoModelForCausalLM.from_pretrained(
            model_id,
            trust_remote_code=True,
            config=model_config,
            quantization_config=bnb_config,
            device_map='auto',
            use_auth_token=hf_auth
        )

        # Enable evaluation mode for inference
        model.eval()

        print(f"Model loaded on {device}")
        tokenizer = AutoTokenizer.from_pretrained(
            model_id,
            use_auth_token=hf_auth
        )

        # Text sequences that should end generation, converted to token-id
        # tensors on the selected device.
        # NOTE(review): tokenizer(x) may prepend special tokens that never
        # occur mid-generation — confirm these sequences actually match.
        stop_list = ['\nHuman:', '\n```\n']
        stop_token_ids = [tokenizer(x)['input_ids'] for x in stop_list]
        stop_token_ids = [torch.LongTensor(x).to(device) for x in stop_token_ids]

        class StopOnTokens(StoppingCriteria):
            """Stop generation once the output ends with any stop sequence."""

            def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
                generated = input_ids[0]
                for stop_ids in stop_token_ids:
                    stop_len = len(stop_ids)
                    # Skip sequences longer than the text so far; comparing
                    # tensors of different lengths is not meaningful.
                    if len(generated) < stop_len:
                        continue
                    if torch.eq(generated[-stop_len:], stop_ids).all():
                        return True
                # Only give up after every stop sequence has been checked.
                # (Previously this return sat inside the loop, so only the
                # first stop sequence was ever tested.)
                return False

        stopping_criteria = StoppingCriteriaList([StopOnTokens()])
        generate_text = transformers.pipeline(
            model=model,
            tokenizer=tokenizer,
            # Return only the newly generated text, without the prompt.
            # NOTE(review): some LangChain versions expect the full text
            # here — confirm against the installed version.
            return_full_text=False,
            task='text-generation',
            # we pass model parameters here too
            stopping_criteria=stopping_criteria,  # without this model rambles during chat
            temperature=0.01,  # 'randomness' of outputs; kept very low
            max_new_tokens=512,  # max number of tokens to generate in the output
            repetition_penalty=1.1  # without this output begins repeating
        )
        llm = HuggingFacePipeline(pipeline=generate_text)

        # Setup QA chain
        st.session_state.qa_chain = setup_retrieval_qa_chain(llm, db)

        # Store processed data last: if anything above raises, processed_data
        # stays None and the next rerun retries instead of leaving the session
        # without a QA chain.
        st.session_state.processed_data = {
            'documents': documents,
            'texts': texts,
            'embeddings': embeddings,
            'db': db
        }

    st.success('PDF processed successfully! You can now ask questions.')

# Question box shown under the uploader
query = st.text_input("Ask a question about the PDF")


# Answer the question once a QA chain is available
if query and st.session_state.qa_chain is not None:
    with st.spinner('Generating answer...'):
        # No history is kept between queries, so the chain always receives an
        # empty chat history.
        chain_inputs = {'question': query, 'chat_history': []}
        result = st.session_state.qa_chain(chain_inputs)
        # Show only the text before the first "Unhelpful Answer" marker; the
        # marker is appended first so one is always present.
        answer = result['answer'] + "Unhelpful Answer"
        st.write(trim_unhelpful_part(answer))


Overwriting app.py


In [None]:
# Print this machine's public IP address; the localtunnel page asks for it as the password (needed to open the Streamlit app from Google Colab)
!wget -q -O - ipv4.icanhazip.com

35.198.196.149


In [None]:
# Run the Streamlit app. Open the URL that localtunnel prints, paste the password from the previous cell into the page, and submit it to reach the app.
! streamlit run app.py & npx localtunnel --port 8501


Collecting usage statistics. To deactivate, set browser.gatherUsageStats to false.
[0m
[0m
[34m[1m  You can now view your Streamlit app in your browser.[0m
[0m
[34m  Local URL: [0m[1mhttp://localhost:8501[0m
[34m  Network URL: [0m[1mhttp://172.28.0.12:8501[0m
[34m  External URL: [0m[1mhttp://35.198.196.149:8501[0m
[0m
[1G[0JNeed to install the following packages:
  localtunnel@2.0.2
Ok to proceed? (y) [20Gy
[K[?25hyour url is: https://crazy-sides-occur.loca.lt

>> from langchain.embeddings import HuggingFaceEmbeddings

with new imports of:

>> from langchain_community.embeddings import HuggingFaceEmbeddings
You can use the langchain cli to **automatically** upgrade many imports. Please see documentation here <https://python.langchain.com/v0.2/docs/versions/v0_2/>
  warn_deprecated(

>> from langchain.vectorstores import FAISS

with new imports of:

>> from langchain_community.vectorstores import FAISS
You can use the langchain cli to **automatically** upgrade m