----

# **GenAI - LangChain - FAISS as Vector DB - Gradio app**

---

In [None]:
## Create a conda env. I used tensorflow

In [None]:
!pip install accelerate tiktoken openai gradio torch accelerate safetensors sentence-transformers faiss-gpu bitsandbytes pypdf typing-extensions PyPDF2 
!pip install tokenizers --upgrade
!pip install transformers -U
!pip install langchain -U
!pip install gradio --upgrade

# **Run the cells below after the installations finish, then click the public URL that is printed**

## Add your variables

In [11]:
import os

# OCID of the OCI compartment handed to the GenAI client in load_llm().
# Set the OCI_COMPARTMENT_OCID environment variable to use your own compartment
# without editing the notebook; the literal below is only the fallback.
compartment_ocid = (
    os.environ.get("OCI_COMPARTMENT_OCID")
    or "ocid1.compartment.oc1..aaaaaaaae3n6r6hrjipbap2hojicrsvkzatrtlwvsyrpyjd7wjnw4za3m75q"
)
# Number of chunks the FAISS retriever returns per question (the retriever's "k").
max_return_from_vector = 4
# chunk_size passed to RecursiveCharacterTextSplitter when splitting the PDF text.
CHUNK_SIZE = 1000


## Run the cell below, then click the public URL it prints.

In [12]:
import uuid

In [None]:
import langchain_community
import langchain
import oci
import gradio as gr
import torch
import PyPDF2 # pdf reader
import time
import oci
import ads
import os
from pypdf import PdfReader
from io import BytesIO
from langchain.prompts import PromptTemplate 
from langchain.text_splitter import RecursiveCharacterTextSplitter 
from langchain.embeddings import HuggingFaceEmbeddings 
from langchain.vectorstores import FAISS 
from langchain.chains import RetrievalQA 
from langchain.memory import ConversationBufferMemory 
from langchain.document_loaders import PyPDFDirectoryLoader 
from transformers import AutoTokenizer
from langchain.chains import ConversationChain
from langchain_community.llms import OCIGenAI
import transformers
import tokenizers
import torch
import warnings
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline, BitsAndBytesConfig


##########################################################################################################################################
########################################################################################################################################## Generate Embeddings
##########################################################################################################################################


# Using HuggingFaceEmbeddings with the chosen embedding model.
# Run the embedding model on the GPU when PyTorch can see one, otherwise on the
# CPU, so the device no longer has to be edited by hand.
EMBEDDING_DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-mpnet-base-v2",
    model_kwargs={"device": EMBEDDING_DEVICE},
)

# 4-bit NF4 quantization settings with double quantization and bfloat16 compute.
# NOTE(review): nothing in the visible part of this notebook uses quant_config
# (the LLM is the hosted OCI GenAI model); kept in case later cells rely on it.
quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)

##########################################################################################################################################
########################################################################################################################################## Load GenAI 
##########################################################################################################################################

def load_llm():
    """Create the OCI Generative AI LLM client used by the QA chain.

    Reads the module-level ``compartment_ocid`` and returns an ``OCIGenAI``
    instance for the ``cohere.command`` model on the us-chicago-1 inference
    endpoint, passing ``max_tokens=1024`` as a model kwarg.
    """
    print("Start load GenAI")

    # Collect the client settings in one place before building the client.
    client_settings = dict(
        model_id="cohere.command",
        service_endpoint="https://inference.generativeai.us-chicago-1.oci.oraclecloud.com",
        compartment_id=compartment_ocid,
        model_kwargs={"max_tokens": 1024},
        verbose=False,
    )
    return OCIGenAI(**client_settings)

##########################################################################################################################################
########################################################################################################################################## Create history
##########################################################################################################################################

def add_text(history, text):
    """Append the user's question to the chat history as a new, unanswered turn.

    Args:
        history: Current chatbot history (a list of [user, bot] pairs); ``None``
            is treated as an empty history.
        text: The question typed by the user.

    Returns:
        A new history list ending with ``[text, '']``; the input list is not mutated.

    Raises:
        gr.Error: If ``text`` is empty or contains only whitespace.
    """
    print("Start add text")
    # Reject blank questions (including whitespace-only ones) before they reach the LLM.
    if not text or not text.strip():
        raise gr.Error('Enter text')
    # Store the new turn as a mutable list rather than a tuple so the streaming
    # handler can fill in the bot reply in place.
    return list(history or []) + [[text, '']]

##########################################################################################################################################
########################################################################################################################################## Upload files
##########################################################################################################################################

def upload_file(files):
    """Return the paths of the uploaded PDFs for display in the file component.

    Args:
        files: Value of the upload button: a list of uploaded file objects that
            expose their path as ``.name``, or plain path strings. May be
            ``None`` or empty when nothing has been uploaded.

    Returns:
        A list with one path per uploaded file, or ``None`` when there are none.
    """
    print(type(files))
    print("done with upload_files")

    # Nothing uploaded yet: clear the file component instead of raising on files[0].
    if not files:
        return None

    # The upload button accepts multiple files, so report all of them, not just the first.
    paths = [getattr(uploaded, "name", uploaded) for uploaded in files]
    print(paths)

    return paths

##########################################################################################################################################
########################################################################################################################################## Process files
##########################################################################################################################################

def process_file(files):
    """Build a retrieval QA chain over the text of the uploaded PDFs.

    Reads every page of every uploaded PDF, splits the combined text into
    overlapping chunks, embeds the chunks into an in-memory FAISS index and
    wraps a ``RetrievalQA`` "stuff" chain (custom prompt plus conversation
    memory) around the LLM returned by ``load_llm()``.

    Args:
        files: Uploaded files: objects exposing their path as ``.name`` or
            plain path strings.

    Returns:
        The configured ``RetrievalQA`` chain; query it with ``{"query": ...}``.

    Raises:
        gr.Error: If no file was uploaded or no text could be extracted.
    """
    print("start process_files")

    if not files:
        raise gr.Error('Upload at least one PDF before asking a question')

    # Collect page texts in a list and join once instead of growing a string.
    page_texts = []
    for file in files:
        pdf = PyPDF2.PdfReader(getattr(file, "name", file))
        print(pdf)
        # Guard against extract_text() returning None/empty (e.g. pages without a text layer).
        page_texts.extend(page.extract_text() or "" for page in pdf.pages)
    pdf_text = "".join(page_texts)

    # FAISS cannot build an index from zero documents, so fail with a clear message.
    if not pdf_text.strip():
        raise gr.Error('No text could be extracted from the uploaded PDF(s)')

    # split into smaller chunks
    print("Start chuncking")
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=CHUNK_SIZE, chunk_overlap=200)
    splits = text_splitter.create_documents([pdf_text])

    # create a FAISS vector store db. Create embeddings and adding to faiss
    print("Store embeddings in FAISS")
    vectorstore_db = FAISS.from_documents(splits, embeddings)

    # create a custom prompt
    custom_prompt_template = """You have been given the following documents to answer the user's question.
    If you do not have information from the files given to answer the questions just say I don't have information from the given files to answer. Do not try to make up an answer.
    Context: {context}
    History: {history}
    Question: {question}

    Helpful answer:
    """
    prompt = PromptTemplate(template=custom_prompt_template, input_variables=["question", "context", "history"])

    # The prompt is a plain string template, so the memory must render the
    # history as text (return_messages=False) rather than as message objects.
    memory = ConversationBufferMemory(
        input_key="question",
        memory_key="history",
        return_messages=False,
    )

    # set QA chain with memory
    qa_chain_with_memory = RetrievalQA.from_chain_type(
        llm=load_llm(),
        chain_type='stuff',
        return_source_documents=True,
        retriever=vectorstore_db.as_retriever(search_kwargs={"k": max_return_from_vector}),
        chain_type_kwargs={
            "verbose": False,
            "prompt": prompt,
            "memory": memory,
        },
    )

    print("returning qa_chain_with_memotry")

    return qa_chain_with_memory

##########################################################################################################################################
########################################################################################################################################## Main
##########################################################################################################################################

def generate_bot_response(history,query, btn):
    """Answer ``query`` from the uploaded PDFs and stream the reply into the chat.

    Builds the QA chain from the uploaded files via ``process_file(btn)``, runs
    the query, then yields the history once per character so the answer appears
    with a typing effect.

    Args:
        history: Chat history whose last entry is the pending [user, bot] turn.
        query: The user's question.
        btn: The uploaded files (value of the upload button).

    Yields:
        ``(history, '')``: the updated history and an empty string that clears
        the question textbox.
    """
    print("Start generate_bot_response")

    qa_chain_with_memory = process_file(btn) # run the qa chain with files from upload
    # invoke() is the supported replacement for the deprecated chain(...) call.
    bot_response = qa_chain_with_memory.invoke({"query": query})

    print("--" *50)
    print("Bot response is")
    print(bot_response)
    print("--" *50)

    answer = bot_response['result'] or ''
    user_message = history[-1][0]
    partial_reply = history[-1][1] or ''

    # An empty answer would otherwise yield nothing and leave the UI untouched.
    if not answer:
        yield history,''
        return

    for char in answer:
        partial_reply += char
        # Replace the whole turn instead of mutating it, so tuple turns work too.
        history[-1] = [user_message, partial_reply]
        time.sleep(0.05)
        yield history,''


##########################################################################################################################################
########################################################################################################################################## Gradio
##########################################################################################################################################

# Custom CSS for a full-height chat layout. It has to be passed to
# gr.Blocks(css=...): assigning it to a variable inside the `with` block (and
# following it with bare string lines) never applies it to the page.
css = (
    ".contain { display: flex !important; flex-direction: column !important; }"
    "#component-0, #component-3, #component-10, #component-8  { height: 100% !important; }"
    "#chatbot { flex-grow: 1 !important; overflow: auto !important;}"
    "#col { height: 100vh !important; }"
)

# The component creation order is kept unchanged because the CSS above targets
# auto-generated #component-N ids.
with gr.Blocks(css=css) as demo:
    with gr.Row():
        with gr.Row():
            # Chatbot interface
            chatbot = gr.Chatbot(label="Oracle GenAI",
                                 value=[],
                                 elem_id='chatbot',
                                 render=True,
                                 bubble_full_width=False)

        with gr.Column():
            # PDF upload button
            btn = gr.UploadButton("📁 Upload a PDF(s)",
                                  file_types=[".pdf"],
                                  file_count="multiple")
            with gr.Row():
                # Uploaded PDFs window
                files = gr.File(label="Your PDFs")

    with gr.Column():
        with gr.Column():
            # Ask question input field
            txt = gr.Text(show_label=False, placeholder="Enter question")

        with gr.Column():
            # button to submit question to the bot
            submit_btn = gr.Button('Ask')

    # Event handler for uploading a PDF
    btn.upload(fn=upload_file, inputs=[btn], outputs=[files])

    # Event handler for submitting text question and generating response:
    # 1) add the question to the chat, 2) stream the answer, 3) refresh the file list.
    submit_btn.click(
        fn=add_text,
        inputs=[chatbot, txt],
        outputs=[chatbot],
        queue=True
    ).success(
        fn=generate_bot_response,
        inputs=[chatbot, txt, btn],
        outputs=[chatbot, txt]
    ).success(
        fn=upload_file,
        inputs=[btn],
        outputs=[files]
    )

if __name__ == "__main__":
    # The streamed bot reply is produced by a generator handler, which needs the
    # request queue. Recent Gradio releases enable it by default, but on 3.x it
    # must be turned on explicitly; calling it is harmless either way.
    demo.queue()
    # share=True prints a temporary public URL; debug=True keeps the cell
    # running and shows handler errors in the cell output.
    demo.launch(share=True, debug=True) # launch app

Running on local URL:  http://127.0.0.1:7860


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Running on public URL: https://bef910c58f4ae1eaeb.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)


<class 'list'>
done with upload_files
/tmp/gradio/b7d0a1434d39bdbbe0fae9ece7d64c970e022683/example_strategy.pdf
Start add text
Start generate_bot_response
start process_files
<PyPDF2._reader.PdfReader object at 0x7f795c9f12b0>
Start chuncking
Store embeddings in FAISS
Start load GenAI
returning qa_chain_with_memotry
----------------------------------------------------------------------------------------------------
Bot response is
{'query': 'What is an autonomous database?', 'result': 'An autonomous database is a self-driving database system that requires minimal human labor for database management tasks like provisioning, security, monitoring, backups, recovery, troubleshooting, and tuning. It is designed to run on the Exadata platform and leverages various Exadata features for improved performance. The Oracle Autonomous Database, for example, is built on the Oracle Database and is designed to provide superior performance, reliability, and scalability while minimizing human error and 