<a href="https://colab.research.google.com/github/almutareb/advanced-rag-system-anatomy/blob/main/Functions_from_intel_rag_hackathon_NOT_WORKING.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import sys
import os
!pip install langchain langchain-community --no-warn-script-location > /dev/null
!pip install beautifulsoup4 --no-warn-script-location > /dev/null
!pip install faiss-gpu --no-warn-script-location > /dev/null
!pip install chromadb --no-warn-script-location > /dev/null
!pip install validators --no-warn-script-location > /dev/null
!pip install sentence_transformers typing-extensions==4.8.0 unstructured --no-warn-script-location > /dev/null
!pip install gradio==3.48.0 --no-warn-script-location > /dev/null
!pip install boto3 --no-warn-script-location > /dev/null

In [None]:
# documents loader function
from langchain.document_loaders.recursive_url_loader import RecursiveUrlLoader
from bs4 import BeautifulSoup as Soup
from validators import url as url_validator

def load_docs_from_urls(urls: list = None, max_depth: int = 5):
    """

    """
    if urls is None:
        urls = ["https://docs.python.org/3/"]  # Default URL list
    docs = []
    for url in urls:
        if not url_validator(url):
            raise ValueError(f"Invalid URL: {url}")
        loader = RecursiveUrlLoader(url=url, max_depth=max_depth, extractor=lambda x: Soup(x, "html.parser").text)
        docs.extend(loader.load())
    print(f"loaded {len(docs)} pages")
    return docs
    #documents = loader.load()

In [None]:
# embeddings functions
from langchain.vectorstores import FAISS
from langchain.document_loaders import ReadTheDocsLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
import time


def create_embeddings(docs: list, embedding_model: str, chunk_size:int, chunk_overlap:int):
    text_splitter = RecursiveCharacterTextSplitter(
        separators=["\n\n", "\n", "(?<=\. )", " ", ""],
        chunk_size = chunk_size,
        chunk_overlap  = chunk_overlap,
        length_function = len,
    )

    # Stage one: read all the docs, split them into chunks.
    st = time.time()
    print('Loading documents ...')

    chunks = text_splitter.create_documents([doc.page_content for doc in docs], metadatas=[doc.metadata for doc in docs])
    et = time.time() - st
    print(f'Time taken: {et} seconds.')

    #Stage two: embed the docs.
    if embedding_model is None:
        embedding_model = "sentence-transformers/multi-qa-mpnet-base-dot-v1"
    embeddings = HuggingFaceEmbeddings(model_name=embedding_model)
    print(f"create a total of {len(chunks)}")

    return embeddings,chunks




In [3]:
# preprocessed vectorstore retrieval
import boto3
from botocore import UNSIGNED
from botocore.client import Config
import zipfile
from langchain_community.vectorstores import FAISS
from langchain_community.vectorstores import Chroma
from langchain.embeddings import HuggingFaceEmbeddings

# access .env file

s3 = boto3.client('s3', config=Config(signature_version=UNSIGNED))

## FAISS
FILE_NAME = 'lc-faiss-multi-mpnet-500-markdown'
FAISS_INDEX_PATH = './vectorstore/'+FILE_NAME
VS_DESTINATION = FAISS_INDEX_PATH+".zip"
s3.download_file('rad-rag-demos', 'vectorstores/lc-faiss-multi-mpnet-500-markdown.zip', VS_DESTINATION)
with zipfile.ZipFile(VS_DESTINATION, 'r') as zip_ref:
    zip_ref.extractall('./vectorstore/')

## Chroma DB
VS_DESTINATION = "./vectorstore/lc-chroma-multi-mpnet-500-markdown.zip"
s3.download_file('rad-rag-demos', 'vectorstore/lc-chroma-multi-mpnet-500-markdown.zip', VS_DESTINATION)
#db = Chroma(persist_directory="./chroma_db", embedding_function=embeddings)
#db.get()
with zipfile.ZipFile(VS_DESTINATION, 'r') as zip_ref:
    zip_ref.extractall('./vectorstore/')

model_name = "sentence-transformers/multi-qa-mpnet-base-dot-v1"
#model_kwargs = {"device": "cuda"}

embeddings = HuggingFaceEmbeddings(
    model_name=model_name,
#    model_kwargs=model_kwargs
    )

db = FAISS.load_local(FAISS_INDEX_PATH, embeddings)

FileNotFoundError: [Errno 2] No such file or directory: './vectorstore/lc-faiss-multi-mpnet-500-markdown.zip.656CEC4c'

In [None]:
# vectorization functions
from langchain.vectorstores import FAISS
from langchain.document_loaders import ReadTheDocsLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
import time

def build_vector_store(docs: list, db_path: str, embedding_model: str, new_db:bool=False, chunk_size:int=500, chunk_overlap:int=50):
    """

    """

    if db_path is None:
        FAISS_INDEX_PATH = "./vectorstore/py-faiss-multi-mpnet-500"
    else:
        FAISS_INDEX_PATH = db_path

    embeddings,chunks = create_embeddings(docs, embedding_model, chunk_size, chunk_overlap)

    #load chunks into vector store
    print(f'Loading chunks into faiss vector store ...')
    st = time.time()
    if new_db:
        db_faiss = FAISS.from_documents(chunks, embeddings)
    else:
        db_faiss = FAISS.add_documents(chunks, embeddings)
    db_faiss.save_local(FAISS_INDEX_PATH)
    et = time.time() - st
    print(f'Time taken: {et} seconds.')

    #print(f'Loading chunks into chroma vector store ...')
    #st = time.time()
    #persist_directory='./vectorstore/py-chroma-multi-mpnet-500'
    #db_chroma = Chroma.from_documents(chunks, embeddings, persist_directory=persist_directory)
    #et = time.time() - st
    #print(f'Time taken: {et} seconds.')
    result = f"built vectore store at {FAISS_INDEX_PATH}"
    return result

In [None]:
# prompts and chatlogic function
# HF libraries
from langchain.llms import HuggingFaceHub
from langchain.embeddings import HuggingFaceHubEmbeddings
# prediction guard
from langchain.llms import PredictionGuard
# prompt template
from langchain.prompts import PromptTemplate

category_template = """### Instruction:
Read the below input and determine if it is a request to search, explain, generate computer code?
Respond only with "generation" if it requests code, "explanation" if asks for explaination, "search" if it is searching for a function or tool and no other text.
Respond with "chat" if it does not fit any of the mentioned categories and no other text.</s>

### Input:
{query}</s>

### Response:
"""

category_prompt = PromptTemplate(
    input_variables=["query"],
    template=category_template
)

qa_template = """### Instruction:
Read the documentation exerpt (delimited by <ctx></ctx>) and the chat history (delimited by <hs></hs>) below to respond with a detailed answer to the given question.
First take a step back to reflect on the question breaking it down into smaller steps and then go through the answer step by step and one function at a time.
If the question cannot be answered based on the documentation exerpt alone or the documentation does not explicitly say the answer to the question,
write "Sorry I had trouble answering this question, based on the information I found."

<ctx>
Documentation: {context}
</ctx>

------
<hs>
{history}
</hs>
------
</s>

### Input:
Question: {query}</s>

### Response:
"""

qa_prompt = PromptTemplate(
    input_variables=["context", "history", "query"],
    template=qa_template
)

chat_template = """### Instruction:
You are a friendly and clever AI assistant. Respond to the latest human message in the conversation below.
Use the context (delimited by <ctx></ctx>) and conversation history (delimited by <hs></hs>).

<ctx>
{context}
</ctx>

------
<hs>
{history}
</hs>
------
</s>

### Input:
Human: {query}</s>
AI:

### Response:
"""

chat_prompt = PromptTemplate(
    input_variables=["context", "history", "query"],
    template=chat_template
)

code_template = """### Instruction:
You are a code generation assistant. Respond with a code snippet and any explanation requested in the below input.
Use the documentation context (delimited by <ctx></ctx>) and conversation history (delimited by <hs></hs>) to better understand the goal of the code

<ctx>
{context}
</ctx>

------
<hs>
{history}
</hs>
------
</s>

### Input:
{query}</s>

### Response:
"""

code_prompt = PromptTemplate(
    input_variables=["context", "history", "query"],
    template=code_template
)


def get_response_chain(query, history, question):
    model_name = "sentence-transformers/multi-qa-mpnet-base-dot-v1"
    embeddings = HuggingFaceHubEmbeddings(repo_id=model_name)
  # Determine what kind of message this is.
    print(f"asking the Llama!")
    msg_category = pg.Completion.create(
      model="Nous-Hermes-Llama2-13B",
      prompt=category_prompt.format(query=query)
    )['choices'][0]['text'].lower()

    print(f"asked the llama and it said: {msg_category}")

  # configure our chain

    if msg_category == "explanation":
        print(f"it is an explanation")

        # Handle the informational request.
        #result = pg.Completion.create(
        #    model="WizardCoder",
        #    prompt=qa_prompt.format(context=info_context, history=chat_history, query=message)
        #)
        #completion = result['choices'][0]['text'].split('#')[0].strip()
        model_id = HuggingFaceHub(repo_id="codellama/CodeLlama-13b-Instruct-hf", model_kwargs={
            "max_new_tokens":2048,
            "repetition_penalty":1.2,
            })
        FAISS_INDEX_PATH='./vectorstore/lc-faiss-multi-mpnet-500'
        db = FAISS.load_local(FAISS_INDEX_PATH, embeddings)
        retriever = db.as_retriever()
        memory = ConversationBufferMemory(memory_key="history", input_key="query")
        formatted_prompt = qa_prompt.format(context=db.similarity_search(query),history=history, query=query)
        qa = RetrievalQA.from_chain_type(llm=model_id, chain_type="stuff", retriever=retriever, verbose=False, return_source_documents=True, chain_type_kwargs={
            #"verbose": True,
            "memory": memory,
            "prompt": formatted_prompt
        }
            )


    elif msg_category == "generation":
        print(f"it is a generation")

        # Handle the code generation request.
        #result = pg.Completion.create(
        #    model="WizardCoder",
        #    prompt=code_prompt.format(context=info_context, history=chat_history, query=message),
        #    #max_tokens=500
        #)
        #completion = result['choices'][0]['text']
        model_id = HuggingFaceHub(repo_id="codellama/CodeLlama-13b-Instruct-hf", model_kwargs={
            "max_new_tokens":2048,
            "repetition_penalty":1.2,
            })
        FAISS_INDEX_PATH='./vectorstore/lc-git-faiss'
        db = FAISS.load_local(FAISS_INDEX_PATH, embeddings)
        retriever = db.as_retriever()
        memory = ConversationBufferMemory(memory_key="history", input_key="question")
        formatted_prompt = code_prompt.format(context=db.similarity_search(query),history=history, query=query)
        qa = RetrievalQA.from_chain_type(llm=model_id, chain_type="stuff", retriever=retriever, verbose=False, return_source_documents=True, chain_type_kwargs={
            #"verbose": True,
            "memory": memory,
            "prompt": formatted_prompt
        }
            )

    else:
        print(f"it is a chat")

        # Handle the chat message.
        #result = pg.Completion.create(
        #    model="Nous-Hermes-Llama2-13B",
        #    prompt=chat_prompt.format(context=info_context, history=chat_history, query=message),
        #    output={
        #        "toxicity": True
        #    }
        #)
        #completion = result['choices'][0]['text'].split('Human:')[0].strip()
        model_id = HuggingFaceHub(repo_id="HuggingFaceH4/zephyr-7b-beta", model_kwargs={
            "temperature":0.1,
            "max_new_tokens":2048,
            "repetition_penalty":1.2,
            "return_full_text":True
            })
        FAISS_INDEX_PATH='./vectorstore/py-faiss-multi-mpnet-500'
        db = FAISS.load_local(FAISS_INDEX_PATH, embeddings)
        retriever = db.as_retriever()
        memory = ConversationBufferMemory(memory_key="history", input_key="question")
        formatted_prompt = chat_prompt.format(context=db.similarity_search(query),history=history, query=query)
        qa = RetrievalQA.from_chain_type(llm=model_id, chain_type="stuff", retriever=retriever, verbose=False, return_source_documents=True, chain_type_kwargs={
            #"verbose": True,
            "memory": memory,
            "prompt": formatted_prompt
        }
            )

    return qa

In [None]:
# retriever and qa_chain function

# HF libraries
from langchain.llms import HuggingFaceHub
from langchain.embeddings import HuggingFaceHubEmbeddings
# vectorestore
from langchain.vectorstores import FAISS
# retrieval chain
from langchain.chains import RetrievalQA
# prompt template
from langchain.prompts import PromptTemplate
from langchain.memory import ConversationBufferMemory


def get_db_retriever(vector_db:str=None):
    model_name = "sentence-transformers/multi-qa-mpnet-base-dot-v1"
    embeddings = HuggingFaceHubEmbeddings(repo_id=model_name)

    #db = Chroma(persist_directory="./vectorstore/lc-chroma-multi-mpnet-500", embedding_function=embeddings)
    #db.get()
    if not vector_db:
        FAISS_INDEX_PATH='./vectorstore/py-faiss-multi-mpnet-500'
    else:
        FAISS_INDEX_PATH=vector_db
    db = FAISS.load_local(FAISS_INDEX_PATH, embeddings)

    retriever = db.as_retriever()

    return retriever


def qa_chain(query, history, question):

    retriever = get_db_retriever()
    prompt, model_id = get_response_chain(query, history, question)
    print(f"calling qa_instance with {prompt}\n, {model_id}\n, {query}\n, {history}\n, {question}\n")

    memory = ConversationBufferMemory(memory_key="history", input_key="question")
    qa_instance = RetrievalQA.from_chain_type(llm=model_id, chain_type="stuff", retriever=retriever, verbose=False, return_source_documents=True, chain_type_kwargs={
        #"verbose": True,
        "memory": memory,
        "prompt": prompt
        }
    )
    result = qa_instance({"query": query, "history": history, "question": question})

    return result

