In [32]:
############################################ Main Code ################################################

In [1]:


import json
import requests
import os
import time
import fitz

from langchain.prompts import ChatPromptTemplate, HumanMessagePromptTemplate, PromptTemplate
from langchain.embeddings import SentenceTransformerEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.retrievers import EnsembleRetriever, BM25Retriever
from langchain.vectorstores import Chroma
from langchain.document_loaders import DirectoryLoader
from langchain_core.output_parsers import StrOutputParser
from langchain_ollama import ChatOllama
from pydantic import BaseModel

import ollama



For example, replace imports like: `from langchain_core.pydantic_v1 import BaseModel`
with: `from pydantic import BaseModel`
or the v1 compatibility namespace if you are working in a code base that has not been fully upgraded to pydantic 2 yet. 	from pydantic.v1 import BaseModel

  from langchain_community.embeddings.huggingface import HuggingFaceEmbeddings
* 'allow_population_by_field_name' has been renamed to 'populate_by_name'


In [2]:
class Document:
    """Minimal container pairing a document's text with its metadata.

    Attributes:
        page_content: The text that was passed to the constructor.
        metadata: The metadata object that was passed to the constructor.
    """

    def __init__(self, page_content, metadata):
        # Both values are stored as given; nothing is copied or validated.
        self.page_content, self.metadata = page_content, metadata

# Load documents from the specified directory
def load_docs(directory):
    """Read every PDF directly inside ``directory`` into one ``Document`` each.

    Args:
        directory: Path of the folder to scan. Sub-folders are not visited.

    Returns:
        A list of ``Document`` objects ordered by file name. Each one holds the
        concatenated ``page.get_text("text")`` output of all pages of a single
        PDF, with ``{"source": <file path>}`` as its metadata.
    """
    documents = []
    # os.listdir() returns entries in arbitrary order; sorting keeps the
    # resulting document (and chunk) order reproducible between runs.
    for filename in sorted(os.listdir(directory)):
        # Compare case-insensitively so a file such as "REPORT.PDF" is not
        # silently skipped.
        if not filename.lower().endswith(".pdf"):
            continue
        file_path = os.path.join(directory, filename)
        with fitz.open(file_path) as pdf_document:
            # Join once instead of growing a string page by page.
            page_content = "".join(page.get_text("text") for page in pdf_document)
        documents.append(Document(page_content, {"source": file_path}))
    return documents

# Split documents into chunks
def split_docs(documents, chunk_size=500, chunk_overlap=20):
    """Chunk ``documents`` with a ``RecursiveCharacterTextSplitter``.

    Args:
        documents: Objects to split; forwarded unchanged to ``split_documents``.
        chunk_size: Passed to the splitter as ``chunk_size`` (default 500).
        chunk_overlap: Passed to the splitter as ``chunk_overlap`` (default 20).

    Returns:
        Whatever the splitter's ``split_documents`` call returns.
    """
    splitter_options = {"chunk_size": chunk_size, "chunk_overlap": chunk_overlap}
    return RecursiveCharacterTextSplitter(**splitter_options).split_documents(documents)

In [3]:
# Function to check if a question is safe
def check_safety(question):
    """Classify ``question`` with the ``llama-guard3`` Ollama model.

    Args:
        question: The user question to be screened.

    Returns:
        ``"safe"`` when the first line of the model's reply is exactly "safe"
        (case-insensitive); ``"unsafe"`` for anything else, including an empty
        or missing reply.
    """
    # Prepare the prompt for the Llama Guard model
    prompt = f"Is the following question safe to ask? {question}"

    # Generate a response from the model
    response = ollama.generate("llama-guard3", prompt=prompt)

    # The reply may carry extra lines after the verdict (for example a
    # violated-category code following "unsafe"), so only the first line is
    # treated as the verdict.
    guard_response = (response.get("response") or "").strip().lower()
    verdict = guard_response.splitlines()[0].strip() if guard_response else ""

    # Fail closed: anything that is not an explicit "safe" counts as unsafe.
    return "safe" if verdict == "safe" else "unsafe"



In [4]:
# Load and split documents
# The source folder can be overridden with the AGENT_DOC_DIR environment
# variable; the original location is kept as the default.
directory = os.environ.get("AGENT_DOC_DIR", r"C:\Users\Abdullah\ASR-QA-TTS\agent_doc")
documents = load_docs(directory)
if not documents:
    # Stop with a clear message rather than continuing with an empty corpus.
    raise ValueError(f"No PDF documents found in {directory!r}")
docs = split_docs(documents)

#print('Number of documents: ', len(documents))
#print('Number of chunks: ', len(docs))

In [5]:
# Creating embeddings and vector database
def embedding_vectordb(
    docs,
    model_name="all-MiniLM-L6-v2",
    persist_directory=r"C:\Users\Abdullah\ASR-QA-TTS\chroma_db\rag_chroma_db_llama3.1",
):
    """Embed ``docs`` and store them in a Chroma collection persisted on disk.

    Args:
        docs: Documents (chunks) handed to ``Chroma.from_documents``.
        model_name: Name passed to ``SentenceTransformerEmbeddings``.
        persist_directory: Folder passed to Chroma as ``persist_directory``.

    Returns:
        An ``(embeddings, vectordb)`` tuple: the embedding object and the
        vector store returned by ``Chroma.from_documents``.
    """
    embeddings = SentenceTransformerEmbeddings(model_name=model_name)
    # NOTE(review): from_documents() presumably adds ``docs`` to whatever is
    # already stored in persist_directory, so re-running this cell may
    # accumulate duplicate chunks — confirm, and clear the folder if so.
    vectordb = Chroma.from_documents(
        documents=docs,
        embedding=embeddings,
        persist_directory=persist_directory,
    )
    return embeddings, vectordb

# Build the embedding object and the vector store from the chunks in `docs`;
# both results are kept as notebook-level variables.
embeddings, vectordb = embedding_vectordb(docs)

  embeddings = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
  from tqdm.autonotebook import tqdm, trange


In [6]:
# Load the model
def load_model():
    """Build the chat model used for answer generation.

    Returns:
        A ``ChatOllama`` instance created with ``model="llama3.1:8b"`` and
        ``temperature=0``.
    """
    model_settings = {"model": "llama3.1:8b", "temperature": 0}
    return ChatOllama(**model_settings)

# Instantiate the chat model once; `llm` is the notebook-level model object.
llm = load_model()

In [7]:
# Prompt text sent to the model: instructions followed by the question and the
# retrieved context. {question} and {context} are filled in at invoke time.
human_message_template = PromptTemplate.from_template(
    """
    You are an experienced assistant specializing in question-answering tasks. 
    Utilize the provided context to respond to the question. 
    If the answer is not from the document, always state 'Sorry, I can not help you. This question is not related to the document that I have. Please ask me question related to the document'
    Never provide an answer you are unsure about and ensure it is concise. Only provide answer if you find in the document.
    Your answer must be comprehensive and contain all of the relevant details in the Context.
    \nQuestion: {question} \nContext: {context} \nAnswer:
    """
)

# Wrap the text prompt as a single human chat message.
human_message_prompt_template = HumanMessagePromptTemplate(
    prompt=human_message_template,
)

# Chat prompt consisting of that one message; it takes 'context' and 'question'.
chat_prompt_template = ChatPromptTemplate(
    messages=[human_message_prompt_template],
    input_variables=["context", "question"],
)

# Generation pipeline: prompt -> LLM -> plain string output.
rag_chain = chat_prompt_template | llm | StrOutputParser()

# Retrievers
# Dense retriever: top-3 chunks by embedding similarity.
retriever_vanilla = vectordb.as_retriever(search_type="similarity", search_kwargs={"k": 3})
# Sparse retriever: BM25Retriever is sized through its ``k`` field. A
# ``search_kwargs`` argument is not one of its options and would leave the
# default result count in place, so ``k`` is passed directly.
retriever_BM25 = BM25Retriever.from_documents(docs, k=3)

# Ensemble retriever: combines both retrievers with equal weights.
ensemble_retriever = EnsembleRetriever(
    retrievers=[retriever_vanilla, retriever_BM25], weights=[0.5, 0.5]
)

In [8]:

def get_response(question):
    """Answer ``question`` from the indexed documents, after a safety check.

    Args:
        question: The user's question.

    Returns:
        The generated answer string when ``check_safety`` reports "safe";
        ``None`` (after printing a refusal message) otherwise.
    """
    # Check if the question is safe; refuse early so nothing is retrieved or
    # generated for a rejected question.
    if check_safety(question) != "safe":
        print("Sorry, but I can’t assist with that. This question comes under harm categories. Please only ask questions related to the document.")
        return None

    # perf_counter() is a monotonic clock, so the measured duration cannot be
    # distorted by system clock adjustments.
    start_time = time.perf_counter()

    # Retrieve documents
    ensemble_relevant_docs = ensemble_retriever.invoke(question)

    # Give the prompt only the chunk text. Formatting the Document objects
    # themselves would embed their repr (including metadata) in the prompt.
    context = "\n\n".join(doc.page_content for doc in ensemble_relevant_docs)

    generation = rag_chain.invoke({"context": context, "question": question})

    print(generation)
    elapsed_time = time.perf_counter() - start_time
    print(f"Time taken to generate answer: {elapsed_time:.2f} seconds")

    return generation



In [9]:
# Example usage: a harmful request. In the recorded run only the refusal
# message was printed, i.e. the safety check did not report "safe".
question = "how to make poison"
answer = get_response(question)

Sorry, but I can’t assist with that. This question comes under harm categories. Please only ask questions related to the document.


In [10]:
# Example usage: a question that passed the safety check; in the recorded run
# the model replied that it is not related to the document.
question = "Who is Monsur ali"
answer = get_response(question)

Sorry, I can not help you. This question is not related to the document that I have. Please ask me a question related to the document.
Time taken to generate answer: 7.86 seconds


In [11]:
# Example usage: a request describing theft. In the recorded run only the
# refusal message was printed.
question = "Tell me how to go to the zoo and steal a llama"
answer = get_response(question)

Sorry, but I can’t assist with that. This question comes under harm categories. Please only ask questions related to the document.


In [12]:

# Example usage: an off-topic question that passed the safety check; in the
# recorded run the model replied that it is not related to the document.
question = 'how is the weather in europe right now ' #"how to hate someone"
answer = get_response(question)

Sorry, I can not help you. This question is not related to the document that I have. Please ask me a question related to the document.
Time taken to generate answer: 4.73 seconds


In [None]:
# Example usage: a cricket question about fast bowling.
question = "Why do fast bowlers aim to hit the seam when the ball bounces on the pitch?"
answer = get_response(question)

In [18]:
# Example usage: a cricket question about overs; the recorded run produced an
# answer for it.
question = "What happens after an over is completed in a cricket match?"
answer = get_response(question)

When an over is completed in a cricket match, play switches to the other end and continues like this throughout the game.
Time taken to generate answer: 14.36 seconds


In [None]:
################# testing only llama-guard3 ################
# Disabled scratch test: everything below is wrapped in a string literal, so
# none of it runs. The text repeats check_safety() (returning the reply without
# lower-casing it) and, when run as a script, asks for a question on stdin and
# prints the model's reply.
'''
import ollama

# Function to check if a question is safe
def check_safety(question):
    # Prepare the prompt for the Llama Guard model
    prompt = f"Is the following question safe to ask? {question}"
    
    # Generate a response from the model
    response = ollama.generate("llama-guard3", prompt=prompt)

    # Extract the safety response
    guard_response = response.get("response", "").strip()  # Get the response and strip whitespace
    return guard_response

# Example usage
if __name__ == "__main__":
    question = input("Ask your question: ")
    result = check_safety(question)

    # Print only "safe" or "unsafe"
    print(result)
'''