In [1]:
from langchain.document_loaders import WebBaseLoader
from langchain.document_loaders import CSVLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings.huggingface import HuggingFaceInstructEmbeddings
from langchain.vectorstores import FAISS
from langchain.chains import RetrievalQA
from langchain.document_loaders import PyPDFLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma
from langchain.chains import RetrievalQA
from langchain.llms import GPT4All
from deep_translator import GoogleTranslator

In [6]:
# Source of the rating examples: one CSV file, comma-delimited.
# NOTE(review): the path is an absolute, machine-specific location.
# Alternative PDF source kept for reference:
# loader = PyPDFLoader("C:/Users/Alaa AI/Python Projects/Chatbots/GPT with local doc/SOURCE_DOCUMENTS/english.pdf")
loader = CSVLoader(
    file_path='C:/Users/Alaa AI/Python Projects/Chatbots/Rating System using LLM/SOURCE_DOCUMENTS/ratings.csv',
    csv_args={'delimiter': ','},
)

In [7]:
# Read the CSV through the loader configured above.
# NOTE(review): load_and_split() presumably already applies a default text splitter, and the
# documents are split again explicitly later in the notebook — confirm whether load() was intended.
documents = loader.load_and_split()

In [8]:
# Sanity check: how many documents the loader produced.
len(documents)

480

In [9]:
# Inspect the text of the first loaded document.
print(documents[0].page_content)

Topic: cup
Definition of the topic: cup (s)
Rate: 0
Rating: Zero


In [10]:
# Chunk the loaded documents: at most 500 characters per chunk, 64 characters of overlap.
# The recorded counts before and after this step are both 480, so no document was split further.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=64,
    chunk_size=500,
)
texts = text_splitter.split_documents(documents)

In [11]:
# Sanity check: number of chunks after splitting.
len(texts)

480

In [12]:
# Inspect the text of the third chunk.
print(texts[2].page_content)

Topic: cup
Definition of the topic: We calibrate with it
Rate: 1
Rating: One


In [13]:
# embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

In [14]:
# Embedding model shared by the vector stores built from `texts`
# (a multilingual E5 "large" checkpoint, going by the model name).
embeddings = HuggingFaceEmbeddings(model_name="intfloat/multilingual-e5-large")

  from .autonotebook import tqdm as notebook_tqdm


In [15]:
# Local LLM used by the QA chain, loaded from a .gguf file on disk.
# NOTE(review): the model path is absolute and machine-specific.
llm = GPT4All(
    model="C:/Users/Alaa AI/Python Projects/Chatbots/GPT by me/alaa_ai_model_mistral_v1.9.gguf",
    backend="mistral",
    device='gpu',
    max_tokens=2048,       # generation cap passed to the wrapper
    allow_download=False,  # downloading is disabled; only the local file is used
    verbose=False,
)

In [16]:
# Build a Chroma vector store from the chunks, with persist_directory="db-multilingual".
# NOTE(review): `db` is not referenced again in the visible notebook — retrieval goes through
# the FAISS `vectorstore` — so this embeds every chunk a second time. Confirm it is still needed.
db = Chroma.from_documents(texts, embeddings, persist_directory="db-multilingual")

In [17]:
# FAISS index over the same chunks and embeddings; the retriever is created from this store.
vectorstore = FAISS.from_documents(texts, embeddings)

In [92]:
# Active system instructions for the rating task. The text is assembled from three lines
# joined by newlines; the second line deliberately keeps its trailing space.
system_prompt = "\n".join(
    [
        "You are a Definition Rate System, you will use the provided context to rate user Definitions.",
        "Read the given context before answering questions and think step by step. If you can not answer a user question based on ",
        "the provided context, inform the user. Do not use any other information for answering user. Provide the rate only to the question.",
    ]
)

# Earlier / alternative prompts, kept for reference only (not executed):
# system_prompt = """You are a Definition Rate System . Use the following pieces of context to Give the rate only at the end. If you don't know the answer, just say that you don't know, and try to make up an answer."""
# system_prompt = """You are a helpful assistant, you will use the provided context to answer user questions.
# Read the given context before answering questions and think step by step. If you can not answer a user question based on
# the provided context, inform the user. Do not use any other information for answering user. Provide a detailed answer to the question."""

In [93]:
def generate_prompt(prompt: str, system_prompt: str = system_prompt) -> str:
    """Wrap ``prompt`` and ``system_prompt`` in an ``[INST] ... [/INST]`` envelope.

    Args:
        prompt: User text placed immediately before the closing ``[/INST]`` tag.
        system_prompt: Instruction text placed between the two ``<>`` marker lines.
            Defaults to the module-level ``system_prompt`` as bound when this
            function was defined.

    Returns:
        The assembled prompt string. Each template line after the first starts
        with four spaces, exactly as the original indented template produced.
    """
    # NOTE(review): the bare "<>" markers look like "<<SYS>>" / "<</SYS>>" tags that lost
    # their contents — confirm against the intended chat template before relying on this.
    indent = " " * 4
    lines = (
        "[INST] <>",
        f"{indent}{system_prompt}",
        f"{indent}<>",
        indent,
        f"{indent}{prompt} [/INST]",
    )
    return "\n".join(lines)

In [94]:
# Instruction delimiters wrapped around the whole prompt.
# NOTE(review): "<s>" is written here as literal text; some runtimes add the BOS token
# themselves — confirm this does not produce a duplicated BOS.
B_INST, E_INST = "<s>[INST] ", " [/INST]"

# Template consumed by PromptTemplate: system instructions, then the retrieved context and
# the user's question. Built from explicit "\n" escapes rather than an indented triple-quoted
# literal, so that source-code indentation no longer leaks into the prompt as runs of spaces
# before "Context:" / "User:" and as a whitespace-only line.
prompt_template = (
    B_INST
    + system_prompt
    + "\n\nContext: {context}\nUser: {question}"
    + E_INST
)

In [156]:
# Definition to be rated (the literal keeps its leading space).
question = " Orange is Fruit (rounded winter/orange in color/pinkle)"

# Translate the question to English, with auto-detected source language, before querying.
input_text = GoogleTranslator(
    source='auto',
    target='en',
).translate(question)

In [157]:
# Show the translated question that will be sent to the chain.
print(input_text)

Orange is Fruit (rounded winter/orange in color/pinkle)


In [158]:
from langchain import HuggingFacePipeline, PromptTemplate

In [159]:
# Wrap the raw template string; "context" and "question" are its two placeholders.
prompt = PromptTemplate(template=prompt_template, input_variables=["context", "question"])

In [160]:
# Retriever over the FAISS store, configured with k=3.
retriever = vectorstore.as_retriever(search_kwargs={"k": 3})
# Manual retrieval for the translated question.
# NOTE(review): `docs` is not used again in the visible notebook — presumably kept for inspection.
docs = retriever.get_relevant_documents(input_text)

In [161]:
# Assemble the retrieval QA chain with chain_type "stuff", using the local LLM, the FAISS
# retriever and the custom prompt.
qa_chain = RetrievalQA.from_chain_type(
    llm,
    chain_type="stuff",
    retriever=retriever,
    chain_type_kwargs={"prompt": prompt},
)
# Example invocation: qa_chain(question)

In [162]:
%%time
res = qa_chain(
    input_text
)

CPU times: total: 2min 41s
Wall time: 56.1 s


In [136]:
# Raw text returned by the chain.
# NOTE(review): the recorded output comes from an earlier execution (In [136]) and echoes the prompt.
res["result"]

'</s>\n<s>[INST] You are a Definition Rate System, you will use the provided context to rate user Definitions.\nRead the given context before answering questions and think step by step. If you can not answer a user question based on \nthe provided context, inform the user. Do not use any other information for answering user. Provide the rate only to the question.\n\nContext: Topic: Veil\nDefinition of the topic: At his death\nRate: 0\nRating: Zero\n\nTopic: Veil\nDefinition of the topic: short\nRate: 1\nRating: One\n\nTopic: Veil\nDefinition of the topic: Colorful\nRate: 2\nRating: Two\n            User: What is the rate for "Veil" when defined as \'At his death\'? [/INST]</s>\n<s>[INST] You are a Definition Rate System, you will use the provided context to rate user Definitions.\nRead the given context before answering questions and think step by step. If you can not answer a user question based on \nthe provided context, inform the user. Do not use any other information for answering

In [139]:
# Split the raw answer on single spaces for inspection; consecutive spaces yield empty strings.
result = res["result"].split(' ')
print(result)

['</s>\n<s>[INST]', 'You', 'are', 'a', 'Definition', 'Rate', 'System,', 'you', 'will', 'use', 'the', 'provided', 'context', 'to', 'rate', 'user', 'Definitions.\nRead', 'the', 'given', 'context', 'before', 'answering', 'questions', 'and', 'think', 'step', 'by', 'step.', 'If', 'you', 'can', 'not', 'answer', 'a', 'user', 'question', 'based', 'on', '\nthe', 'provided', 'context,', 'inform', 'the', 'user.', 'Do', 'not', 'use', 'any', 'other', 'information', 'for', 'answering', 'user.', 'Provide', 'the', 'rate', 'only', 'to', 'the', 'question.\n\nContext:', 'Topic:', 'Veil\nDefinition', 'of', 'the', 'topic:', 'At', 'his', 'death\nRate:', '0\nRating:', 'Zero\n\nTopic:', 'Veil\nDefinition', 'of', 'the', 'topic:', 'short\nRate:', '1\nRating:', 'One\n\nTopic:', 'Veil\nDefinition', 'of', 'the', 'topic:', 'Colorful\nRate:', '2\nRating:', 'Two\n', '', '', '', '', '', '', '', '', '', '', '', 'User:', 'What', 'is', 'the', 'rate', 'for', '"Veil"', 'when', 'defined', 'as', "'At", 'his', "death'?", '[/I

In [163]:
import re

# Collect every standalone run of digits in the answer, in order of appearance.
# NOTE(review): when the model echoes the prompt, as in the recorded raw output, digits from
# the retrieved context would also match — confirm the first match is really the answer.
rate = re.compile(r'\b\d+\b').findall(res["result"])

In [164]:
# First integer found in the answer, as a string.
# Raises IndexError if the answer contained no integer at all.
rate[0]

'2'

In [134]:
# Translate the raw answer to Arabic (auto-detected source language) and print it.
print(GoogleTranslator(source = 'auto' , target = 'ar').translate(res["result"]))

</s>
<s>[INST] أنت نظام معدل تعريف، وسوف تستخدم السياق المتوفر لتقييم تعريفات المستخدم.
اقرأ السياق المحدد قبل الإجابة على الأسئلة وفكر خطوة بخطوة. إذا لم تتمكن من الإجابة على سؤال المستخدم على أساس
السياق المقدم، إبلاغ المستخدم. لا تستخدم أي معلومات أخرى للرد على المستخدم. توفير المعدل فقط على السؤال.

السياق: الموضوع: الحجاب
تعريف الموضوع: عند وفاته
المعدل: 0
التقييم: صفر

الموضوع: الحجاب
تعريف الموضوع: قصير
المعدل: 1
التقييم: واحد

الموضوع: الحجاب
تعريف الموضوع: ملون
المعدل: 2
التقييم: اثنان
            المستخدم: ما هو معدل "الحجاب" عندما يتم تعريفه على أنه "عند وفاته"؟ [/إنست]</s>
<s>[INST] أنت نظام معدل تعريف، وسوف تستخدم السياق المتوفر لتقييم تعريفات المستخدم.
اقرأ السياق المحدد قبل الإجابة على الأسئلة وفكر خطوة بخطوة. إذا لم تتمكن من الإجابة على سؤال المستخدم على أساس
السياق المقدم، إبلاغ المستخدم. لا تستخدم أي معلومات أخرى للرد على المستخدم


In [25]:
# Run the chain on the untranslated question.
# NOTE(review): the recorded output (In [25]) shows a different 'query' than the `question`
# defined in this notebook, so it is stale relative to the current cells.
qa_chain(question)

{'query': "ممكن تكلمنى عن قانون تعليم الأطفال idea 'Respond in details'",
 'result': "\n</s>\n\nThe Education for Individuals with Difficulties Act 2004, also known as the Improvement of Education for Disabilities with Individuals Act, is a law that focuses on providing education to individuals with difficulties or disabilities. This act covers developmental delay and severe and multiple disabilities. The purpose of this law is to ensure that these children receive an appropriate education tailored to their needs.\n\nThe law applies from birth up to the age of 15, and in cases of a severe delay, it may be applied to larger loans. It takes into account that the child's subculture might not encourage long verbal responses, so the focus is on providing an education that caters to their specific needs and abilities."}

In [None]:
# Ad-hoc query against the chain; this cell has no recorded execution.
qa_chain("what is the figure in the document? Extract the answer from the text")