In [1]:
from langchain.document_loaders import CSVLoader
from langchain.indexes import VectorstoreIndexCreator
import torch
from langchain import HuggingFacePipeline, PromptTemplate
from langchain.chains import RetrievalQA
from langchain.document_loaders import PyPDFDirectoryLoader
from langchain.embeddings import HuggingFaceInstructEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from transformers import AutoTokenizer, TextStreamer, pipeline , GenerationConfig
from deep_translator import GoogleTranslator
from auto_gptq import AutoGPTQForCausalLM
# Prefer the first CUDA GPU; fall back to the CPU when CUDA is unavailable.
if torch.cuda.is_available():
    DEVICE = "cuda:0"
else:
    DEVICE = "cpu"
import os

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Show which device was selected.
DEVICE

'cuda:0'

In [3]:
# Unused: this would download pokemon.csv, but the notebook reads the local 'ratings.csv' instead.
# !wget https://gist.githubusercontent.com/armgilles/194bcff35001e7eb53a2a8b441e8b2c6/raw/92200bc0a673d5ce2110aaad4544ed6c4010f687/pokemon.csv

In [4]:
# Configure a CSV loader for the comma-delimited ratings file.
loader = CSVLoader(
    file_path='ratings.csv',
    csv_args={'delimiter': ','},
)

In [5]:
# Read the CSV into documents. Per the LangChain loader API, `load_and_split`
# also passes them through the loader's default text splitter.
docs = loader.load_and_split()
# Number of documents produced.
len(docs)

480

In [6]:
# Inspect the text of the first document (print keeps the line breaks readable).
print(docs[0].page_content)

Topic: cup
Definition of the topic: cup (s)
Rate: 0
Rating: Zero


In [7]:
# Embedding model used to index and query the documents; it runs on DEVICE.
# NOTE(review): "intfloat/multilingual-e5-large" is an E5 model, while this
# wrapper is the INSTRUCTOR one and applies INSTRUCTOR-style instructions.
# Confirm the pairing is intended; a plain sentence-transformers wrapper with
# E5's "query: " / "passage: " prefixes may fit the model better.
embeddings = HuggingFaceInstructEmbeddings(
    model_name="intfloat/multilingual-e5-large",
    model_kwargs={"device": DEVICE},
)

load INSTRUCTOR_Transformer
max_seq_length  512


In [8]:
# Split the documents into chunks of at most 500 characters with a 20-character overlap.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=20)
texts = text_splitter.split_documents(docs)
# Number of chunks produced.
len(texts)

480

In [None]:
# Preview a few chunks only; displaying the whole list floods the notebook output.
texts[:3]

In [9]:
%%time
db = Chroma.from_documents(texts, embeddings, persist_directory="db")

CPU times: total: 6.8 s
Wall time: 6.49 s


In [10]:
# Hugging Face repo id of the GPTQ-quantized Llama-2 7B chat model.
model_name_or_path = "TheBloke/Llama-2-7B-chat-GPTQ"
# Passed as `model_basename` when the quantized weights are loaded.
model_basename = "model"
# Tokenizer from the same repo (fast implementation requested).
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, use_fast=True)

In [11]:
# Load the quantized model onto DEVICE from the repo's safetensors weights.
model = AutoGPTQForCausalLM.from_quantized(
    model_name_or_path,
    # revision="gptq-4bit-128g-actorder_True",
    model_basename=model_basename,
    use_safetensors=True,
    # NOTE(review): this permits running custom code shipped in the model repo — confirm it is needed.
    trust_remote_code=True,
    inject_fused_attention=False,
    device=DEVICE,
    # None: presumably the quantization config is then read from the repo — verify.
    quantize_config=None,
)

Skipping module injection for FusedLlamaMLPForQuantizedModel as currently not supported with use_triton=False.


In [12]:
# DEFAULT_SYSTEM_PROMPT = """
# You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.

# If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.
# """.strip()
# DEFAULT_SYSTEM_PROMPT = """You are a Rate System can rate any definition from 0 to 10. if your rate is between 0 and 3 respond with '0' , if your rate is between 4 to 7 respond with '1' and if your rate is between 8 to 10 respond with '2'""".strip()
DEFAULT_SYSTEM_PROMPT = """You are a Definition Rate System, you will use the provided context to rate user Definitions.
Read the given context before answering questions and think step by step. If you can not answer a user question based on 
the provided context, inform the user. Do not use any other information for answering user. Provide the rate only to the question."""


def generate_prompt(prompt: str, system_prompt: str = DEFAULT_SYSTEM_PROMPT) -> str:
    """Wrap a user prompt in the Llama-2 chat format.

    The system prompt is enclosed in the ``<<SYS>>`` / ``<</SYS>>`` markers
    inside a single ``[INST] ... [/INST]`` turn, which is the layout the
    Llama-2 chat models were trained on. Without those markers the system
    text is not delimited from the user turn.

    Args:
        prompt: User-turn text. Literal ``{name}`` placeholders are kept as-is,
            so the result can be used as a template string.
        system_prompt: Instructions placed in the system block.

    Returns:
        The formatted prompt with surrounding whitespace stripped.
    """
    return f"""
[INST] <<SYS>>
{system_prompt}
<</SYS>>

{prompt} [/INST]
""".strip()

In [13]:
# Streams generated text as it is produced, skipping the prompt and special tokens.
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

In [14]:
# Load the model repo's generation config so it can be passed to the pipeline
# explicitly (done to avoid a warning).
generation_config = GenerationConfig.from_pretrained(model_name_or_path)

In [15]:
# Text-generation pipeline around the quantized model; output is streamed via `streamer`.
text_pipeline = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=1024,
    # Sampling is enabled (do_sample=True below), so the same query can get
    # different ratings on different runs.
    temperature=0.7,
    # top_p=0.95,
    repetition_penalty=1.15,
    do_sample=True,
    streamer=streamer,
    generation_config=generation_config,
)

The model 'LlamaGPTQForCausalLM' is not supported for text-generation. Supported models are ['BartForCausalLM', 'BertLMHeadModel', 'BertGenerationDecoder', 'BigBirdForCausalLM', 'BigBirdPegasusForCausalLM', 'BioGptForCausalLM', 'BlenderbotForCausalLM', 'BlenderbotSmallForCausalLM', 'BloomForCausalLM', 'CamembertForCausalLM', 'LlamaForCausalLM', 'CodeGenForCausalLM', 'CpmAntForCausalLM', 'CTRLLMHeadModel', 'Data2VecTextForCausalLM', 'ElectraForCausalLM', 'ErnieForCausalLM', 'FalconForCausalLM', 'FuyuForCausalLM', 'GitForCausalLM', 'GPT2LMHeadModel', 'GPT2LMHeadModel', 'GPTBigCodeForCausalLM', 'GPTNeoForCausalLM', 'GPTNeoXForCausalLM', 'GPTNeoXJapaneseForCausalLM', 'GPTJForCausalLM', 'LlamaForCausalLM', 'MarianForCausalLM', 'MBartForCausalLM', 'MegaForCausalLM', 'MegatronBertForCausalLM', 'MistralForCausalLM', 'MptForCausalLM', 'MusicgenForCausalLM', 'MvpForCausalLM', 'OpenLlamaForCausalLM', 'OpenAIGPTLMHeadModel', 'OPTForCausalLM', 'PegasusForCausalLM', 'PersimmonForCausalLM', 'PLBartFo

In [16]:
# Expose the transformers pipeline as a LangChain LLM.
llm = HuggingFacePipeline(pipeline=text_pipeline)#, model_kwargs={"temperature": 0}

In [17]:
# Earlier system-prompt drafts, kept for reference:
# SYSTEM_PROMPT = "Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer."
# SYSTEM_PROMPT = "Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, and try to make up an answer."
# SYSTEM_PROMPT = "You are a Definition Rate System . Use the following pieces of context to Give the rate only at the end. If you don't know the answer, just say that you don't know, and try to make up an answer."
# System prompt used by the retrieval chain: rate the user's definition using only the supplied context.
SYSTEM_PROMPT = """You are a Definition Rate System, you will use the provided context to rate user Definitions.
Read the given context before answering questions and think step by step. If you can not answer a user question based on 
the provided context, inform the user. Do not use any other information for answering user. Provide the rate only to the question."""
# Prompt template text: {context} and {question} are left as literal
# placeholders for PromptTemplate to fill in.
template = generate_prompt(
    """
{context}

Question: {question}
""",
    system_prompt=SYSTEM_PROMPT,
)

In [18]:
# LangChain prompt object with two inputs: the retrieved context and the user question.
prompt = PromptTemplate(template=template, input_variables=["context", "question"])

In [19]:
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler  # for streaming response
from langchain.callbacks.manager import CallbackManager

In [20]:
# Callback manager holding a single handler that writes streamed output to stdout.
callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])

In [21]:
# Retrieval QA chain: fetch the 2 closest chunks from the Chroma index, insert
# them into the prompt ("stuff" chain type), and return them alongside the answer.
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=db.as_retriever(search_kwargs={"k": 2}),
    return_source_documents=True,
    chain_type_kwargs={"prompt": prompt},
    callbacks=callback_manager,
)

In [None]:
# Interactive loop: read a definition, translate it to English, rate it with
# the retrieval chain, and print the answer translated to Arabic.
# Enter "exit" to stop.

# The translators do not depend on the query, so build them once.
to_english = GoogleTranslator(source='auto', target='en')
to_arabic = GoogleTranslator(source='auto', target='ar')

while True:
    query = input("\nEnter a query: ").strip()
    if query == "exit":
        break
    if not query:
        # Nothing to rate; ask again instead of sending empty text to the
        # translator and the model.
        continue

    # Queries may be typed in any language; the chain is queried in English.
    query = to_english.translate(query)
    # Get the answer from the chain
    res = qa_chain(query)
    # Stored as `source_docs` (not `docs`) so the documents loaded at the top
    # of the notebook are not overwritten; kept for inspection after the loop.
    answer, source_docs = res["result"], res["source_documents"]

    # Print the result
    # print("\n\n> Question:")
    print(query)
    print("\n> Answer:")
    print(to_arabic.translate(answer))


Enter a query:   Veil is At his death


 Understood! Based on the provided context, I would give the following ratings:

Topic: Veil
Question: Veil
Answer: At his death
Rate: 0
Veil is At his death

> Answer:
مفهوم! بناءً على السياق المقدم، سأعطي التقييمات التالية:

الموضوع: الحجاب
السؤال: الحجاب
الجواب: عند وفاته
المعدل: 0



Enter a query:   Veil is A covering that a girl wears on her head when praying and going out


 Thank you for using my services! I'll be happy to help you with your questions.
Based on the provided context, here are my ratings for each question:
1. Topic: Awwal
Question: Awwal
Answer: 2
Grade: 2

Explanation: This definition seems to be accurate and relevant to the topic of Awwal. It describes the disappearance or absence of the sun, which is an important concept in many cultures and belief systems.
2. Question: Veil is A covering that a girl wears on her head when praying and going out
Grade: 1

Explanation: This definition is partially accurate but lacks clarity and detail. A veil is indeed a type of garment that some girls wear on their heads, particularly during prayer or social events. However, it would be more helpful if the user provided additional context or details about the purpose or significance of the veil.
Veil is A covering that a girl wears on her head when praying and going out

> Answer:
شكرا لاستخدامك خدماتي! سأكون سعيدًا بمساعدتك في الإجابة على أسئلتك.
بناءً 


Enter a query:   Veil is scarf that we wear on our heads


 I'm happy to help! Here's my response:
Grade: 2

Veil is a scarf that we wear on our heads.
Veil is scarf that we wear on our heads

> Answer:
أنا سعيد للمساعدة! وهنا ردي:
الصف: 2

الحجاب هو وشاح نرتديه على رؤوسنا.



Enter a query:   Veil is Made of silk and cotton


 Great! I'm ready to assist you with rating definitions. Based on the context you provided, here's my response:
Topic: Veil
Question: Is veil made of silk and cotton?
Grade: 3
Veil is Made of silk and cotton

> Answer:
عظيم! أنا على استعداد لمساعدتك في تعريفات التصنيف. بناءً على السياق الذي قدمته، إليك ردي:
الموضوع: الحجاب
السؤال: هل الحجاب مصنوع من الحرير والقطن؟
الصف 3
