# Running the model with local docs:

# 1) Constants : 

In [1]:
import os

from chromadb.config import Settings

from langchain.document_loaders import CSVLoader, PDFMinerLoader, TextLoader, Docx2txtLoader
from langchain.document_loaders import UnstructuredFileLoader, UnstructuredMarkdownLoader

In [2]:
# Folder that holds the source documents to ingest
SOURCE_DIRECTORY = "SOURCE_DOCUMENTS"

In [3]:
# Folder where the Chroma vector store is persisted
PERSIST_DIRECTORY = "db-multilingual-v2"

In [4]:
# Intended worker count for ingestion: the value of os.cpu_count(), or 8 when
# that value is falsy (e.g. None). Can be changed to a specific number.
INGEST_THREADS = os.cpu_count() or 8

In [5]:
print(INGEST_THREADS)

8


In [6]:
# Chroma client configuration; handed to the vector store as `client_settings`
CHROMA_SETTINGS = Settings(
    is_persistent=True,
    anonymized_telemetry=False,
)

In [7]:
# NOTE(review): presumably llama.cpp-style offload/batch settings (LlamaCpp is
# imported further down); neither constant is referenced in the visible
# notebook -- confirm before relying on them.
N_GPU_LAYERS = 100  # Llama-2-70B has 83 layers
N_BATCH = 512

In [8]:
# Maps a file extension to a document loader class.
# NOTE(review): load_single_document does its own extension dispatch and does
# not consult this map.
DOCUMENT_MAP = {
    ".txt": TextLoader,
    ".md": UnstructuredMarkdownLoader,
    ".py": TextLoader,
    ".pdf": PDFMinerLoader,
#     ".pdf": UnstructuredFileLoader,
    ".csv": CSVLoader,
#     ".xls": UnstructuredExcelLoader,
#     ".xlsx": UnstructuredExcelLoader,
    ".docx": Docx2txtLoader,
    # NOTE(review): Docx2txtLoader targets .docx files -- confirm it can
    # actually read legacy binary .doc files before relying on this entry.
    ".doc": Docx2txtLoader,
}

In [9]:
from auto_gptq import AutoGPTQForCausalLM
from huggingface_hub import hf_hub_download
from langchain.llms import LlamaCpp

from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    LlamaForCausalLM,
    LlamaTokenizer,
)

  from .autonotebook import tqdm as notebook_tqdm


# Ingest : 

In [None]:
import logging
import os
from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor, as_completed
from langchain.docstore.document import Document
from langchain.embeddings import HuggingFaceInstructEmbeddings
from langchain.text_splitter import Language, RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma

In [None]:
# Modified from video
def load_single_document(file_path: str) -> Document:
    """Load one file and return the first Document produced by its loader.

    The loader is picked from the file extension, matched case-insensitively:
    ``.txt`` -> TextLoader (read as UTF-8), ``.pdf`` -> PDFMinerLoader,
    ``.csv`` -> CSVLoader.

    Args:
        file_path: Path of the file to load.

    Returns:
        The first element of the list returned by ``loader.load()``.

    Raises:
        ValueError: If the file extension is not one of the supported types.
    """
    lowered = file_path.lower()
    if lowered.endswith(".txt"):
        loader = TextLoader(file_path, encoding="utf8")
    elif lowered.endswith(".pdf"):
        loader = PDFMinerLoader(file_path)
    elif lowered.endswith(".csv"):
        loader = CSVLoader(file_path)
    else:
        # Without this branch `loader` stays unbound and the function dies
        # with an opaque UnboundLocalError on the return line.
        raise ValueError(f"Unsupported file type: {file_path!r}")

    # NOTE(review): only the first Document is kept. Loaders that yield
    # several documents per file (CSVLoader is documented as one per row)
    # lose the remainder here -- confirm this is intended.
    return loader.load()[0]

In [None]:
# Modified from video
def load_documents(source_dir: str) -> list[Document]:
    """Load every file under ``source_dir``, including nested folders.

    The directory tree is walked recursively and each regular file is passed
    to ``load_single_document``; sub-directories themselves are never passed
    to the loader.

    Args:
        source_dir: Root directory containing the source documents.

    Returns:
        One Document per file found, in directory-walk order.

    Raises:
        OSError: If ``source_dir`` (or a nested folder) cannot be listed,
            e.g. FileNotFoundError when the directory does not exist.
    """

    def _reraise(error: OSError) -> None:
        # os.walk swallows listing errors by default; surface them so a
        # missing source directory still fails loudly.
        raise error

    documents = []
    for folder, _subfolders, file_names in os.walk(source_dir, onerror=_reraise):
        for file_name in file_names:
            documents.append(load_single_document(os.path.join(folder, file_name)))
    return documents

In [None]:
# Modified from video
def main():
    """Ingest SOURCE_DIRECTORY into a persisted Chroma vector store.

    Loads the documents, splits them into 1000-character chunks with a
    200-character overlap, embeds the chunks on the "cuda" device and writes
    the resulting store to PERSIST_DIRECTORY.
    """
    print(f"Loading documents from {SOURCE_DIRECTORY}")
    source_docs = load_documents(SOURCE_DIRECTORY)

    # Split the loaded documents into overlapping chunks
    chunks = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200,
    ).split_documents(source_docs)
    print(f"Loaded {len(source_docs)} documents from {SOURCE_DIRECTORY}")
    print(f"Split into {len(chunks)} chunks of text")

    # Embedding model used to vectorise the chunks
    embedder = HuggingFaceInstructEmbeddings(
        model_name="intfloat/multilingual-e5-large",
        model_kwargs={"device": "cuda"},
    )

    # Build the store from the chunks and persist it
    store = Chroma.from_documents(
        chunks,
        embedder,
        persist_directory=PERSIST_DIRECTORY,
        client_settings=CHROMA_SETTINGS,
    )
    store.persist()
    # Drop the local reference to the store once it has been persisted
    del store

In [None]:
if __name__ == "__main__":
    main()

# Run Local GPT

In [28]:
import os
import torch
import utils
from langchain.chains import RetrievalQA
from langchain.embeddings import HuggingFaceInstructEmbeddings
from langchain.llms import HuggingFacePipeline
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler  # for streaming response
from langchain.callbacks.manager import CallbackManager

from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor, as_completed
from langchain.docstore.document import Document

from langchain.vectorstores import Chroma

In [None]:
# from ctransformers import AutoModelForCausalLM, AutoTokenizer

In [None]:
from langchain.llms import GPT4All
from deep_translator import GoogleTranslator

In [None]:
from transformers import (
    GenerationConfig,
    pipeline,
)

In [None]:
def load_model(
    model_path: str = "C:/Users/Alaa AI/Python Projects/Chatbots/GPT by me/alaa_ai_model_gptj.gguf",
):
    """Create the local GPT4All LLM from a model file on disk.

    Args:
        model_path: Path to the model file. Defaults to the path this
            notebook was originally written against.

    Returns:
        The configured GPT4All instance.

    Raises:
        FileNotFoundError: If ``model_path`` does not point to an existing
            file. Downloads are disabled (``allow_download=False``), so a
            missing file can never be recovered from; fail early and clearly.
    """
    if not os.path.isfile(model_path):
        raise FileNotFoundError(f"Model file not found: {model_path}")

    local_llm = GPT4All(
        device="gpu",
        model=model_path,
        max_tokens=2048,
        allow_download=False,
        backend="mistral",
    )
    print("Local LLM Loaded")

    return local_llm

In [None]:
def main():
    """Run an interactive retrieval-QA loop over the persisted Chroma store.

    Opens the store in PERSIST_DIRECTORY, loads the local LLM and builds a
    "stuff" RetrievalQA chain over a retriever with ``k = 3``. Each query is
    translated to English before it is sent to the chain, and the answer is
    translated to Arabic before it is printed. Typing ``exit`` ends the
    loop; blank input is ignored.
    """
    embeddings = HuggingFaceInstructEmbeddings(
        model_name="intfloat/multilingual-e5-large",
        model_kwargs={"device": "cuda"},
    )

    db = Chroma(
        embedding_function=embeddings,
        persist_directory=PERSIST_DIRECTORY,
        client_settings=CHROMA_SETTINGS,
    )

    # Load the LLM for generating the natural-language response.
    llm = load_model()
    print("Model is Loaded Successfully ....")

    qa = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=db.as_retriever(search_kwargs={"k": 3}),
    )

    # Build the translators once instead of on every loop iteration.
    to_english = GoogleTranslator(source="auto", target="en")
    to_arabic = GoogleTranslator(source="auto", target="ar")

    while True:
        query = input("\nEnter a query : ")
        stripped = query.strip()
        if stripped == "exit":
            break
        if not stripped:
            # Nothing to ask: do not send an empty prompt to the translator
            # and the (slow) LLM chain.
            continue

        # Get the answer from the chain
        input_text = to_english.translate(query)
        res = qa(input_text)
        answer = res["result"]

        # Print the result
        print("\n\n> Question : ")
        print(query)
        print("\n> Answer : ")
        print(to_arabic.translate(answer))
        
#         # Print the relevant sources used for the answer
#         print("------------------------------- SOURCE DOCUMENTS ----------------------------------")
#         for document in docs:
#             print("\n> " + document.metadata["source"] + ":")
#             print(document.page_content)
#         print("------------------------------- SOURCE DOCUMENTS ----------------------------------")

In [None]:
if __name__ == "__main__":
    main()

# Another Method

In [1]:
from langchain.document_loaders import WebBaseLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings.huggingface import HuggingFaceInstructEmbeddings
from langchain.vectorstores import FAISS
from langchain.chains import RetrievalQA
from langchain.document_loaders import PyPDFLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma
from langchain.chains import RetrievalQA
from langchain.llms import GPT4All
from deep_translator import GoogleTranslator

In [2]:
loader = PyPDFLoader("C:/Users/Alaa AI/Python Projects/Chatbots/GPT with local docs/SOURCE_DOCUMENTS/arabtesting_1_en.pdf")

In [3]:
documents = loader.load_and_split()

In [4]:
len(documents)

209

In [5]:
print(documents[0].page_content)

Chapter  One
Scale  for  developmental  
delay  and  severe  and  multiple  disabilities
1  Machine Translated by Google


In [6]:
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1024, chunk_overlap=64)
texts = text_splitter.split_documents(documents)

In [7]:
len(texts)

545

In [8]:
print(texts[2].page_content)

com.arabtesting.www:// http  
com.arabtesting@info  ÿÿÿÿÿÿÿÿÿ:  Machine Translated by Google


In [9]:
# embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

In [10]:
embeddings = HuggingFaceEmbeddings(model_name="intfloat/multilingual-e5-large")

  from .autonotebook import tqdm as notebook_tqdm


In [11]:
db = Chroma.from_documents(texts, embeddings, persist_directory="db-multilingual")

In [12]:
vectorstore = FAISS.from_documents(texts, embeddings)

In [13]:
system_prompt = """You are a helpful assistant, you will use the provided context to answer user questions.
Read the given context before answering questions and think step by step. If you can not answer a user question based on 
the provided context, inform the user. Do not use any other information for answering user. Provide a detailed answer to the question."""

In [14]:
def generate_prompt(prompt: str, system_prompt: str = system_prompt) -> str:
    """Wrap a user prompt and a system prompt in the Llama-2 chat template.

    The system prompt is enclosed in ``<<SYS>>`` / ``<</SYS>>`` markers inside
    a single ``[INST] ... [/INST]`` turn. The bare ``<>`` markers previously
    used here are not part of that template.

    Args:
        prompt: The user message.
        system_prompt: Instructions placed in the system section; defaults to
            the module-level ``system_prompt``.

    Returns:
        The formatted prompt string, without leading or trailing whitespace
        from the template itself.
    """
    # The four-space indentation of the inner lines is kept from the original
    # template so that only the markers change.
    return (
        f"[INST] <<SYS>>\n"
        f"    {system_prompt}\n"
        f"    <</SYS>>\n"
        f"    \n"
        f"    {prompt} [/INST]"
    )

In [15]:
B_INST, E_INST = "<s>[INST] ", " [/INST]"
prompt_template = (
                B_INST
                + system_prompt
                + """
            
            Context: {context}
            User: {question}"""
                + E_INST
            )

In [16]:
question = "ممكن تكلمنى عن قانون تعليم الأطفال idea 'Respond in details'"
input_text = GoogleTranslator(source = 'auto' , target = 'en').translate(question)

In [17]:
print(input_text)

Can you talk to me about the Children's Education Law? idea 'Respond in details'


In [18]:
from langchain import HuggingFacePipeline, PromptTemplate

In [19]:
prompt = PromptTemplate(template=prompt_template, input_variables=["context", "question"])

In [20]:
retriever = vectorstore.as_retriever(search_kwargs={"k": 3})
docs = retriever.get_relevant_documents(input_text)

In [21]:
# Local GPT4All model loaded from disk; downloading is disabled
llm = GPT4All(
    model="C:/Users/Alaa AI/Python Projects/Chatbots/GPT by me/alaa_ai_model_gptj_v1.6.gguf",
    backend="mistral",
    device="gpu",
    max_tokens=2048,
    allow_download=False,
    verbose=False,
)

In [22]:
# Retrieval-QA chain: "stuff" chain type, using the custom prompt template
qa_chain = RetrievalQA.from_chain_type(
    llm,
    chain_type="stuff",
    retriever=retriever,
    chain_type_kwargs={"prompt": prompt},
)
# qa_chain(question)

In [26]:
%%time
# Keep the chain output in `res`: the following cell reads res["result"],
# which raises NameError on a clean top-to-bottom run if it is never bound.
res = qa_chain(input_text)
res

CPU times: total: 4min 51s
Wall time: 1min 19s


{'query': "Can you talk to me about the Children's Education Law? idea 'Respond in details'",
 'result': ''}

In [24]:
print(GoogleTranslator(source = 'auto' , target = 'ar').translate(res["result"]))




In [25]:
qa_chain(question)

{'query': "ممكن تكلمنى عن قانون تعليم الأطفال idea 'Respond in details'",
 'result': "\n</s>\n\nThe Education for Individuals with Difficulties Act 2004, also known as the Improvement of Education for Disabilities with Individuals Act, is a law that focuses on providing education to individuals with difficulties or disabilities. This act covers developmental delay and severe and multiple disabilities. The purpose of this law is to ensure that these children receive an appropriate education tailored to their needs.\n\nThe law applies from birth up to the age of 15, and in cases of a severe delay, it may be applied to larger loans. It takes into account that the child's subculture might not encourage long verbal responses, so the focus is on providing an education that caters to their specific needs and abilities."}

In [None]:
qa_chain("what is the figure in the document? Extract the answer from the text")