# Building a Private AI Chatbot

# Importing Packages

In [1]:
# import dependencies
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, pipeline

import os

from langchain.vectorstores import Chroma
from langchain.llms import HuggingFacePipeline
from langchain.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma
from langchain import HuggingFacePipeline
from langchain.document_loaders import PyPDFDirectoryLoader
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory

  from .autonotebook import tqdm as notebook_tqdm


In [14]:
from deep_translator import GoogleTranslator

# Download the Model from HuggingFace:
Download the Zephyr-7B-Alpha model (the sharded `anakin87/zephyr-7b-alpha-sharded` checkpoint).

In [2]:
# Hugging Face model name used for both the model and the tokenizer
model_name = "anakin87/zephyr-7b-alpha-sharded"

# helper that loads a causal LM with a 4-bit quantization config
def load_quantized_model(model_name: str):
    """
    Load a causal language model using a 4-bit BitsAndBytes configuration.

    :param model_name: Name or path of the model to be loaded.
    :return: Loaded quantized model.
    """
    # Quantization settings gathered in one place, then expanded into the config.
    quantization_options = {
        "load_in_4bit": True,
        "bnb_4bit_use_double_quant": True,
        "bnb_4bit_quant_type": "nf4",
        "bnb_4bit_compute_dtype": torch.bfloat16,
    }
    four_bit_config = BitsAndBytesConfig(**quantization_options)

    return AutoModelForCausalLM.from_pretrained(
        model_name,
        quantization_config=four_bit_config,
        torch_dtype=torch.bfloat16,
    )

In [3]:
# function for initializing tokenizer
def initialize_tokenizer(model_name: str):
    """
    Create the tokenizer for ``model_name`` and pin its BOS token id.

    :param model_name: Name or path of the model for tokenizer initialization.
    :return: Initialized tokenizer, with ``bos_token_id`` set to 1.
    """
    bos_id = 1  # beginning-of-sentence token id assigned to the tokenizer
    tok = AutoTokenizer.from_pretrained(model_name)
    tok.bos_token_id = bos_id
    return tok

In [4]:
# load the 4-bit quantized model (the recorded output shows checkpoint shards
# being downloaded and then loaded, so this cell can take a long time)
model = load_quantized_model(model_name)

# initialize the tokenizer from the same model name
tokenizer = initialize_tokenizer(model_name)

# specify stop token ids
# NOTE(review): not referenced by any other cell visible here — confirm it is still needed
stop_token_ids = [0]

`low_cpu_mem_usage` was None, now set to True since model is quantized.
Downloading shards: 100%|██████████| 8/8 [18:49<00:00, 141.22s/it]
Loading checkpoint shards: 100%|██████████| 8/8 [02:06<00:00, 15.87s/it]


In [5]:
# load pdf files from the "pdfs" directory (path is relative to the working directory)
loader = PyPDFDirectoryLoader("pdfs")
documents = loader.load()

In [6]:
# split the documents into small chunks
# chunk_size and chunk_overlap are tunable: change them as needed
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
all_splits = text_splitter.split_documents(documents)

In [7]:
# specify embedding model (using huggingface sentence transformer)
# alternative: embedding_model_name = "sentence-transformers/all-mpnet-base-v2"
embedding_model_name = "intfloat/multilingual-e5-large"
# the embedding model is placed on the "cuda" device, so this cell needs a GPU
model_kwargs = {"device": "cuda"}
embeddings = HuggingFaceEmbeddings(model_name=embedding_model_name, model_kwargs=model_kwargs)

In [8]:
# embed the document chunks and store them in a Chroma vector store
# that uses "chroma_db_multilingual" as its persist directory
# NOTE(review): re-running this cell against an already-populated persist
# directory may add the same chunks again — confirm before re-executing
vectordb = Chroma.from_documents(documents=all_splits, embedding=embeddings, persist_directory="chroma_db_multilingual")

In [9]:
# specify the retriever: expose the vector store through its retriever
# interface (no search options are passed, so library defaults apply)
retriever = vectordb.as_retriever()

In [10]:
# build huggingface pipeline for using zephyr-7b-alpha
#
# The factory function is imported under an alias because this cell rebinds the
# name `pipeline` to the pipeline object it builds (later cells read that name).
# After the first run the bare name `pipeline` no longer refers to the
# transformers factory, so calling it again would fail; calling the alias keeps
# this cell safe to re-run.
from transformers import pipeline as build_hf_pipeline

pipeline = build_hf_pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    use_cache=True,
    device_map="auto",
    max_length=2048,
    do_sample=True,
    top_k=5,
    num_return_sequences=1,
    eos_token_id=tokenizer.eos_token_id,
    # the EOS token id is reused as the padding token id
    pad_token_id=tokenizer.eos_token_id,
)

In [11]:
# specify the llm: wrap the text-generation pipeline built above so that
# langchain chains can use it as their language model
llm = HuggingFacePipeline(pipeline=pipeline)

In [12]:
# build conversational retrieval chain (rag) using langchain
def create_conversation(query: str, chat_history: list) -> tuple:
    """
    Answer ``query`` with the retrieval chain and record the turn in ``chat_history``.

    A ``ConversationalRetrievalChain`` is built from the module-level ``llm``
    and ``retriever`` and called with the question and the prior turns.

    :param query: The user's question.
    :param chat_history: Prior turns as ``(question, answer)`` pairs (tuples or
        two-item lists). The list is mutated in place: the new turn is appended.
    :return: ``('', chat_history)`` — an empty string followed by the same list
        object, now including the new turn. If anything fails, the answer slot
        of the new turn holds the error message text instead.
    """
    try:
        # Normalise prior turns to (str, str) tuples so that pairs supplied as
        # lists, or holding non-string entries, are handled uniformly.
        prior_turns = [(str(question), str(answer)) for question, answer in chat_history]

        # No memory object is attached to the chain: a chain-level memory
        # supplies its own `chat_history` value, which replaces the one passed
        # in the inputs, so a freshly created (empty) buffer would hide the
        # caller's history on every call.
        qa_chain = ConversationalRetrievalChain.from_llm(
            llm=llm,
            retriever=retriever,
        )

        result = qa_chain({'question': query, 'chat_history': prior_turns})
        reply = result['answer']
    except Exception as e:
        # Deliberate best-effort: report the failure as the reply rather than
        # raising. Store the message text, not the exception object, so every
        # history entry stays a pair of strings.
        reply = str(e)

    chat_history.append((query, reply))
    return '', chat_history

In [15]:
# read the user's question and translate it to English (source language auto-detected)
# NOTE(review): the raw user input is handed to GoogleTranslator — presumably an
# external service call; confirm this is acceptable for a "private" chatbot
user_prompt = GoogleTranslator(source='auto', target='en').translate(input("User: "))

User:  اشرحلى شرح بنود المهارات الانفعالية الاجتماعية


In [18]:
# ask the translated question with an empty chat history and print the
# returned ('', chat_history) tuple
print(create_conversation(user_prompt , []))

('', [('Explain to me the explanation of the social emotional skills items', "Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.\n\nbasic  daily  performance  in  eating,  such  as  basic  behaviors  (swallowing,  chewing)  to  behaviors  \nthat  depend  largely  on  the  social  aspect,  such  as  (buying  \nfrom  the  supermarket).  Evaluates  performance  in  eating  and  meal  preparation  *This  subtest  includes  20  tasks\n3-  Entering  the  \nbathroom:  The  skills  of  entering  the  bathroom  mean  self-care  skills  that  support  basic  daily  performance  in  \nrelieving  oneself,  such  as  the  skills  of  understanding  the  need  to  relieve  oneself,  taking  down  clothes,  and  \nkeeping  \none’s  clothes  dry.  *This  sub-test  includes  18  tasks  that  evaluate  performance  in  using  the  toilet  and  bathroom.\n-4  Personal  care:  \nSelf-care  skill