In [12]:
!pip install langchain -q --user 

In [13]:
!pip install transformers -q --user 

In [14]:
!pip install pypdf -q --user

In [15]:
!pip install sentence-transformers -q --user

In [16]:
!pip install chromadb -q --user

In [17]:
!pip install google-cloud -q --user

In [18]:
!pip install intel_extension_for_pytorch --user



In [19]:
import logging
import os
import random
import re

In [20]:
import warnings

# Suppress warnings for a cleaner output
warnings.filterwarnings("ignore")

In [21]:
import torch

In [22]:
import intel_extension_for_pytorch as ipex

In [23]:
# Import necessary modules and classes
from langchain.vectorstores import Chroma
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM 
from transformers import pipeline
from langchain.llms import HuggingFacePipeline
from langchain.embeddings import SentenceTransformerEmbeddings
from langchain.chains import RetrievalQA
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [24]:
def get_device_type():
    """
    Pick the torch device to run on and seed the RNGs for accelerators.

    Preference order is CUDA GPU, then Intel XPU, then CPU. When an
    accelerator is selected, Python's ``random`` module and that
    accelerator's generators are seeded with a fixed value. The CPU
    fallback performs no seeding.

    Returns:
    - torch.device: a ``cuda``, ``xpu`` or ``cpu`` device.
    """
    seed = 88

    if torch.cuda.is_available():
        print("GPU is available")
        random.seed(seed)
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)
        # "gpu" is not a valid torch device type; CUDA devices are named "cuda".
        return torch.device("cuda")

    # torch.xpu is not present on every torch build, so look it up
    # defensively instead of raising AttributeError.
    xpu = getattr(torch, "xpu", None)
    if xpu is not None and xpu.is_available():
        print("GPU is not available")
        print("XPU is available")
        random.seed(seed)
        xpu.manual_seed(seed)
        xpu.manual_seed_all(seed)
        return torch.device("xpu")

    print("XPU or GPU not available - returning with CPU")
    return torch.device("cpu")


In [25]:
# Resolve the compute device once for the notebook and show which one was picked.
device_type = get_device_type()
print(device_type)

XPU or GPU not available - returning with CPU
cpu


In [28]:
MODEL_CACHE_PATH = "/home/common/data/Big_Data/GenAI/llm_models"

class ChatBotModel:
    """
    Load a pretrained seq2seq model and its tokenizer for text generation.

    The model is first looked up under MODEL_CACHE_PATH; if that fails it is
    loaded by its model id instead. Optionally the model is passed through
    Intel Extension for PyTorch (ipex).

    Attributes:
    - model_id_or_path: model id for text generation. Default is "MBZUAI/LaMini-Flan-T5-783M".
    - torch_dtype: The data type handed to the ipex optimizer.
    - device: torch.device returned by get_device_type().
    - autocast: autocast context manager matching the device (xpu or cpu).
    - tokenizer / model: the loaded tokenizer and model (model is in eval mode).
    - max_length: fixed at 256.
    """

    def __init__(
        self,
        model_id_or_path: str = "MBZUAI/LaMini-Flan-T5-783M",
        torch_dtype: torch.dtype = torch.bfloat16,
        optimize: bool = True,
    ) -> None:
        """
        The initializer for ChatBotModel class.

        Parameters:
        - model_id_or_path: The identifier or path of the pretrained model.
        - torch_dtype: The data type to use in the model. Default is torch.bfloat16.
        - optimize: If True, ipex is used to optimize the model.
        """
        self.torch_dtype = torch_dtype
        self.device = get_device_type()
        self.model_id_or_path = model_id_or_path
        local_model_id = self.model_id_or_path.replace("/", "--")
        local_model_path = os.path.join(MODEL_CACHE_PATH, local_model_id)

        if self._is_xpu(self.device):
            self.autocast = torch.xpu.amp.autocast
        else:
            self.autocast = torch.cpu.amp.autocast

        print(f"torch_dtype : {self.torch_dtype}")
        print(f"device : {self.device}")
        print(f"model_id_or_path : {self.model_id_or_path}")
        print(f"local_model_id : {local_model_id}")
        print(f"local_model_path : {local_model_path}")

        try:
            # Prefer the copy in the shared local cache directory.
            self.tokenizer, self.model = self._load_pretrained(local_model_path)
        except (OSError, ValueError, EnvironmentError) as e:
            logging.info(
                f"Tokenizer / model not found locally. Downloading tokenizer / model for {self.model_id_or_path} to cache...: {e}"
            )
            self.tokenizer, self.model = self._load_pretrained(self.model_id_or_path)

        self.max_length = 256
        if optimize:
            self.model = self._optimize_with_ipex(self.model)
        print("ChatBotModel Initialization Complete")

    @staticmethod
    def _is_xpu(device) -> bool:
        """Return True when `device` (a str or torch.device) refers to an XPU."""
        if isinstance(device, str):
            # The previous code compared the string to the boolean result of
            # startswith(), which could never be True.
            return device.startswith("xpu")
        return device.type == "xpu"

    def _load_pretrained(self, source: str):
        """Load tokenizer and eval-mode model from `source` (local path or model id)."""
        # NOTE(review): trust_remote_code=True allows code shipped with the
        # model repository to run locally; only use with trusted sources.
        tokenizer = AutoTokenizer.from_pretrained(source, trust_remote_code=True)
        model = (
            AutoModelForSeq2SeqLM.from_pretrained(
                source,
                device_map="auto",
                low_cpu_mem_usage=True,
                trust_remote_code=True,
                torch_dtype=torch.float32,
            )
            .to(self.device)
            .eval()
        )
        return tokenizer, model

    def _optimize_with_ipex(self, model):
        """
        Run `model` through ipex exactly once and return the optimized model.

        `ipex.optimize_transformers` is tried first when the installed ipex
        provides it; `ipex.optimize` is the fallback when it is missing or
        fails. The optimizers return the optimized model, so the result is
        handed back to the caller instead of being discarded.
        """
        optimized = None
        if hasattr(ipex, "optimize_transformers"):
            try:
                optimized = ipex.optimize_transformers(model, dtype=self.torch_dtype)
            except Exception as e:
                # Keep going with the generic optimizer, but record why.
                logging.warning(
                    "ipex.optimize_transformers failed (%s); falling back to ipex.optimize",
                    e,
                )
                optimized = None
        if optimized is None:
            optimized = ipex.optimize(model, dtype=self.torch_dtype)
        # Defensive: keep the original model if the optimizer returned nothing.
        return optimized if optimized is not None else model

In [30]:
# Build the chatbot model with its defaults (LaMini-Flan-T5-783M, bfloat16, ipex optimization on).
chat_bot_model = ChatBotModel()

2023-12-06 09:52:51,388 - root - INFO - Tokenizer / model not found locally. Downloading tokenizer / model for MBZUAI/LaMini-Flan-T5-783M to cache...: Repo id must be in the form 'repo_name' or 'namespace/repo_name': '/home/common/data/Big_Data/GenAI/llm_models/MBZUAI--LaMini-Flan-T5-783M'. Use `repo_type` argument if needed.


XPU or GPU not available - returning with CPU
torch_dtype : torch.bfloat16
device : cpu
model_id_or_path : MBZUAI/LaMini-Flan-T5-783M
local_model_id : MBZUAI--LaMini-Flan-T5-783M
local_model_path : /home/common/data/Big_Data/GenAI/llm_models/MBZUAI--LaMini-Flan-T5-783M
ChatBotModel Initialization Complete


# Getting Data From PDFs - Save in Vector DB

In [31]:
# Directory where the Chroma database will persist its data
persist_directory = "choma-db"

# Create the directory if it is missing. exist_ok=True avoids the
# check-then-create race of testing os.path.exists() first.
os.makedirs(persist_directory, exist_ok=True)

# Initialize a directory loader to load the PDF documents found in "law_data"
loader = DirectoryLoader("law_data", glob="./*.pdf", loader_cls=PyPDFLoader)
documents = loader.load()

# Initialize a text splitter to split documents into smaller, overlapping chunks
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=200)

# Split the loaded documents into chunks
texts = text_splitter.split_documents(documents)


In [32]:
# Build the vector store: a pre-trained SentenceTransformer model produces
# the embeddings, and Chroma indexes the text chunks under persist_directory.
embeddings = SentenceTransformerEmbeddings(
    model_name="multi-qa-mpnet-base-dot-v1",
)
db = Chroma.from_documents(
    texts,
    embeddings,
    persist_directory=persist_directory,
)

2023-12-06 09:57:35,950 - sentence_transformers.SentenceTransformer - INFO - Load pretrained SentenceTransformer: multi-qa-mpnet-base-dot-v1
2023-12-06 09:57:38,427 - sentence_transformers.SentenceTransformer - INFO - Use pytorch device: cpu
2023-12-06 09:57:38,764 - chromadb.telemetry.product.posthog - INFO - Anonymized telemetry enabled. See                     https://docs.trychroma.com/telemetry for more information.


Batches:   0%|          | 0/474 [00:00<?, ?it/s]

In [33]:
# Write the vector store to disk, then rebind `db` to a store opened from
# the persisted directory (the same call a later session can use to reload it).
db.persist()
db = Chroma(
    embedding_function=embeddings,
    persist_directory=persist_directory,
)

# RetrievalQA chain

In [34]:
# Text2text generation pipeline built on the already-loaded model and
# tokenizer; sampling is enabled with a low temperature and nucleus cut-off.
pipe = pipeline(
    "text2text-generation",
    model=chat_bot_model.model,
    tokenizer=chat_bot_model.tokenizer,
    max_length=512,
    do_sample=True,
    temperature=0.3,
    top_p=0.95,
)

In [35]:
# Expose the Hugging Face pipeline to LangChain as an LLM
local_llm = HuggingFacePipeline(pipeline=pipe)

# RetrievalQA chain of type "stuff": the retriever supplies the 2 most
# similar chunks from the vector store, and source documents are returned
# alongside the answer.
qa_chain = RetrievalQA.from_chain_type(
    llm=local_llm,
    chain_type="stuff",
    retriever=db.as_retriever(
        search_type="similarity",
        search_kwargs={"k": 2},
    ),
    return_source_documents=True,
)

# User Interactions 

In [39]:
def chat_bot_front_end():
    """
    Run an interactive question/answer loop on the console.

    Each non-empty line typed by the user is sent to the module-level
    `qa_chain` as ``{"query": ...}`` and the ``'result'`` field of the
    response is printed. The loop ends when the user types EXIT (any case,
    surrounding whitespace ignored) or when input is closed or interrupted.
    """
    while True:
        try:
            # Strip so that " exit " and padded questions behave as expected.
            input_query = input("Enter your query (or 'EXIT' to stop): ").strip()
        except (EOFError, KeyboardInterrupt):
            # Closed stdin or an interrupt at the prompt ends the session
            # cleanly instead of surfacing a traceback.
            print("Exiting...")
            break

        # Check if the user entered 'EXIT' to stop the program
        if input_query.upper() == 'EXIT':
            print("Exiting...")
            break

        # Nothing to ask: prompt again instead of sending an empty query
        if not input_query:
            continue

        # Execute the query using the QA chain defined earlier in the notebook
        llm_response = qa_chain({"query": input_query})

        # Print the response
        print(llm_response['result'])


In [None]:
# Start the interactive loop; it keeps prompting until the user types EXIT.
chat_bot_front_end()

Enter your query (or 'EXIT' to stop):  what is IPC


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

IPC is a general provision in the Indian Penal Code, 1860 that deals with attempts to commit offenses and makes them punishable with imprisonment, not only those punishable for life or death.


Enter your query (or 'EXIT' to stop):  More information of IPC


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

IPC stands for Indian Penal Code.


Enter your query (or 'EXIT' to stop):  What is Article


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

The article is helpful in ascertaining the intention of the accused Hindu who ridicules the Prophet of the Mohammedans.


Enter your query (or 'EXIT' to stop):  What is Section


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Section is (1) intention or knowledge relating to commission of murder; and (2) the doing of an act towards it.


Enter your query (or 'EXIT' to stop):  how many sections are there


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

There are two sections mentioned in the context.


Enter your query (or 'EXIT' to stop):  name few Indian Penal Code


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

The Indian Penal Code is not mentioned in the provided context.


Enter your query (or 'EXIT' to stop):  who drafted Indian penal Code ?


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

The Indian Penal Code was drafted by the First Indian Law Commission presided over by Lord Thomas Babington Macaulay.


Enter your query (or 'EXIT' to stop):  Explain more of Contempt of Supreme Court and High Courts


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Contempt of Supreme Court and High Courts refers to the act of defying the jurisdiction, powers, and authority of the courts, except those contained in the Contempt of Courts Act, 1971.94.


Enter your query (or 'EXIT' to stop):  Give more of Section228 0f IPC


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

I'm sorry, but the given context does not provide any information about Section228 of IPC. Therefore, I cannot provide a helpful answer to the question.


Enter your query (or 'EXIT' to stop):  Give more of Section 228 of IPC


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Section 228 of IPC, 1860 makes disclosure of identity of victim of certain offences punishable. Printing or publishing name of any matter which may make known the identity of any person against whom an offence under sections 376, 376A, 376B, 376C or 376D is alleged or found to have been committed can be punished. Keeping in view the social object of preventing social victimisation or ostracism of the victim of a.
