In [1]:
%%time
! pip install  -U -qq langchain tiktoken pypdf faiss-gpu
! pip install  -U -qq InstructorEmbedding sentence_transformers

CPU times: user 279 ms, sys: 80.7 ms, total: 360 ms
Wall time: 24.9 s


## Import Stuff

In [2]:
import re
import os
import torch
import random
import numpy as np
import pandas as pd
from operator import itemgetter

import langchain
from langchain.schema import format_document
from langchain.schema.messages import get_buffer_string
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain import PromptTemplate, LLMChain
from langchain.prompts import ChatPromptTemplate
from langchain.vectorstores import FAISS
from langchain.llms import HuggingFacePipeline
from InstructorEmbedding import INSTRUCTOR
from langchain.embeddings import HuggingFaceInstructEmbeddings
from langchain.chains import RetrievalQA
from langchain.schema.output_parser import StrOutputParser
from langchain.schema.runnable import RunnableLambda, RunnablePassthrough,RunnableParallel
from langchain.schema.messages import HumanMessage, SystemMessage, AIMessage

import torch
import transformers
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

  from tqdm.autonotebook import trange


In [3]:
def seed_everything(seed=42):
    random.seed(seed)
    os.environ['PYTHONASSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = True

seed_everything(seed=42)

import locale
locale.getpreferredencoding = lambda: "UTF-8"

## ChatBot

In [9]:
class ChatBot:
    
    def __init__(self,
                 model,
                 tokenizer,
                 embeddings,
                 pdf_path = None,
                 chat_history = [],
                 max_len = 1000,
                 temperature = 0,
                 top_p = 0.95,
                 repetition_penalty = 1.15,
                 split_chunk_size = 800,
                 split_overlap = 0,
                 k = 3,
                 device = "cuda",
                 do_sample = True,
                 ## vector db
                 vector_db_save_path = "faiss_index_hp",
                 vector_db_load_path = None,
                 ## templates
                 answer_template = None,
                 condensed_question_template = None,
                 pipe = None,
                ):
        self.model = model
        self.tokenizer = tokenizer
        self.embeddings = embeddings
        if tokenizer.pad_token is None:
            tokenizer.pad_token = tokenizer.eos_token
        self.pdf_path = pdf_path
        self.chat_history = chat_history
        self.max_len = max_len
        self.temperature = temperature
        self.top_p = top_p
        self.repetition_penalty = repetition_penalty
        self.split_chunk_size = split_chunk_size
        self.split_overlap = split_overlap
        self.k = k
        self.device = device
        self.do_sample = do_sample
        self.vector_db_save_path = vector_db_save_path
        self.vector_db_load_path = vector_db_load_path
        
        
        # templates
        if answer_template is None:
            self.answer_template  = """Answer the question based only on the following context:
                                        {context}

                                        Question: {question}
                                    """
        self.answer_prompt = ChatPromptTemplate.from_template(self.answer_template)
        
        if condensed_question_template is None:
            self.condensed_question_template = """Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question, in its original language.

                                                Chat History:
                                                {chat_history}
                                                Follow Up Input: {question}
                                                Standalone question:
                                                """
        
        self.condensed_question_prompt = PromptTemplate.from_template(self.condensed_question_template)
        
        ## llm pipeline 
        if pipe is None:
            pipe = pipeline(
                task = "text-generation",
                model = self.model,
                tokenizer = self.tokenizer,
                pad_token_id = self.tokenizer.eos_token_id,
                max_length = self.max_len,
        #             temperature = self.temperature,
        #             top_p = self.top_p,
        #             do_sample=self.do_sample,
                repetition_penalty = self.repetition_penalty
            )

        self.llm = HuggingFacePipeline(pipeline = pipe)
        
    def load_pdf(self):
        loader = DirectoryLoader(
            self.pdf_path,
            glob="./*.pdf",
            loader_cls=PyPDFLoader,
            show_progress=True,
            use_multithreading=True
        )

        self.documents = loader.load()

        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size = self.split_chunk_size,
            chunk_overlap = self.split_overlap
        )

        self.texts = text_splitter.split_documents(self.documents)        
        
    
    def create_vector_db(self):
        self.vectordb = FAISS.from_documents(
            documents = self.texts, 
            embedding = self.embeddings
        )

        self.vectordb.save_local(self.vector_db_save_path)
            
    def load_vector_db(self):
        self.vectordb = FAISS.load_local(
            self.vector_db_load_path,
            self.embeddings
        )
        
    def create_chat_qa_chain(self):
        
        if self.vector_db_load_path:
            self.load_vector_db()
        else:
            if self.pdf_path is not None:
                self.load_pdf()
                self.create_vector_db()
                self.vector_db_load_path = self.vector_db_save_path
                self.load_vector_db()
        
        retriever = None
        if hasattr(self,"vectordb"):
            retriever = self.vectordb.as_retriever(search_kwargs = {"k": self.k, "search_type" : "similarity"})
        
        _inputs = RunnableParallel(
                    standalone_question=RunnablePassthrough.assign(
                        chat_history=lambda x: get_buffer_string(x["chat_history"])
                    )
                    | self.condensed_question_prompt
                    | self.llm
                    | StrOutputParser(),
                )
        if retriever is None:
            _context = {
                "context": itemgetter("standalone_question"),
                "question": lambda x: x["standalone_question"],
            }
        else:
            _context = {
                "context": itemgetter("standalone_question") | retriever,
                "question": lambda x: x["standalone_question"],
            }
        
        self.chain = (
                _inputs
                | _context
                | self.answer_prompt
                | self.llm
                |StrOutputParser()
        )
    
    def clean_response(self,response):
        return response.replace("Answer:","").strip()
    
    def start_chat(self):
        self.create_chat_qa_chain()
        
        while True:
            user_input = input("You:")
            if user_input.lower().strip() == "bye":
                break
            model_input = {
                "question": user_input,
                "chat_history": self.chat_history,
            }
            response = self.chain.invoke(model_input)
            print("AI:",self.clean_response(response))
            #giving context of only last two responses
            self.chat_history = self.chat_history[-2:]
            self.chat_history.extend(
                [
                    HumanMessage(content=user_input),
                    AIMessage(content=self.clean_response(response))
                ]
            )
            print()
            

In [10]:
config = {
    "seed":42,
    "model_path":"mistralai/Mistral-7B-Instruct-v0.1",
    "embeddings_model_path":"sentence-transformers/all-MiniLM-L6-v2",
    "pdf_path":"/kaggle/input/rag-testing/",
    "vector_db_save_path":"faiss_index_hp",
    "vector_db_load_path":"faiss_index_hp",
    "device":"cuda"
}

In [6]:
model = AutoModelForCausalLM.from_pretrained(config["model_path"],
                                            device_map='auto',
                                            torch_dtype=torch.float16,
                                            low_cpu_mem_usage=True)
tokenizer = AutoTokenizer.from_pretrained(config["model_path"])
embeddings = HuggingFaceInstructEmbeddings(model_name = config["embeddings_model_path"],
                                           model_kwargs = {"device":config["device"]})

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

load INSTRUCTOR_Transformer
max_seq_length  512


In [11]:
chat_bot = ChatBot(model,
                   tokenizer,
                   embeddings,
                   pdf_path=config["pdf_path"],
                  )

In [12]:
chat_bot.start_chat()

100%|██████████| 1/1 [00:08<00:00,  8.09s/it]


You: What is the name of the book ?


AI: The Network State



You: Who wrote this book ?


AI: Balaji Srinivasan



You: Give me detailed summary of this book


AI: "The Network State" is a book written by Balaji Srinivasan that discusses the concept of network states. According to the author, a network state is an organization that operates fully digitally but has real-world capabilities, such as money and a purpose. It can exist as a seed startup or a public company that has been recognized by at least one legacy state. The book argues that network states are feasible and desirable, but not inevitable, and provides examples of how they



You: Give me names of all the chapters in this book


AI: Chapter 1: Introduction
                                                    Chapter 2: The Nation State System
                                                    Chapter 3: The Digital Revolution
                                                    Chapter 4: The Network State System
                                                    Chapter 5: From Nation States to Network States
                                                    Chapter 6: Conclusion



You: give me detailed summary of chapter 2


AI: Chapter 2 is not mentioned in the provided context.



You: Name of the chapter 2 is The nation state system




AI: Chapter 2 does not have a name mentioned in the provided context.



You: bye
