In [None]:
from langchain.document_loaders import PyPDFDirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from transformers import pipeline, AutoTokenizer, AutoModelForQuestionAnswering, ConversationalPipeline
from langchain import HuggingFacePipeline
from langchain.chains import RetrievalQA, ConversationalRetrievalChain

import os
import sys
import torch

# Directory that is scanned for the source PDF files.
DATA_PATH = "data"
# Folder the FAISS index is written to by save_local().
DB_FAISS_PATH = "vectorstore/db_faiss"

In [None]:
# Probe the GPU through PyTorch only. The rest of this notebook runs on
# PyTorch, and initialising a TensorFlow GPU device just to check availability
# makes TensorFlow reserve most of the GPU memory by default, leaving little
# for the model that is loaded further down.
gpu_available = torch.cuda.is_available()
print("GPU Available:", gpu_available)
if gpu_available:
    # Name of the default CUDA device, for a quick sanity check of the runtime.
    print("GPU Device:", torch.cuda.get_device_name(0))

In [None]:
# Load the PDF files found under DATA_PATH into LangChain documents.
loader = PyPDFDirectoryLoader(DATA_PATH)
documents = loader.load()

# Stop here when nothing was loaded (wrong path, empty folder, ...): the
# following cells index documents[...]/splits[0] and would otherwise fail
# with an unrelated-looking error.
if not documents:
    raise ValueError(f"No PDF content could be loaded from {DATA_PATH!r}")

In [None]:
# Cut the loaded documents into chunks of at most 500 characters, with a
# 10-character overlap between neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=10,
)
splits = text_splitter.split_documents(documents)

# Quick look at the result: number of chunks and the first chunk.
print(len(splits), splits[0])

In [None]:
# Sentence-transformers model used to embed both the chunks and the queries.
# Embeddings are left un-normalised; no device is requested explicitly.
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
    encode_kwargs={"normalize_embeddings": False},
)

In [None]:
# Embed every chunk and build an in-memory FAISS index from the result.
faiss_db = FAISS.from_documents(documents=splits, embedding=embeddings)

# Persist the index under DB_FAISS_PATH.
faiss_db.save_local(folder_path=DB_FAISS_PATH)

In [None]:
# Smoke-test the index: fetch the 3 chunks closest to a sample question and
# show the text of the best match.
question = "What is insurance period?"
searchDocs = faiss_db.similarity_search(query=question, k=3)
print(searchDocs[0].page_content)

In [None]:
model_name = "meta-llama/Llama-2-7b-chat-hf"

from torch import bfloat16
import transformers

# Hugging Face credentials used by every from_pretrained() call below.
# Read from the environment so no secret is stored in the notebook; when no
# variable is set, fall back to `True`, which makes transformers use the
# token cached by `huggingface-cli login`.
hf_auth = (
    os.environ.get("HF_TOKEN")
    or os.environ.get("HUGGINGFACEHUB_API_TOKEN")
    or True
)

# 4-bit NF4 quantisation settings. NOTE: this config is built but not applied,
# because `quantization_config` is commented out in the model call below.
bnb_config = transformers.BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type='nf4',
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.float16
)

model_config = transformers.AutoConfig.from_pretrained(
    model_name,
    use_auth_token=hf_auth
)

# Causal-LM weights; device_map='auto' lets the library place the layers on
# the available devices.
model = transformers.AutoModelForCausalLM.from_pretrained(
    model_name,
    trust_remote_code=True,
    config=model_config,
    # quantization_config=bnb_config,  # uncomment to load the model in 4-bit
    device_map='auto',
    use_auth_token=hf_auth
)

tokenizer = transformers.AutoTokenizer.from_pretrained(
    model_name,
    use_auth_token=hf_auth
)

In [11]:
# Expose the FAISS index as a retriever that returns the 4 best-matching
# chunks for a query.
retriever = faiss_db.as_retriever(search_kwargs=dict(k=4))

# Sanity check: run one query through the retriever and show the top chunk.
docs = retriever.get_relevant_documents("What is insurance period?")
print(docs[0].page_content)

(chargeable, add area code before this number in case of mobile call) or email us at 'bagichelp@bajajallianz.co.in' *****
Damage Details:-
Kindly contact our nearest / local offices for No Claim Bonus Confirmations.
Insured Declared Value:
Policy Year Tenure From Tenure To Total Sum Insured
The schedule of age wise Insured Declared Value (IDV) as shown in the above table is applicable for the purpose of Total Loss including Theft/ Con-


In [13]:
# Build a generation pipeline around the model and tokenizer that were loaded
# in the previous cell.
#
# * The task is "text-generation": the checkpoint is a causal LM, whereas
#   "question-answering" is the extractive task that expects a
#   question/context pair and cannot be called with a single prompt string.
# * The already-loaded `model` object is passed instead of `model_name`, so
#   the 7B checkpoint is not downloaded and instantiated a second time.
# * Sampling settings live on the pipeline itself so they apply to every call,
#   including the ones made through the LangChain wrapper below.
#   `temperature` only takes effect with do_sample=True, and the budget is
#   expressed in new tokens so that a long retrieved context does not eat it.
question_answerer = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=512,
    do_sample=True,
    temperature=0.2,
)

# Smoke test: one stand-alone prompt, printing the generated text.
res = question_answerer("Explain to me the difference between nuclear fission and fusion.")
print(res[0]["generated_text"])

# Wrap the pipeline so that LangChain chains can use it as an LLM.
llm = HuggingFacePipeline(pipeline=question_answerer)

KeyboardInterrupt: 

In [None]:
# Conversational retrieval chain used by the chat loop: it takes a dict with
# 'question' and 'chat_history', retrieves chunks through `retriever`, and
# answers with `llm`. The retrieved source documents are returned alongside
# the answer.
qa = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=retriever,
    return_source_documents=True,
)

In [None]:
# Interactive chat over the indexed PDFs.
# `chat_history` accumulates (question, answer) pairs and is passed back to
# the chain on every turn.
chat_history = []
while True:
    # Trim surrounding whitespace so " exit " still ends the session.
    query = input('Prompt: ').strip()
    if query.lower() in ["exit", "quit", "q"]:
        print('Exiting')
        # Leave the loop instead of calling sys.exit(), which raises
        # SystemExit inside the notebook kernel.
        break
    if not query:
        # Nothing was typed; ask again rather than querying the chain.
        continue

    qa_input = {'question': query, 'chat_history': chat_history}
    result = qa(qa_input)
    print('Answer: ' + result['answer'] + '\n')
    chat_history.append((query, result['answer']))