In [None]:
from langchain.document_loaders import UnstructuredExcelLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from tqdm.autonotebook import tqdm, trange
from langchain_huggingface import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain_community.llms import Ollama
from langchain_core.prompts import PromptTemplate
from langchain.chains import ConversationalRetrievalChain

In [None]:
# --- Notebook configuration -------------------------------------------------
# Folder the FAISS index is written to.
DB_PATH = "vectorstore/db_faiss"
# Model name handed to the Ollama LLM wrapper.
LLM = "phi3:mini"
# Excel workbook used as the knowledge source.
DATA = "Data/synth.xlsx"

In [None]:
# Read the Excel workbook at DATA into LangChain documents.
loader = UnstructuredExcelLoader(file_path=DATA)
docs = loader.load()

In [None]:
# Display the loaded documents as this cell's output for a quick visual check.
docs

In [None]:
# Break the loaded documents into chunks (size 500, no overlap) for embedding.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=0,
)
text_chunks = text_splitter.split_documents(docs)

# Show how many chunks were produced.
len(text_chunks)

In [None]:
# Sentence-transformer model used to embed each chunk for the vector store.
embeddings = HuggingFaceEmbeddings(
    model_name='sentence-transformers/all-MiniLM-L6-v2',
)

In [None]:
# Build the FAISS index from the chunks, then persist it under DB_PATH.
docsearch = FAISS.from_documents(documents=text_chunks, embedding=embeddings)
docsearch.save_local(folder_path=DB_PATH)

In [None]:
# Retrieval sanity check: run a plain similarity search against the index
# and display the three best-matching chunks.
# Alternative probe question:
# query = 'what is the type of  Flagship 46050 Hotel Roof Top?'
query = 'What is the Published Price of Royal Residency?'
results = docsearch.similarity_search(query=query, k=3)
results

In [None]:
# Ollama-backed LLM configured with the model named in LLM and temperature 0.1.
llm = Ollama(model=LLM, temperature=0.1)

In [None]:
# Prompt text later passed to the chain as its combine-documents prompt.
# It lists the hotel data fields by name and exposes three placeholders:
#   {context}      - presumably the retrieved hotel chunks (filled in by the chain; confirm)
#   {chat_history} - earlier turns of the conversation
#   {question}     - the user's current question
# NOTE: everything inside the triple quotes, including the leading indentation
# and blank lines, is sent to the model verbatim.
template = (
    ''' 
    You will be provided information about Hotels and you have to reply to user's query fetching information from the same.
    Below you will be given a list of data of Hotels in the format:
    'id'
    'Hotel Name'
    'Star Rating'
    'Latitude'
    'Longitude'
    'Currency Code'
    'Room Price'
    'Tax'
    'Extra Guest Charge'
    'Child Charge'
    'Other Charges'
    'Discount'
    'type'
    'Base Rate'
    'Base Rate Rounded Off'
    'Published Price'
    'Published Price Rounded Off'
    'Offered Price'
    'Offered Price Rounded Off'
    'Agent Commission'
    'Agent Mark Up'
    'Service Tax'
    'TCS'
    'TDS'
    'Service Charge'
    'Total GST Amount'
    'CGST Amount'
    'CGST Rate'
    'Cess Amount'
    'Cess Rate'
    'IGST Amount'
    'IGST Rate'
    'SGST Amount'
    'SGST Rate'
    'Taxable Amount' 

    {context}
    
    {chat_history}
    The follow up question by user is: {question}
    '''
)
# Wrap the raw text in a PromptTemplate; placeholders are picked up from the braces.
prompt = PromptTemplate.from_template(template)

In [None]:
# Conversational retrieval chain: retrieves from the FAISS store and answers
# with the LLM using the custom prompt. verbose=True enables the chain's verbose output.
qa = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=docsearch.as_retriever(),
    combine_docs_chain_kwargs={"prompt": prompt},
    verbose=True,
)

In [None]:
# TEMPORARY UI
# Minimal console chat loop around the `qa` chain.
#   - type a question to get an answer
#   - empty / whitespace-only input is ignored
#   - type 'exit' to leave the loop
import sys  # no longer needed by this cell; kept in case another cell relies on it

# The history must live OUTSIDE the loop: re-creating it on every iteration
# meant the chain always saw an empty history and follow-up questions lost
# all context from earlier turns.
chat_history = []

while True:
    # Strip surrounding whitespace so ' exit' or a blank line of spaces
    # is handled the same as the bare command / empty input.
    query = input("Input Prompt: ").strip()
    print("QUERY: ", query)

    if query == 'exit':
        print('Exiting')
        # `break` ends the loop cleanly; sys.exit() raises SystemExit inside
        # the notebook cell instead of simply leaving the loop.
        break
    if query == '':
        continue

    # invoke() is the supported entry point; calling the chain object
    # directly is deprecated in current LangChain releases.
    result = qa.invoke({"question": query, "chat_history": chat_history})
    print("RESPONSE: ", result['answer'])

    # Record this turn as a (question, answer) pair so the next question
    # is answered with the conversation so far.
    chat_history.append((query, result['answer']))

----------------------------------------------------------------------