# In [2]:

from langchain.text_splitter import CharacterTextSplitter
from langchain.document_loaders import UnstructuredPDFLoader
import os

from langchain.embeddings import AzureOpenAIEmbeddings
from langchain.vectorstores import FAISS
import gradio as gr
from sklearn.metrics.pairwise import cosine_similarity
import pandas as pd

from dotenv import load_dotenv
from openai import AzureOpenAI

from langchain.chains import RetrievalQA
from langchain.chat_models import AzureChatOpenAI


# --- Tunable constants -------------------------------------------------------
max_token = 8000  # not referenced anywhere in the visible part of this file
split_doc_size = 1000  # passed as chunk_size to CharacterTextSplitter
chunk_overlap = 50  # passed as chunk_overlap to CharacterTextSplitter
pdf_file_name = 'data/IRM_Help.pdf'  # joined onto work_dir when the PDF is loaded
work_dir = '/Users/I069899/Documents/study/AI/ai_anna/'
db_path = "data/vectordb/"  # handed to FAISS save/load as-is (not joined onto work_dir)

# --- Credentials / environment -----------------------------------------------
# Resolve the home directory with expanduser() instead of os.getenv("HOME"):
# when HOME is unset, getenv returns None and `None + str` raises TypeError.
env_path = os.path.join(
    os.path.expanduser("~"), "Documents", "src", "openai", ".env"
)
load_dotenv(dotenv_path=env_path, verbose=True)

# These assignments run after load_dotenv(), so they override any value the
# .env file supplied for the same keys.
os.environ["OPENAI_API_TYPE"] = "azure"
os.environ["OPENAI_API_VERSION"] = "2023-05-15"
os.environ["AZURE_OPENAI_ENDPOINT"] = "https://pvg-azure-openai-uk-south.openai.azure.com"

# --- Raw Azure OpenAI client -------------------------------------------------
# The resource root belongs in `azure_endpoint`; `base_url` is used verbatim
# by the SDK, so passing the bare root there would omit the "/openai" path
# prefix that Azure routes require.
client = AzureOpenAI(
    azure_endpoint=os.environ["AZURE_OPENAI_ENDPOINT"],
    api_key=os.getenv("AZURE_OPENAI_API_KEY"),
    api_version=os.environ["OPENAI_API_VERSION"],
)


def load_pdf_splitter():
    """Load the configured PDF and split it into chunks.

    The PDF path is ``work_dir`` joined with ``pdf_file_name``; chunking uses
    the module-level ``split_doc_size`` and ``chunk_overlap`` settings.

    Returns:
        The result of ``CharacterTextSplitter.split_documents`` applied to the
        documents produced by ``UnstructuredPDFLoader.load``.
    """
    pdf_path = os.path.join(work_dir, pdf_file_name)
    documents = UnstructuredPDFLoader(pdf_path).load()

    splitter = CharacterTextSplitter(
        chunk_size=split_doc_size,
        chunk_overlap=chunk_overlap,
    )
    return splitter.split_documents(documents)



# Module-level index build: these statements execute as soon as the
# file/cell is run, before any of the functions below are called.
# 1. Load and chunk the PDF.
split_docs = load_pdf_splitter()
# 2. Embed the chunks with AzureOpenAIEmbeddings and build a FAISS index.
db = FAISS.from_documents(split_docs, AzureOpenAIEmbeddings())
# 3. Persist the index under db_path.
db.save_local(db_path)
# 4. Load the persisted index back under a second name.
new_db = FAISS.load_local(db_path, AzureOpenAIEmbeddings())
# 5. Chat model instance. NOTE(review): initialize_data() below builds its own
#    index and model, so these globals look unused by the code visible here —
#    confirm before removing.
llm = AzureChatOpenAI(model_name="gpt-35-turbo", temperature=0.5)
  

def initialize_data():
    """Build the retrieval-QA chain and publish it as ``AMAZON_REVIEW_BOT``.

    Steps: load and chunk the PDF, embed the chunks into a FAISS index, save
    that index under ``db_path``, then wrap the index in a ``RetrievalQA``
    chain whose retriever only returns chunks scoring at least 0.7.

    Side effects:
        Writes the FAISS index to ``db_path`` and (re)binds the module-level
        global ``AMAZON_REVIEW_BOT``, which ``chat`` reads.

    Returns:
        The newly created ``RetrievalQA`` chain (the same object stored in
        ``AMAZON_REVIEW_BOT``), configured to return source documents.
    """
    global AMAZON_REVIEW_BOT

    split_docs = load_pdf_splitter()
    embeddings = AzureOpenAIEmbeddings(base_url=os.getenv("AZURE_OPENAI_ENDPOINT"))

    db = FAISS.from_documents(split_docs, embeddings)
    # Persist the index for later reuse. The chain below is built from the
    # in-memory index, so the saved copy is not read back here.
    db.save_local(db_path)

    llm = AzureChatOpenAI(model_name="gpt-35-turbo", temperature=0.5)
    retriever = db.as_retriever(
        search_type="similarity_score_threshold",
        search_kwargs={"score_threshold": 0.7},
    )

    AMAZON_REVIEW_BOT = RetrievalQA.from_chain_type(llm, retriever=retriever)
    # chat() reads ans["source_documents"], so the chain must include them.
    AMAZON_REVIEW_BOT.return_source_documents = True
    return AMAZON_REVIEW_BOT

#initialize_data()

def chat(message, history):
    """Answer one chat message using the retrieval-QA chain.

    Args:
        message: The user's question; sent to the chain as ``{"query": message}``.
        history: Prior conversation as supplied by the UI. It is only logged,
            not passed to the chain.

    Returns:
        The chain's ``"result"`` text, or ``"I don't know."`` when no source
        documents were retrieved and ``enable_chat`` is off.
    """
    print(f"[message]{message}")
    print(f"[history]{history}")
    # When True, the chain's answer is returned even if retrieval produced no
    # source documents; set to False to fall back to "I don't know." instead.
    enable_chat = True

    # The global only exists after initialize_data() has run. Build the chain
    # on first use instead of failing with NameError.
    try:
        bot = AMAZON_REVIEW_BOT
    except NameError:
        bot = initialize_data()

    ans = bot({"query": message})
    if not ans["source_documents"] and not enable_chat:
        return "I don't know."

    print(f"[result]{ans['result']}")
    print(f"[source_documents]{ans['source_documents']}")
    return ans["result"]
    

def launch_ui():
    """Start the Gradio chat front end wired to ``chat``.

    The interface is launched with ``share=True`` and bound to ``0.0.0.0``,
    i.e. it listens on all network interfaces and requests a public share
    link.
    """
    chat_window = gr.Chatbot(height=600)
    ui = gr.ChatInterface(fn=chat, title="Amazon Food Review", chatbot=chat_window)
    ui.launch(share=True, server_name="0.0.0.0")

# if __name__ == "__main__":
#      initialize_data()
#      launch_ui()
    
    