# In [None]:
import gradio as gr
from langchain.embeddings import AzureOpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.chains import RetrievalQA
from langchain.chat_models import AzureChatOpenAI
from langchain.document_loaders.pdf_loader import PDFLoader

import os
from dotenv import load_dotenv

# Default PDF that initialize_data() indexes when no path is given.
input_file_path = "data/IRM_Help.pdf"
# Location where initialize_data() persists the FAISS index.
db_path = "data/vectordb"

def initialize_data(pdf_path: str = input_file_path):
    """Index a PDF in FAISS and build the global retrieval-QA chain.

    The PDF is loaded, embedded with ``AzureOpenAIEmbeddings``, indexed in
    FAISS and persisted to ``db_path``. A ``RetrievalQA`` chain backed by
    that index is then stored in the module-level ``AMAZON_REVIEW_BOT`` so
    that ``chat`` can query it.

    Args:
        pdf_path: Path of the PDF to index. Defaults to ``input_file_path``.

    Returns:
        The configured ``RetrievalQA`` chain (the same object that is bound
        to ``AMAZON_REVIEW_BOT``).
    """
    global AMAZON_REVIEW_BOT

    documents = PDFLoader(file_path=pdf_path).load()

    db = FAISS.from_documents(documents, AzureOpenAIEmbeddings())
    # Persist the index for later reuse. The chain below queries the
    # in-memory index directly, so the saved copy is not read back here.
    db.save_local(db_path)

    llm = AzureChatOpenAI(model_name="gpt-35-turbo", temperature=0.5)

    # Only chunks whose relevance score reaches the threshold are retrieved.
    retriever = db.as_retriever(
        search_type="similarity_score_threshold",
        search_kwargs={"score_threshold": 0.7},
    )

    # chat() reads ans["source_documents"], so the chain must return them.
    AMAZON_REVIEW_BOT = RetrievalQA.from_chain_type(
        llm,
        retriever=retriever,
        return_source_documents=True,
    )
    return AMAZON_REVIEW_BOT

def chat(message, history):
    """Answer one chat message with the global retrieval-QA chain.

    Args:
        message: The user's latest message; sent to the chain as ``query``.
        history: Previous conversation turns; only logged, not used for
            answering.

    Returns:
        The chain's ``result`` text, or ``"I don't know."`` when no source
        documents were retrieved and ``enable_chat`` is off.
    """
    print(f"[message]{message}")
    print(f"[history]{history}")
    # While this is True the answer is returned even without source documents.
    enable_chat = True

    response = AMAZON_REVIEW_BOT({"query": message})
    sources = response["source_documents"]
    if not (sources or enable_chat):
        return "I don't know."

    answer = response["result"]
    print(f"[result]{answer}")
    print(f"[source_documents]{sources}")
    return answer
    

def launch_ui():
    """Build the Gradio chat interface around ``chat`` and start serving it."""
    chat_window = gr.Chatbot(height=600)
    interface = gr.ChatInterface(
        fn=chat,
        title="Amazon Food Review",
        chatbot=chat_window,
    )

    # NOTE(review): share=True with server_name="0.0.0.0" presumably makes the
    # app reachable beyond localhost -- confirm this exposure is intended.
    interface.launch(server_name="0.0.0.0", share=True)

if __name__ == "__main__":
    # Azure OpenAI connection settings; presumably picked up from the
    # environment by the Azure clients created in initialize_data().
    os.environ["OPENAI_API_TYPE"] = "azure"
    os.environ["OPENAI_API_VERSION"] = "2023-05-15"
    os.environ["OPENAI_API_BASE"] = "https://pvg-azure-openai-uk-south.openai.azure.com/openai"

    # Remaining settings come from a .env file under the user's home
    # directory. expanduser("~") still resolves the home directory when HOME
    # is unset, whereas os.getenv("HOME") would return None and make the
    # string concatenation raise TypeError.
    env_path = os.path.join(
        os.path.expanduser("~"), "Documents", "src", "openai", ".env"
    )
    load_dotenv(dotenv_path=env_path, verbose=True)

    initialize_data()
    launch_ui()
    
    
   
def process_pdf(pdf_path):
    """
    Process a PDF document and produce its document representation.

    Only the loading step is implemented so far. Splitting, embedding and
    attention-based weighting are still to be done, so the loaded documents
    are returned unchanged.

    Args:
        pdf_path: Path to the PDF document.

    Returns:
        The document representation; currently the value returned by
        ``PDFLoader.load()``.
    """

    # Load the PDF document
    loader = PDFLoader(file_path=pdf_path)
    document = loader.load()

    # TODO: split the document with a sliding window or semantic segmentation

    # TODO: generate text embeddings

    # TODO: use an attention mechanism to focus on the important semantic
    # information in the document

    # No further processing exists yet, so the loaded documents serve as the
    # representation. Returning an undefined name here would raise NameError.
    document_representation = document
    return document_representation


def save_embeddings(embeddings, vector_store_dir):
    """
    Save embeddings into a vector store.

    NOTE(review): no implementation is visible here; only the planned steps
    are listed as comments below.

    Args:
        embeddings: Document embeddings.
        vector_store_dir: Vector store directory.

    Returns:
        None
    """

    # Create the vector database index

    # Insert the data into the vector database