In [1]:
import os
from dotenv import load_dotenv
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_community.document_loaders import PyPDFLoader
from langchain_openai import AzureOpenAIEmbeddings, AzureChatOpenAI
from langchain.chains import RetrievalQA

# Module-level configuration.

# Source document: PDF_FILE_NAME is joined onto WORK_DIR when the PDF is loaded.
WORK_DIR = '/Users/I069899/Documents/study/AI/ai_anna/'
PDF_FILE_NAME = 'data/IRM_Help.pdf'

# Folder the FAISS index is saved to and loaded back from (a relative path).
DB_PATH = "data/vectordb/"

# Chunk size and overlap handed to the text splitter.
SPLIT_DOC_SIZE = 1000
CHUNK_OVERLAP = 50

# NOTE(review): not referenced anywhere in the visible code - confirm it is
# still needed.
MAX_TOKEN = 8000

# Point the OpenAI client at Azure via environment variables, then load the
# remaining settings from a per-user .env file.
os.environ["OPENAI_API_TYPE"] = "azure"
os.environ["OPENAI_API_VERSION"] = "2023-05-15"
os.environ["OPENAI_API_BASE"] = "https://pvg-azure-openai-uk-south.openai.azure.com/openai"
# expanduser("~") is used instead of os.getenv("HOME"): getenv returns None
# when HOME is unset, and None + str raised TypeError at import time.
ENV_PATH = os.path.join(os.path.expanduser("~"), "Documents", "src", "openai", ".env")
# NOTE(review): presumably the .env file supplies the API key - confirm.
load_dotenv(dotenv_path=ENV_PATH, verbose=True)

def load_pdf_and_split_text():
    """Read the configured PDF and return it broken into text chunks.

    The file at ``WORK_DIR`` joined with ``PDF_FILE_NAME`` is loaded with
    ``PyPDFLoader``; the loaded pages are then split on newlines by a
    ``CharacterTextSplitter`` configured with ``SPLIT_DOC_SIZE`` and
    ``CHUNK_OVERLAP``.

    Returns:
        The documents produced by ``split_documents`` for the loaded pages.
    """
    pdf_path = os.path.join(WORK_DIR, PDF_FILE_NAME)
    pdf_pages = PyPDFLoader(pdf_path).load()

    splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=SPLIT_DOC_SIZE,
        chunk_overlap=CHUNK_OVERLAP,
    )
    return splitter.split_documents(pdf_pages)

def initialize_data():
    """Build the FAISS index from the PDF and create the retrieval QA chain.

    The PDF chunks from ``load_pdf_and_split_text()`` are embedded into a
    FAISS index, which is saved under ``DB_PATH`` and loaded back. A
    ``RetrievalQA`` chain is then created over that index with a
    similarity-score-threshold retriever (threshold 0.5), configured to
    return its source documents alongside the answer.

    Side effects:
        Writes the index to ``DB_PATH`` and rebinds the module-level global
        ``AMAZON_REVIEW_BOT`` to the new chain.

    Returns:
        The newly created chain (the same object as ``AMAZON_REVIEW_BOT``).
    """
    global AMAZON_REVIEW_BOT

    documents = load_pdf_and_split_text()

    # One embeddings client is enough for both building and reloading the
    # index; the original constructed two identical ones.
    embeddings = AzureOpenAIEmbeddings()
    FAISS.from_documents(documents, embeddings).save_local(DB_PATH)
    loaded_db = FAISS.load_local(DB_PATH, embeddings)

    language_model = AzureChatOpenAI(model_name="gpt-35-turbo", temperature=0.5)
    retriever = loaded_db.as_retriever(
        search_type="similarity_score_threshold",
        search_kwargs={"score_threshold": 0.5},
    )

    # return_source_documents is passed at construction instead of being
    # set on the chain afterwards.
    AMAZON_REVIEW_BOT = RetrievalQA.from_chain_type(
        language_model,
        retriever=retriever,
        return_source_documents=True,
    )
    return AMAZON_REVIEW_BOT

def search_and_print_result(query):
    """Run *query* through the retrieval QA chain and print the outcome.

    A fresh chain is obtained from ``initialize_data()`` on every call.
    The answer is printed only when the chain returned at least one source
    document; otherwise ``"No answer found."`` is printed.

    Args:
        query: The question text, passed to the chain under the ``"query"``
            key.

    Returns:
        None. The result is written to stdout.
    """
    qa_chain = initialize_data()

    # Chain.invoke replaces the deprecated direct call ``chain({...})``.
    response = qa_chain.invoke({"query": query})
    answer = response["result"]
    docs = response["source_documents"]

    if docs:
        print("***** Answer is : ", answer)
    else:
        print("No answer found.")
        
def check_result(result):
    """Replace the "not enough information" apology with a sentinel value.

    Returns the string ``"no_result_found"`` when *result* contains the
    apology phrase (case-sensitive match); otherwise returns *result*
    unchanged.
    """
    apology_marker = "I'm sorry, I don't have enough information"
    return "no_result_found" if apology_marker in result else result

# Quick check of check_result: the sample text contains the apology phrase,
# so the sentinel value is printed.
result = "I'm sorry, I don't have enough information to understand your request."
print(check_result(result))  # prints: no_result_found

# Run one query end to end; this builds the index and the QA chain first.
search_and_print_result("Ying qinmin")

  warn_deprecated(


Answer:  I'm sorry, I don't have enough information to understand your request. Can you please provide more context or clarify your question?
