In [13]:
import streamlit as st
import os
import requests
import urllib.parse
from bs4 import BeautifulSoup
from dotenv import load_dotenv

from langchain_openai import AzureOpenAIEmbeddings,AzureChatOpenAI
from langchain_community.vectorstores import FAISS
from langchain.chains import RetrievalQA
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import CharacterTextSplitter

from openai import AzureOpenAI
from langchain.tools import tool
from langchain.agents import Tool, AgentExecutor, create_openai_tools_agent
from langchain import hub
# from pprint import pprint
# import pandas as pd



env_path = os.getenv("HOME") + "/Documents/src/openai/.env"
load_dotenv(dotenv_path=env_path, verbose=True)

split_doc_size = 1000
chunk_overlap = 50
pdf_file_name = 'data/IRM_Help.pdf'
work_dir = '/Users/I069899/Documents/study/AI/ai_anna/'
db_path =  "data/vectordb/"

os.environ["OPENAI_API_TYPE"] = "azure"
os.environ["OPENAI_API_VERSION"] = "2023-05-15"
os.environ["AZURE_OPENAI_ENDPOINT"] = "https://pvg-azure-openai-uk-south.openai.azure.com"


client = AzureOpenAI(
  azure_endpoint = os.getenv("AZURE_OPENAI_ENDPOINT"), 
  api_key=os.getenv("AZURE_OPENAI_API_KEY"),  
  api_version="2023-05-15"
)


@tool
def search_by_online_sap_help_docs(query_content:str):

    """online search method: search from online.

    Args:
        query_content: content used to search
    """
    
    encoded_query = urllib.parse.quote_plus(query_content)
    url = f"https://help.sap.com/http.svc/elasticsearch?area=content&version=&language=en-US&state=PRODUCTION&q={encoded_query}&transtype=standard,html,pdf,others&product=&to=19&advancedSearch=0&excludeNotSearchable=1"
    print("*222222  in search_by_online_sap_help_docs")
    response = requests.get(url)
    if response.status_code == 200:
        responseJson = response.json()
        if responseJson.get('status') == 'OK':
            #data = response.get('data', {})
            data = responseJson.get('data', {})
            results = data.get('results', [])
            for i, result in enumerate(results, 1):
                snippet_soup = BeautifulSoup(result.get('snippet', 'N/A'), 'html.parser')
                search_result = f"Result {i}:\nTitle: {result.get('title', 'N/A')}\nDate: {result.get('date', 'N/A')}\nProduct: {result.get('product', 'N/A')}\nURL: https://help.sap.com{result.get('url', 'N/A')}\nSnippet: {snippet_soup.get_text()}\n"
                print("*222222  in search_by_online_sap_help_docs, search_result is ",search_result)
                return search_result
        else:
            search_result = "No results found by query from the help.sap.com"
            print("*222222  in search_by_online_sap_help_docs, search_result is ",search_result)
            return search_result
    else:
        search_result = "Request failed with status code {response.status_code}"
        print("*222222  in search_by_online_sap_help_docs, search_result is ",search_result)
        return search_result
    

#@st.cache_resource
@tool
def search_by_vectorDb(query_content:str):
    """Default search method: search from vector DB.

    Args:
        query_content: content used to search
    """
    loader = PyPDFLoader(os.path.join(work_dir, pdf_file_name))
    pages = loader.load()
    text_splitter = CharacterTextSplitter(separator ="\n",chunk_size=1000,chunk_overlap=150)
    split_docs = text_splitter.split_documents(pages)
    db = FAISS.from_documents(split_docs, AzureOpenAIEmbeddings())
    db.save_local(db_path)

    new_db = FAISS.load_local(db_path, AzureOpenAIEmbeddings())
    llm = AzureChatOpenAI(model_name="gpt-35-turbo", temperature=0.5)
    
    global AMAZON_REVIEW_BOT    
    AMAZON_REVIEW_BOT = RetrievalQA.from_chain_type(llm,
                retriever=new_db.as_retriever(search_type="similarity_score_threshold",
                    search_kwargs={"score_threshold": 0.75}))
                    #search_kwargs={"score_threshold": 0.5}))
    AMAZON_REVIEW_BOT.return_source_documents = True
    print("*11111111  in search_by_vectorDb, before invoke")
    ans = AMAZON_REVIEW_BOT.invoke({"query": query_content})
    #if ans["source_documents"] or enable_chat:
    if ans["source_documents"]:
        returnResult = ans["result"]
        print("11111111 in search_by_vectorDb, returnResult is ",returnResult)
        return returnResult
    else:
        returnResult = "I don't know."
        print("11111111 in search_by_vectorDb, returnResult is ",returnResult)
        return returnResult



model = AzureChatOpenAI(deployment_name="gpt-35-turbo")

tools = [
    Tool(
        name = "search_by_vectorDb",
        func = search_by_vectorDb.run,
        description = "default search engine. search from local vector DB. if can not find proper result, swith to online search"
    ),
    Tool(
        name = "search_by_online_sap_help_docs",
        func = search_by_online_sap_help_docs.run,
        description = "if there are no proper result by default search, use this function for online search."
    )
]

prompt = hub.pull("hwchase17/openai-tools-agent")
agent = create_openai_tools_agent(model, tools, prompt)
agent_executor = AgentExecutor(
    agent=agent, tools=tools, verbose=False, handle_parsing_errors=True
)
#agent_executor.invoke({"input": "team member"})
agent_executor.invoke({"input": "sap cloud SDK"})
    
 
  


*11111111  in search_by_vectorDb, before invoke




11111111 in search_by_vectorDb, returnResult is  I don't know.
*222222  in search_by_online_sap_help_docs
*222222  in search_by_online_sap_help_docs, search_result is  Result 1:
Title: @sap/cloud-sdk-vdm | SAP Cloud SDK for JavaScript / TypeScript
Date: 2021-05-19
Product: SAP Cloud SDK
URL: https://help.sap.com/doc/69202ef7e0a64767833782132648b855/1.0/en-US/modules/_sap_cloud_sdk_vdm.html
Snippet: Module @sap/cloud-sdk-vdm ... @sap/cloud-sdk-vdm ... @sap/cloud-sdk-vdm-bank-detail-service @sap/cloud-sdk-vdm-batch-service @sap/cloud-sdk-vdm-billing-document-request-service ... @sap/cloud-sdk-vdm-buffer-sizing-service @sap/cloud-sdk-vdm-buffer-profile-service @sap/cloud-sdk-vdm-business-area-service ... @sap/cloud-sdk-vdm-cost-center-service @sap/cloud-sdk-vdm-country-service @sap/cloud-sdk-vdm-credit-memo-request-service ... 



{'input': 'sap cloud SDK',
 'output': 'The SAP Cloud SDK is a comprehensive development kit for building applications on the SAP Cloud Platform. It provides tools and libraries for JavaScript and TypeScript developers. You can find more information about the SAP Cloud SDK for JavaScript/TypeScript in the following link: [SAP Cloud SDK for JavaScript / TypeScript](https://help.sap.com/doc/69202ef7e0a64767833782132648b855/1.0/en-US/modules/_sap_cloud_sdk_vdm.html)'}