In [1]:
import json
with open('./credentials.json', 'r') as creds:
  credentials = json.load(creds)

In [2]:
import os
SAP_HANA_CLOUD_ADDRESS  = credentials["SAP_HANA_CLOUD"]["HANA_ADDRESS"]
SAP_HANA_CLOUD_PORT     = credentials["SAP_HANA_CLOUD"]["HANA_PORT"]
SAP_HANA_CLOUD_USER     = credentials["SAP_HANA_CLOUD"]["HANA_USER"]
SAP_HANA_CLOUD_PASSWORD = credentials["SAP_HANA_CLOUD"]["HANA_PASSWORD"]

In [3]:
import os
os.environ["AICORE_CLIENT_ID"]      = credentials["SAP_AI_CORE"]["AICORE_CLIENT_ID"]
os.environ["AICORE_CLIENT_SECRET"]  = credentials["SAP_AI_CORE"]["AICORE_CLIENT_SECRET"]
os.environ["AICORE_AUTH_URL"]       = credentials["SAP_AI_CORE"]["AICORE_AUTH_URL"]
os.environ["AICORE_RESOURCE_GROUP"] = credentials["SAP_AI_CORE"]["AICORE_RESOURCE_GROUP"]
os.environ["AICORE_BASE_URL"]       = credentials["SAP_AI_CORE"]["AICORE_BASE_URL"]   

In [4]:
import hana_ml.dataframe as dataframe
conn = dataframe.ConnectionContext(
                                   address  = SAP_HANA_CLOUD_ADDRESS,
                                   port     = SAP_HANA_CLOUD_PORT,
                                   user     = SAP_HANA_CLOUD_USER,
                                   password = SAP_HANA_CLOUD_PASSWORD, 
                                  )
conn.connection.isconnected()

True

In [5]:
from gen_ai_hub.proxy.langchain.openai import OpenAIEmbeddings
embedding = OpenAIEmbeddings(proxy_model_name='text-embedding-ada-002')

In [6]:
user_question = 'What is the meaning of the letters SAP?'
user_question_embedding = embedding.embed_documents((user_question)) 
user_question_embedding[0][:10]

[0.017259458564609486,
 -0.01752117511658708,
 0.022659065838184198,
 -0.004883063162897407,
 -0.018444067385173547,
 -0.006157205147347661,
 -0.01662583137053135,
 -0.03633715314386116,
 0.01640543946499506,
 1.1931629495305218e-05]

In [7]:
user_question_embedding_str = str(user_question_embedding[0])
sql = f'''SELECT TOP 200 "AID", "QID", "QUESTION", COSINE_SIMILARITY("QUESTION_VECTOR", TO_REAL_VECTOR('{user_question_embedding_str}')) AS SIMILARITY
        FROM FAQ_QUESTIONS ORDER BY "SIMILARITY" DESC, "AID", "QID" '''
df_remote = conn.sql(sql)
df_remote.head(20).collect()


Unnamed: 0,AID,QID,QUESTION,SIMILARITY
0,1000,1,When was SAP founded?,1.0
1,1001,1,"What does the acronym ""SAP"" stand for?",1.0
2,1002,1,What is SAP’s vision and mission?,1.0
3,1003,1,What is the business outlook for the current f...,1.0
4,1005,1,Where can I find information about the SAP Exe...,1.0
5,1006,1,Why was SAP AG converted into an SE?,1.0
6,1007,1,What is SAP's current headcount?,1.0
7,1010,1,Why is SAP focusing on sustainability?,1.0
8,1011,1,What is SAP Cloud for Sustainable Enterprises?,1.0
9,1013,1,What is the impact of SAP Sustainability solut...,1.0


In [8]:
top_n = max(df_remote.filter('SIMILARITY > 0.95').count(), 10)
top_n

15

In [9]:
df_data = df_remote.head(top_n).select('AID', 'QID', 'QUESTION').collect()
df_data

Unnamed: 0,AID,QID,QUESTION
0,1000,1,When was SAP founded?
1,1001,1,"What does the acronym ""SAP"" stand for?"
2,1002,1,What is SAP’s vision and mission?
3,1003,1,What is the business outlook for the current f...
4,1005,1,Where can I find information about the SAP Exe...
5,1006,1,Why was SAP AG converted into an SE?
6,1007,1,What is SAP's current headcount?
7,1010,1,Why is SAP focusing on sustainability?
8,1011,1,What is SAP Cloud for Sustainable Enterprises?
9,1013,1,What is the impact of SAP Sustainability solut...


In [10]:
df_data['ROWID'] = df_data['AID'].astype(str) + '-' + df_data['QID'].astype(str) + ': '
df_data = df_data[['ROWID', 'QUESTION']]
df_data

Unnamed: 0,ROWID,QUESTION
0,1000-1:,When was SAP founded?
1,1001-1:,"What does the acronym ""SAP"" stand for?"
2,1002-1:,What is SAP’s vision and mission?
3,1003-1:,What is the business outlook for the current f...
4,1005-1:,Where can I find information about the SAP Exe...
5,1006-1:,Why was SAP AG converted into an SE?
6,1007-1:,What is SAP's current headcount?
7,1010-1:,Why is SAP focusing on sustainability?
8,1011-1:,What is SAP Cloud for Sustainable Enterprises?
9,1013-1:,What is the impact of SAP Sustainability solut...


In [11]:
candiates_str = df_data.to_string(header=False,
                                  index=False,
                                  index_names=False)
print(candiates_str)

1000-1:                                                                 When was SAP founded?
1001-1:                                                What does the acronym "SAP" stand for?
1002-1:                                                     What is SAP’s vision and mission?
1003-1:  What is the business outlook for the current fiscal year and your mid-term ambition?
1005-1:                           Where can I find information about the SAP Executive Board?
1006-1:                                                  Why was SAP AG converted into an SE?
1007-1:                                                      What is SAP's current headcount?
1010-1:                                                Why is SAP focusing on sustainability?
1011-1:                                        What is SAP Cloud for Sustainable Enterprises?
1013-1:                                   What is the impact of SAP Sustainability solutions?
1016-1:                                                     

In [12]:
AI_CORE_MODEL_NAME  = 'gpt-4-32k'


In [13]:
llm_prompt = f'''
Task: which of the following candidate questions is closest to this one?
{user_question}
Only return the ID of the selected question, not the question itself

-----------------------------------

Candidate questions. Each question starts with the ID, followed by a :, followed by the question
{candiates_str}
'''
print(llm_prompt)


Task: which of the following candidate questions is closest to this one?
What is the meaning of the letters SAP?
Only return the ID of the selected question, not the question itself

-----------------------------------

Candidate questions. Each question starts with the ID, followed by a :, followed by the question
1000-1:                                                                 When was SAP founded?
1001-1:                                                What does the acronym "SAP" stand for?
1002-1:                                                     What is SAP’s vision and mission?
1003-1:  What is the business outlook for the current fiscal year and your mid-term ambition?
1005-1:                           Where can I find information about the SAP Executive Board?
1006-1:                                                  Why was SAP AG converted into an SE?
1007-1:                                                      What is SAP's current headcount?
1010-1:                 

In [14]:
from gen_ai_hub.proxy.native.openai import chat
messages = [{"role": "system", "content": llm_prompt}
           ]
kwargs = dict(model_name=AI_CORE_MODEL_NAME, messages=messages)
response = chat.completions.create(**kwargs)
print(response)

ChatCompletion(id='chatcmpl-9CTtvcOzaLto5mZaawMcCMMzYznme', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='1001-1', role='assistant', function_call=None, tool_calls=None), content_filter_results={'hate': {'filtered': False, 'severity': 'safe'}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': False, 'severity': 'safe'}})], created=1712761903, model='gpt-4-32k', object='chat.completion', system_fingerprint=None, usage=CompletionUsage(completion_tokens=4, prompt_tokens=298, total_tokens=302), prompt_filter_results=[{'content_filter_results': {'hate': {'filtered': False, 'severity': 'safe'}, 'jailbreak': {'detected': False, 'filtered': False}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': False, 'severity': 'safe'}}, 'prompt_index': 0}])


In [15]:
llm_response = response.choices[0].message.content
llm_response

'1001-1'

In [16]:
aid = qid = None
if len(llm_response.split('-')) == 2:
   aid, qid = llm_response.split('-')

   # From HANA Cloud get the question from the FAQ that matches the user request best
   df_remote = conn.table('FAQ_QUESTIONS').filter(f''' "AID" = '{aid}' AND "QID" = '{qid}' ''').select('QUESTION')
   matching_question = df_remote.head(5).collect().iloc[0,0]
    
   # From HANA Cloud get the predefined answer of the above question from the FAQ
   df_remote = conn.table('FAQ_ANSWERS').filter(f''' "AID" = '{aid}' ''').select('ANSWER')
   matching_answer = df_remote.head(5).collect().iloc[0,0]
else:
   matching_answer = "I don't seem to have an answer for that."
matching_answer

'"SAP" stands for Systems, Applications, and Products in Data Processing.'

In [17]:
print(f'The user question was: {user_question}\nThe selected questoin from the FAQ is: {matching_question}\nWith the answer: {matching_answer}')


The user question was: What is the meaning of the letters SAP?
The selected questoin from the FAQ is: What does the acronym "SAP" stand for?
With the answer: "SAP" stands for Systems, Applications, and Products in Data Processing.
