In [None]:
# Install the required packages in case they are not present already:
# uncomment the next 2 lines

#import sys
#!{sys.executable} -m pip install numpy==1.26.4 pandas generative-ai-hub-sdk[all] hana_ml

In [None]:
# Personal suffix appended to "EMBEDDINGS_" to form the name of this notebook's vector table
MY_NAME = 'SJ'

In [None]:
import pandas as pd
import numpy as np
import json

# Load the HANA connection details (the 'host', 'port', 'user' and 'password'
# entries read by the connection cell) from a local JSON file.
try:
    with open('secrets/hanakey.json', 'r', encoding='utf-8') as file:
        secret = json.load(file)
except (OSError, ValueError) as err:
    # OSError: the file is missing or unreadable.
    # ValueError: malformed JSON (json.JSONDecodeError) or undecodable bytes.
    # The problem is only reported, not raised, so the notebook can continue;
    # `secret` stays undefined in that case and cells that read it will fail.
    print(f'Error reading JSON file hanakey.json: {err}')

In [None]:
# Embedding client: AI Core OpenAI proxy for LangChain, provided by generative-ai-hub-sdk
from gen_ai_hub.proxy.langchain.openai import OpenAIEmbeddings

# The deployment id selects the AI Core deployment that computes the embeddings
embed = OpenAIEmbeddings(
    deployment_id='d98a03280503e047',
)

In [None]:
# Open the HANA connection through hana-ml
from hana_ml import ConnectionContext

# Alternative when the credentials are kept in hdbuserstore:
# cc = ConnectionContext(userkey='VDB_BETA', encrypt=True)
cc = ConnectionContext(
    address=secret['host'],
    port=secret['port'],
    user=secret['user'],
    password=secret['password'],
    encrypt=True,
)

# Underlying connection object, handed to the vector store later on
connection = cc.connection

# Print the HANA version and the current schema as a quick connectivity check
print(cc.hana_version())
print(cc.get_current_schema())

In [None]:
# Read the source data and preview its first 18 rows
df = pd.read_csv(filepath_or_buffer='data_en.csv')
df.head(n=18)

In [None]:
from langchain_community.vectorstores.hanavector import HanaDB

# Vector store on top of the HANA connection; the table is created if it does not exist yet
db = HanaDB(
    embedding=embed,
    connection=connection,
    table_name="EMBEDDINGS_" + MY_NAME,
    vector_column_length=3072,
)

In [None]:
# Delete already existing documents from the table.
# NOTE(review): the empty filter presumably matches every row - confirm against the HanaDB docs.
db.delete(filter={})

In [None]:
# Wrap every entry of the 'Material' column in a LangChain Document to store in HANA DB;
# add further metadata here if necessary
from langchain.docstore.document import Document

documents = [
    Document(page_content=material, metadata={"type": "material"})
    for material in df['Material']
]

In [None]:
# Add the documents to the database table (automatically creates embeddings for them).
# NOTE(review): re-running this cell without clearing the table first presumably stores duplicates - confirm.
db.add_documents(documents)

In [None]:
# Take a look at the table behind the vector store. The table name is derived from
# MY_NAME, the same way the HanaDB store builds it, so this query keeps pointing at
# the right table when MY_NAME is changed.
hdf = cc.sql(f''' SELECT "VEC_TEXT", "VEC_META", TO_NVARCHAR("VEC_VECTOR") AS "VEC_VECTOR" FROM "EMBEDDINGS_{MY_NAME}" ''')
#hdf = cc.sql(f''' SELECT COUNT(*) FROM "EMBEDDINGS_{MY_NAME}"''')

# Fetch only the first 10 rows into a local pandas DataFrame for display
localdf = hdf.head(10).collect()
localdf

In [None]:
# Search phrase; the commented-out lines are alternative queries to try
query = "I need a mirror"
#query = "Es werde licht"
#query = "Türen"

# Similarity search on the database, restricted to documents tagged as "material"
docs = db.similarity_search_with_relevance_scores(
    query,
    k=10,
    score_threshold=0.1,
    filter={"type": "material"},
)
print(f"Found {len(docs)} matching items for the query {query}")

# Tabulate the results; each hit is unpacked as a (document, score) pair
docdata = [
    {"Item": hit.page_content, "Metadata": hit.metadata, "Similarity Score": score}
    for hit, score in docs
]
resultdf = pd.DataFrame(docdata)
resultdf