In [1]:
import os

import neo4j

# Neo4j connection settings. Each value can be overridden through an
# environment variable so real credentials never have to be committed with the
# notebook; the fallbacks are the original local-development values.
# Note: this cell runs before load_dotenv() is called further down, so only
# variables already present in the process environment are visible here.
host = os.environ.get("NEO4J_URI", "bolt://localhost")
username = os.environ.get("NEO4J_USERNAME", "neo4j")
password = os.environ.get("NEO4J_PASSWORD", "1234qwer")

# `host` is already a string, so it is passed directly (no f-string wrapper).
driver = neo4j.GraphDatabase.driver(host, auth=(username, password))

# Long-lived session reused by the write loops in later cells.
session = driver.session()

In [2]:
from openai import OpenAI
from dotenv import load_dotenv
# Run dotenv loading before the client is constructed.
# NOTE(review): presumably this pulls OPENAI_API_KEY from a local .env file so
# that OpenAI() can find it in the environment — confirm against the project setup.
load_dotenv()

# Module-level OpenAI client; get_embedding() refers to it by this name.
client = OpenAI()

In [3]:
def get_embedding(text, model="text-embedding-ada-002"):
    """Return the embedding of ``text`` produced by the OpenAI embeddings API.

    Args:
        text: String to embed. Newline characters are replaced by spaces
            before the request is sent.
        model: Name of the embedding model to request.

    Returns:
        The ``embedding`` attribute of the first item in the API response.
    """
    single_line = text.replace("\n", " ")
    response = client.embeddings.create(input=[single_line], model=model)
    first_item = response.data[0]
    return first_item.embedding

In [11]:
import pandas as pd

# Fetch the `name` property of every node carrying one of the entity labels.
records, summary, key = driver.execute_query("""
MATCH (n:CORE|HARDWARE|PLATFORM|USER|VERB) return n.name as name
""")

# Keep only usable names:
#  * a truthiness test drops both "" and None (a node without a `name`
#    property comes back as None, which would break text.replace() in
#    get_embedding);
#  * dict.fromkeys de-duplicates while preserving first-seen order, so a name
#    shared by several nodes is embedded only once. The later
#    `WHERE n.name = $name` update still reaches every node with that name.
data = list(dict.fromkeys(record['name'] for record in records if record['name']))

df = pd.DataFrame(data, columns=['name'])
df.head()

Unnamed: 0,name
0,includes
1,credit card detail
2,address
3,ssl certificate
4,secure


In [12]:
# Embed every entity name (one get_embedding call per row) and store the
# resulting vector next to the name.
df['embedding'] = df['name'].apply(get_embedding)

In [13]:
from pathlib import Path

# Cache the embeddings on disk so later cells can reload them instead of
# calling the embeddings API again.
embedding_csv = Path('output/entity_embedding_llm.csv')
# to_csv does not create missing directories; make sure `output/` exists so the
# cell also works on a fresh checkout.
embedding_csv.parent.mkdir(parents=True, exist_ok=True)
df.to_csv(embedding_csv, index=False)

In [14]:
import ast

import numpy as np

# Reload the cached embeddings.
#  * dtype / keep_default_na: read `name` verbatim as text. With pandas'
#    default NA handling an entity literally called "null", "NA" or "nan"
#    would come back as a float NaN, and purely numeric names would be parsed
#    as numbers.
read_df = pd.read_csv(
    'output/entity_embedding_llm.csv',
    dtype={'name': str},
    keep_default_na=False,
)
# Each embedding was written as the text of a Python list. ast.literal_eval
# parses literals only, so unlike eval() it cannot execute code that might be
# present in the file.
read_df['embedding'] = read_df.embedding.apply(ast.literal_eval).apply(np.array)

In [15]:
# Store the cached (OpenAI) embedding on every node whose name matches.
set_embedding_query = (
    "MATCH (n:CORE|HARDWARE|PLATFORM|USER|VERB) "
    "WHERE n.name = $name SET n.embedding = $embedding"
)
for index, row in read_df.iterrows():
    # Send a plain list of Python floats rather than a numpy array, so the
    # parameter does not depend on the driver's optional numpy support.
    vector = [float(value) for value in row['embedding']]
    # consume() waits for the auto-commit write to finish and surfaces any
    # server-side error here instead of leaving an unread result behind.
    session.run(set_embedding_query, name=row['name'], embedding=vector).consume()

In [16]:
from transformers import BertTokenizer, BertModel

# Load the tokenizer and the encoder from the same pretrained checkpoint so
# their vocabularies match.
tokenizer, model = (
    loader.from_pretrained("bert-base-uncased")
    for loader in (BertTokenizer, BertModel)
)

  from .autonotebook import tqdm as notebook_tqdm


In [17]:
def bert_embedding(text):
    """Embed ``text`` with the module-level BERT ``tokenizer`` and ``model``.

    Args:
        text: String to embed. Inputs longer than the tokenizer's maximum
            length are truncated instead of raising inside the model.

    Returns:
        numpy.ndarray: the model's ``pooler_output`` for the single input
        sequence.
    """
    # Local import keeps this cell self-contained; torch is already required
    # by the BertModel loaded above.
    import torch

    encoded_input = tokenizer(text, return_tensors='pt', truncation=True)
    # Inference only: skip autograd bookkeeping so no graph is built per call.
    with torch.no_grad():
        output = model(**encoded_input)
    # The batch holds exactly one sequence; take its pooled vector.
    pooler_output = output['pooler_output'][0]
    return pooler_output.detach().numpy()

In [None]:
# Overwrite n.embedding with the BERT embedding of each entity name.
set_embedding_query = (
    "MATCH (n:CORE|HARDWARE|PLATFORM|USER|VERB) "
    "WHERE n.name = $name SET n.embedding = $embedding"
)
for index, row in read_df.iterrows():
    embedding = bert_embedding(row['name'])
    # Pass the freshly computed BERT vector. The previous code sent
    # row['embedding'] (the cached OpenAI vector), so the BERT result was
    # computed and then discarded. tolist() converts the numpy array into
    # plain Python floats for the driver.
    session.run(
        set_embedding_query,
        name=row['name'],
        embedding=embedding.tolist(),
    ).consume()  # wait for the write and surface any server-side error

<neo4j._sync.work.result.Result at 0x2ac765550>

In [30]:
import os

import psycopg2

# PostgreSQL connection settings. The standard libpq variable names can be used
# to override them so credentials do not have to live in the notebook; the
# fallbacks are the original local-development values.
pg_host = os.environ.get("PGHOST", "localhost")
pg_database = os.environ.get("PGDATABASE", "graph")
pg_user = os.environ.get("PGUSER", "postgres")
pg_password = os.environ.get("PGPASSWORD", "postgres")

# Establish PostgreSQL connection
pg_conn = psycopg2.connect(
    host=pg_host,
    database=pg_database,
    user=pg_user,
    password=pg_password
)
# Cursor reused by the insert cell below.
pg_cursor = pg_conn.cursor()

In [32]:
# Stream the 2 nearest neighbours of every node in the projected graph
# 'myGraph2' (Euclidean similarity on the `fastrp-embedding` property) and keep
# only pairs where both nodes have a `sentence` property.
records, summary, key = driver.execute_query("""
CALL gds.knn.stream('myGraph2', {
    topK: 2,
    nodeProperties: {`fastrp-embedding`: 'EUCLIDEAN'},
    // The following parameters are set to produce a deterministic result
    randomSeed: 1337,
    concurrency: 1,
    sampleRate: 1.0,
    deltaThreshold: 0.0
})
YIELD node1, node2, similarity
WHERE gds.util.asNode(node1).sentence is not null and gds.util.asNode(node2).sentence is not null
RETURN gds.util.asNode(node1).id AS Req1, gds.util.asNode(node2).id AS Req2, similarity
ORDER BY similarity DESCENDING, Req1, Req2
""")

insert_sql = "INSERT INTO result_graph (source_id, target_id, similarity, method, embedding) VALUES (%s, %s, %s, %s, %s)"
# One parameter tuple per similarity pair; the method / embedding labels are
# the same fixed values as before.
result_rows = [
    (record['Req1'], record['Req2'], record['similarity'], 'EUCLIDEAN', 'BERT')
    for record in records
]

try:
    pg_cursor.executemany(insert_sql, result_rows)
    # Single commit for the whole batch instead of one commit per row: the
    # result set is stored atomically and without a round trip per record.
    pg_conn.commit()
except Exception:
    # A failed statement leaves the connection in an aborted transaction;
    # roll back so later cells can keep using pg_conn, then re-raise.
    pg_conn.rollback()
    raise