In [None]:
from neo4j import GraphDatabase
from sentence_transformers import SentenceTransformer

# Kredensial Neo4j
NEO4J_URI = "bolt://localhost:7687"
NEO4J_USERNAME = "neo4j"
NEO4J_PASSWORD = "password"

# Inisialisasi driver Neo4j
driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USERNAME, NEO4J_PASSWORD))

# Inisialisasi model embedding
embedder = SentenceTransformer('sentence-transformers/distiluse-base-multilingual-cased-v2')

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def fetch_restaurants(tx):
    query = "MATCH (r:Restaurant) RETURN r.r_id AS r_id, r.restaurant AS name"
    return list(tx.run(query))

with driver.session() as session:
    restaurants = session.execute_read(fetch_restaurants)

In [3]:
restaurants

[<Record r_id=1 name=' Clouds Coffee'>,
 <Record r_id=2 name=' Mie Jeder'>,
 <Record r_id=3 name='AHA by Kudos'>,
 <Record r_id=4 name='AHAN CHASIOPUI'>,
 <Record r_id=5 name='Aiciro, BG Junction'>,
 <Record r_id=6 name='Amalia Cake, Wringinanom'>,
 <Record r_id=7 name='Amanda Brownies, Sunggal'>,
 <Record r_id=8 name='Amanda Brownies, Tegalsari'>,
 <Record r_id=9 name='Aneka Gorengan Siaga'>,
 <Record r_id=10 name='Aneka Seblak Endull'>,
 <Record r_id=11 name='Angkringan Abas Krian'>,
 <Record r_id=12 name='Arangsimpur'>,
 <Record r_id=13 name='Arkana Cafe, Tuasan'>,
 <Record r_id=14 name='Assalam Resto'>,
 <Record r_id=15 name='Ayam Gacoan,Waru'>,
 <Record r_id=16 name='Ayam Trolok'>,
 <Record r_id=17 name='BANANESH'>,
 <Record r_id=18 name='BEBEK SIMAYAKHO SURABAYA'>,
 <Record r_id=19 name='BICOPI GUNUNGANYAR'>,
 <Record r_id=20 name='BS Coffee, Halat'>,
 <Record r_id=21 name='Baked Well, Petula'>,
 <Record r_id=22 name='Bakmi Kun'>,
 <Record r_id=23 name='Bakmi Lopui 93'>,
 <Record

In [4]:
restaurant_names = [r["name"] for r in restaurants]
embeddings = embedder.encode(restaurant_names)

In [5]:
embeddings

array([[ 0.02955594, -0.08057141, -0.04218303, ...,  0.04394352,
        -0.02660408,  0.08711329],
       [ 0.02478714,  0.00312811, -0.04371041, ...,  0.0083868 ,
        -0.02024836, -0.03979936],
       [ 0.02200453,  0.09500322,  0.02613436, ..., -0.00082575,
        -0.00353571,  0.03480814],
       ...,
       [ 0.01463079,  0.00223859, -0.0115818 , ..., -0.05965132,
         0.02015671, -0.03127729],
       [-0.01661389, -0.02644294, -0.02270968, ..., -0.00948348,
         0.00958199,  0.00654138],
       [ 0.02488373,  0.00939257, -0.01796604, ..., -0.02039219,
        -0.04197638,  0.04033522]], dtype=float32)

In [6]:
restaurant_id = [r["r_id"] for r in restaurants]

In [7]:
import pandas as pd

data = list(zip(restaurant_id, embeddings))
columns = ['r_id', 'dbmcv2embedding']

df = pd.DataFrame(data, columns=columns)
df

Unnamed: 0,r_id,dbmcv2embedding
0,1,"[0.029555941, -0.080571406, -0.042183034, 0.02..."
1,2,"[0.02478714, 0.0031281132, -0.043710407, -0.05..."
2,3,"[0.022004534, 0.095003225, 0.026134364, 0.0290..."
3,4,"[0.024054483, 0.08342542, -0.021543853, 0.0146..."
4,5,"[0.0014816677, -0.013306294, 0.024911638, -0.0..."
...,...,...
502,503,"[-0.051765747, 0.0835114, -0.042090345, -0.035..."
503,504,"[-0.037178908, 0.04570786, -0.027657516, -0.03..."
504,505,"[0.014630788, 0.0022385903, -0.011581804, 0.01..."
505,506,"[-0.016613891, -0.026442943, -0.022709675, -0...."


In [8]:
# Define the function to update node properties
def update_node_properties(tx, r_id, dbmcv2embedding):
    query = """
    MATCH (r:Restaurant {r_id: $r_id})
    SET r.dbmcv2embedding = $dbmcv2embedding
    """
    tx.execute_query(
        query,
        r_id=r_id,
        dbmcv2embedding=dbmcv2embedding
    )

In [9]:
for index, row in df.iterrows():
    update_node_properties(driver, row['r_id'], row['dbmcv2embedding'])
    print(f"{row['r_id']} has success")

print("All embedding data have been updated into neo4j")

1 has success
2 has success
3 has success
4 has success
5 has success
6 has success
7 has success
8 has success
9 has success
10 has success
11 has success
12 has success
13 has success
14 has success
15 has success
16 has success
17 has success
18 has success
19 has success
20 has success
21 has success
22 has success
23 has success
24 has success
25 has success
26 has success
27 has success
28 has success
29 has success
30 has success
31 has success
32 has success
33 has success
34 has success
35 has success
36 has success
37 has success
38 has success
39 has success
40 has success
41 has success
42 has success
43 has success
44 has success
45 has success
46 has success
47 has success
48 has success
49 has success
50 has success
51 has success
52 has success
53 has success
54 has success
55 has success
56 has success
57 has success
58 has success
59 has success
60 has success
61 has success
62 has success
63 has success
64 has success
65 has success
66 has success
67 has success
68 h

In [10]:
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

# Menghitung matriks kemiripan
similarity_matrix = cosine_similarity(embeddings)

# Menyimpan relasi SIMILAR_TO_DBMCV2
with driver.session() as session:
    for idx, restaurant in enumerate(restaurants):
        # Mendapatkan indeks restoran dengan kemiripan tertinggi (selain dirinya sendiri)
        similar_indices = np.argsort(similarity_matrix[idx])[::-1][1:6]
        for sim_idx in similar_indices:
            similarity_score = float(similarity_matrix[idx][sim_idx])
            session.run("""
                MATCH (r1:Restaurant {r_id: $r1_id}), (r2:Restaurant {r_id: $r2_id})
                MERGE (r1)-[s:SIMILAR_TO_DBMCV2]->(r2)
                SET s.score = $score
            """, r1_id=restaurant["r_id"], r2_id=restaurants[sim_idx]["r_id"], score=similarity_score)