In [1]:
include("src/lisa_neo4j.jl")

using ..LisaNeo4j
using SQLite, DBInterface
using MurmurHash3
using TextAnalysis
using HTTP, JSON3
using Base64
using PooledArrays
using EasyConfig

# Open the local "lisa.db" store; later cells read its `sqlitedb` handle.
db = Graph.DB("lisa.db")

# Transaction-commit HTTP endpoint of the Neo4j instance.
# Override with the NEO4J_URL environment variable to target another server.
url = get(ENV, "NEO4J_URL", "http://localhost:7474/db/neo4j/tx/commit")

# Request headers: JSON payload plus HTTP Basic authentication.
# Credentials are taken from NEO4J_USER / NEO4J_PASSWORD when they are set, so real
# secrets do not have to live in the notebook. The fallbacks are the values this
# notebook was originally written with (local development only).
# The `let` block keeps the plain-text user/password out of the notebook's globals.
headers = let user = get(ENV, "NEO4J_USER", "neo4j"),
    password = get(ENV, "NEO4J_PASSWORD", "yanliu65")

    [
        "Content-Type" => "application/json",
        "Authorization" => "Basic " * base64encode(user * ":" * password),
    ]
end

2-element Vector{Pair{String, String}}:
  "Content-Type" => "application/json"
 "Authorization" => "Basic bmVvNGo6eWFubGl1NjU="

In [2]:
# Look up the references (sha1) of the nodes matching the tokens
# "sex", "taxi" and "day".
rows = LisaNeo4j.search_by_tokens(db.sqlitedb, "sex", "taxi", "day")
println(rows)

# Gather the node references found in the edges table for those rows.
# `edges` starts empty and is iterated further down, so `select_edges` presumably
# fills it in place -- confirm against its definition.
edges = Any[]
edges_refs = LisaNeo4j.select_edges(db.sqlitedb, rows, edges)

# References from the search plus the references reached through the edges.
refs = union(edges_refs, rows)

# Load the actual node records for every collected reference.
nodes = Any[]
LisaNeo4j.select_nodes(db.sqlitedb, refs, nodes)
println("nodes: ", nodes)

# Push every node to Neo4j; ";" is stripped from the labels before building the query.
for n in nodes
    query = LisaNeo4j.add_neo4j_node(replace(string(n.labels), ";" => ""), n)
    println(query)
    data = LisaNeo4j.request(url, headers, query)
end

# Push every collected edge to Neo4j.
for e in edges
    query = LisaNeo4j.add_neo4j_edge(e)
    data = LisaNeo4j.request(url, headers, query)
end

Set(Any["abe1c6f48f6122e051e90390122b74ba87523bf4", "4cd923b0cb16b50ffafbbafa61c9287ce236fcd1", "7b40c728e157dfb075ac740d480e2ed9ccbb301d", "593c66c6d82d797a818caee475cd32abe120db76", "3f9526f8d331b9519b8632a11b2d344ab7c647b6", "1295c9470c70b06cefd79bf9a16c5d93c1c425bb", "8ee8a041ff42b775ac295f7e4c25860cbda7c00e", "4bead3679f1aa0011445a22cc662b84848bad7cd", "0f73a283cd731d2f36ef6b029f24d28458b8fefc"])
nodes: Any[[1mDataFrameRow[0m
[1m Row [0m│[1m sha1                              [0m[1m labels         [0m[1m d_sha1                            [0m[1m dataset                           [0m[1m card  [0m[1m props                             [0m
     │[90m String                            [0m[90m String         [0m[90m String                            [0m[90m String                            [0m[90m Int64 [0m[90m String                            [0m
─────┼───────────────────────────────────────────────────────────────────────────────────────────────────────────

In [3]:
"""
    In this cell we are going to generate relations between matched csv files (nodes with labels "csv_file")
"""
# Cypher query: fetch up to 20 nodes labelled `csv_file` with the listed properties.
query = LisaNeo4j.cypher("MATCH (n:csv_file) RETURN n.labels, n.sha1, n.d_sha1, n.dataset, n.props LIMIT 20")
# Send the query; the response is handed to `collect_hll_sets` below.
json = LisaNeo4j.request(url, headers, query)

# Key => Neo_node, populated from the response. The keys end up as the
# "source"/"target" of the generated edges.
hlls = Dict{String, LisaNeo4j.Neo_node}()
LisaNeo4j.collect_hll_sets(json, hlls)

# Compare every ordered pair of distinct nodes. Both (a, b) and (b, a) are visited,
# so a matching pair receives a "match" edge in each direction.
for (k, v) in hlls
    for (k1, v1) in hlls
        # A node is never compared with itself.
        k == k1 && continue

        score = SetCore.match(v.hll_set, v1.hll_set)
        println("match: ", score)
        cos_sim = SetCore.cosine(v.hll_set, v1.hll_set)
        println("cosine: ", round(cos_sim * 100))

        # Only pairs scoring above 10 are written to Neo4j.
        if score > 10
            edge_props = JSON3.write(Dict("similarity" => score))
            edge = Dict{String, Any}(
                "source" => k,
                "target" => k1,
                "r_type" => "match",
                "props" => edge_props,
            )
            query = LisaNeo4j.add_neo4j_edge(LisaNeo4j.dict_to_dfrow(edge))
            println(query)
            data = LisaNeo4j.request(url, headers, query)
        end
    end
end

match: 87
cosine: 91.0
{"statements":[{"statement":"    MATCH (a), (b) WHERE a.sha1 = '6be12bee4edf7c96016907e44bb520be80dc9232' AND b.sha1 = '0b90b1fee69c77ffa3efe57db7788112ef96dba6'\n    MERGE (a)-[r:match]->(b)\n SET  r.similarity = '87'"}]}
match: 87
cosine: 91.0
{"statements":[{"statement":"    MATCH (a), (b) WHERE a.sha1 = '0b90b1fee69c77ffa3efe57db7788112ef96dba6' AND b.sha1 = '6be12bee4edf7c96016907e44bb520be80dc9232'\n    MERGE (a)-[r:match]->(b)\n SET  r.similarity = '87'"}]}


#### Now we can open the Neo4j Browser, which we are running locally in a Docker container with all default settings.

For your convenience, we have put screenshots at the bottom of **README.md**.

![alt text](<Screenshot from 2024-03-22 10-29-04.png>)