In [1]:
from pathlib import Path
from pydantic import BaseModel
from typing import Any
from neo4j import GraphDatabase
from dotenv import load_dotenv
import json
import os
import duckdb
load_dotenv()

True

In [97]:
class Neo4jGraph:
    """Small convenience wrapper around a Neo4j driver bound to one database.

    The driver is created in ``__init__`` and stays open until ``close()`` is
    called. The instance can also be used as a context manager, in which case
    the driver is closed when the ``with`` block exits.
    """

    def __init__(self, neo4j_uri:str, neo4j_username:str, neo4j_password:str, db:str)->None:
        """Remember the connection settings and create the driver.

        Args:
            neo4j_uri: Connection URI handed to ``GraphDatabase.driver``.
            neo4j_username: First element of the ``auth`` tuple.
            neo4j_password: Second element of the ``auth`` tuple.
            db: Database name used for every session opened by ``query``.
        """
        self.uri  = neo4j_uri
        self.auth = (neo4j_username, neo4j_password)
        self.db = db
        self.driver = GraphDatabase.driver(self.uri, auth=self.auth)

    def query(self, query:str, params:"dict | None"=None):
        """Run ``query`` in a fresh session and return all records as a list.

        Args:
            query: Cypher text to execute.
            params: Query parameters; omitted or ``None`` means no parameters.

        Returns:
            A list holding every record yielded by ``session.run``.
        """
        parameters = {} if params is None else params
        with self.driver.session(database=self.db) as session:
            result = session.run(query, parameters)
            # Materialise while the session is still open: the result is
            # consumed lazily and the session is closed on leaving the block.
            return list(result)

    def close(self)->None:
        """Close the underlying driver."""
        self.driver.close()

    def __enter__(self):
        """Return the graph itself so it can be bound with ``with ... as``."""
        return self

    def __exit__(self, exc_type, exc_value, traceback)->None:
        """Close the driver when the ``with`` block ends; exceptions propagate."""
        self.close()
        
class Node(BaseModel):
    """Pydantic model describing one graph node to be written."""

    # Node identifier.
    id: int
    # Labels of the node, given as a list of strings.
    label: list[str]
    # Free-form property map keyed by property name.
    properties: dict[str, Any]

class Relation(BaseModel):
    """Pydantic model describing one graph relation."""

    # Relation identifier (a string, unlike Node.id which is an int).
    id: str
    # Single label of the relation.
    label: str
    # Free-form property map keyed by property name.
    properties: dict[str, Any]


In [98]:
def _node_to_row(node) -> dict:
    """Turn one node object into the plain mapping used as an UNWIND row."""
    if hasattr(node, "model_dump"):
        # pydantic v2 API; also converts nested models to plain data.
        return node.model_dump()
    if hasattr(node, "dict"):
        # pydantic v1 API.
        return node.dict()
    # Fallback for plain objects: a copy of the instance attributes.
    return dict(vars(node))


# Annotations are quoted so defining this function does not require the
# Neo4jGraph / Node classes to exist yet (notebook cells may run in any order).
def merge_node(graph:"Neo4jGraph", nodes:"list[Node]"):
    """Merge ``nodes`` into the graph with a single ``apoc.merge.node`` query.

    Each node becomes one row of the ``$data`` parameter. The row's ``label``
    is passed as the labels, ``{id: row.id}`` as the identifying properties,
    and ``row.properties`` as both the on-create and on-match properties.

    Args:
        graph: Object exposing ``query(query, params)``.
        nodes: Nodes to merge; each must provide ``id``, ``label`` and
            ``properties``.

    Returns:
        Whatever ``graph.query`` returns for the ``RETURN node`` clause, or an
        empty list when ``nodes`` is empty (no query is sent in that case).
    """
    rows = [_node_to_row(node) for node in nodes]
    if not rows:
        # UNWIND over an empty list yields no rows, so skip the round trip.
        return []

    cypher = (
        "UNWIND $data as row "
        "CALL apoc.merge.node("
        "row.label, "
        "{id:row.id}, "
        "row.properties, "
        "row.properties ) "
        "YIELD node "
        "RETURN node"
    )
    return graph.query(cypher, {"data": rows})

In [39]:
# Input parquet file, resolved relative to the parent of the current working directory.
base_path = Path.cwd().parent
source_path = base_path / 'silver/anilist/anime/anime-2024-1-11.parquet'

# Neo4j connection settings come from the environment;
# a missing variable raises KeyError.
neo4j_uri, neo4j_username, neo4j_password, neo4j_dbname = (
    os.environ[key]
    for key in ('neo4j_uri', 'neo4j_username', 'neo4j_password', 'neo4j_dbname')
)

In [100]:
# Open the Neo4j connection used by the cells below.
graph = Neo4jGraph(
    neo4j_uri=neo4j_uri,
    neo4j_username=neo4j_username,
    neo4j_password=neo4j_password,
    db=neo4j_dbname,
)

In [33]:
# Open the parquet file as a DuckDB relation and display its shape.
tb_anime = duckdb.read_parquet(os.fspath(source_path))
tb_anime.shape

(760, 24)

In [34]:
# Fetch every row. The previous fetchmany(size=1000) would silently drop
# rows as soon as the parquet file grows beyond 1000 entries.
doc_list = tb_anime.fetchall()
# Column names, in the same order as the values of each fetched row.
columns = tb_anime.columns

In [35]:
# Wrap each fetched row in a Node labelled 'Anime': the first value is used as
# the node id and the remaining columns become its properties.
# NOTE(review): assumes the first column holds the identifier; confirm against the parquet schema.
node_labels  = ['Anime']
nodes = [
    Node(
        id=row[0],
        label=node_labels,
        properties=dict(zip(columns[1:], row[1:])),
    )
    for row in doc_list
]


In [60]:
# Write all prepared nodes to Neo4j and keep the returned records.
res = merge_node(graph=graph, nodes=nodes)