# EUIPO Taxonomy Tree Neo4j Demo
This notebook will:
* Load the taxonomy data from JSON, prepare it for Neo4j, and load it into the graph

### 0. Setup Environment

In [58]:
import os
#import pandas as pd
import re
from dotenv import load_dotenv
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores.neo4j_vector import Neo4jVector
from langchain import PromptTemplate, LLMChain
from langchain.chat_models import ChatOpenAI
from neo4j import GraphDatabase
import json

# Read settings from the local .env file; override=True lets values from .env
# replace variables that are already set in the process environment.
load_dotenv(override=True)

# Fail fast with a readable message. Previously a missing OPENAI_API_KEY only
# surfaced as "TypeError: str expected, not NoneType" when it was written back
# into os.environ, and a missing NEO4J_URI presumably failed later inside the
# driver. NEO4J_USER / NEO4J_PASSWORD are not checked so that setups without
# authentication keep working.
_missing_settings = [
    name for name in ("NEO4J_URI", "OPENAI_API_KEY") if os.getenv(name) is None
]
if _missing_settings:
    raise RuntimeError(
        "Missing required environment variable(s): " + ", ".join(_missing_settings)
    )

uri = os.getenv("NEO4J_URI")
user = os.getenv("NEO4J_USER")
password = os.getenv("NEO4J_PASSWORD")
# OPENAI_API_KEY is already present in os.environ here (os.getenv reads from
# it), so copying it back into os.environ is unnecessary.
embedding_model = OpenAIEmbeddings()

# Open the Neo4j driver and check right away that the server is reachable.
driver = GraphDatabase.driver(uri, auth=(user, password))
driver.verify_connectivity()

# Utils
def parse_title(title):
    """Return the part of *title* that precedes the first ``~``.

    ``None`` is passed through unchanged; a title without ``~`` is returned
    as is.
    """
    if title is None:
        return None
    head, _, _ = title.partition("~")
    return head

def parse_class(title):
    """Extract what follows the first ``"Class "`` marker, up to the first ``~``.

    ``None`` is passed through unchanged. When the marker does not occur, the
    title is returned untouched (including any ``~`` suffix).
    """
    if title is None:
        return None

    marker = "Class "
    marker_pos = title.find(marker)
    if marker_pos == -1:
        return title

    remainder = title[marker_pos + len(marker):]
    return remainder.partition("~")[0]

### 1. Loading Taxonomy Tree Data

In [15]:
# Location of the exported taxonomy tree, relative to the notebook directory.
file_path = "../data/taxo_jsontree_20250627.json"

# Read the whole file as UTF-8 text and parse it into Python objects.
with open(file_path, mode='r', encoding='utf-8') as f:
    data = json.loads(f.read())

# Dump the parsed tree as a quick visual check.
print(f"{data}\n")




### 2. Preparing Data for Neo4j

In [61]:
def collect_nodes(nodes, node_data, level=0, parent=None):
    """Flatten a taxonomy subtree into ``node_data`` in depth-first pre-order.

    Each visited node is appended to ``node_data`` (mutated in place) as a
    dict with ``title``, ``level`` and ``href``, plus ``key``, ``text`` and
    ``parent`` when available.

    Args:
        nodes: Iterable of node dicts; each must have a ``title`` and may have
            ``href``, ``key``, ``classScopesText`` and ``children``.
        node_data: List that receives the flattened entries.
        level: Depth of ``nodes`` in the tree (0 for the top level).
        parent: Parsed title of the parent node, or ``None`` at the top level.
    """
    # Level-1 titles are parsed with parse_class; every other level uses
    # parse_title.
    title_parser = parse_class if level == 1 else parse_title

    for item in nodes:
        title = title_parser(item['title'])
        record = {'title': title, 'level': level, 'href': item.get('href')}

        # Additional fields, copied only when present on the source node.
        if 'key' in item:
            record['key'] = item['key']
        if 'classScopesText' in item:
            record['text'] = item['classScopesText']
        if parent is not None:
            record['parent'] = parent

        node_data.append(record)

        # Descend into the children, which refer to this node by parsed title.
        subtree = item.get('children', [])
        if subtree:
            collect_nodes(subtree, node_data, level + 1, parent=title)

# Flatten the whole tree, starting from the children of the JSON root, into a
# single list of node entries and print it for inspection.
node_data = list()
collect_nodes(nodes=data['children'], node_data=node_data)
print(node_data)



### 3. Loading Data to Neo4j

In [62]:
def _create_unique_title_constraint(tx, constraint_name, label):
    """Run the Cypher that makes ``title`` unique for nodes labelled *label*.

    ``IF NOT EXISTS`` makes the statement safe to run repeatedly.
    """
    query = f"""
    CREATE CONSTRAINT {constraint_name} IF NOT EXISTS
    FOR (n:{label})
    REQUIRE n.title IS UNIQUE
    """
    tx.run(query)


def create_unique_index_category(tx):
    """Ensure a uniqueness constraint on ``title`` for ``Category`` nodes."""
    _create_unique_title_constraint(tx, "unique_title_category", "Category")


def create_unique_index_class(tx):
    """Ensure a uniqueness constraint on ``title`` for ``Class`` nodes."""
    _create_unique_title_constraint(tx, "unique_title_class", "Class")


def create_unique_index_taxonomy(tx):
    """Ensure a uniqueness constraint on ``title`` for ``Taxonomy`` nodes."""
    _create_unique_title_constraint(tx, "unique_title_taxonomy", "Taxonomy")

def add_node(tx, node):
    """Create or update the graph node for one flattened taxonomy entry.

    The node is merged on ``title`` under the label for its level:
    ``Category`` for level 0, ``Class`` for level 1 and ``Taxonomy`` for
    anything deeper. Deeper nodes additionally receive a ``Level_<n>`` label.

    Args:
        tx: Transaction-like object exposing ``run(query, **parameters)``.
        node: Dict with ``title`` and ``level`` and, optionally, ``href``,
            ``key`` and ``text``.
    """
    # Labels cannot be sent as Cypher parameters, so the level is forced to an
    # int before it is interpolated into the query text.
    lvl = int(node['level'])

    if lvl == 0:
        merge_label = "Category"
        level_label_clause = ""
        props = {'href': node.get('href')}
    elif lvl == 1:
        merge_label = "Class"
        level_label_clause = ""
        props = {
            'href': node.get('href'),
            'key': node.get('key'),
            'text': node.get('text'),
        }
    else:
        merge_label = "Taxonomy"
        # The level label is added with SET instead of being part of the MERGE
        # pattern. Merging on (:Taxonomy:Level_n {title}) does not match an
        # existing Taxonomy node that has the same title but another level
        # label, so MERGE would try to create a second node and fail on a
        # uniqueness constraint over Taxonomy.title.
        level_label_clause = f"SET n:Level_{lvl}"
        props = {
            'href': node.get('href'),
            'key': node.get('key'),
        }

    query = f"""
    MERGE (n:{merge_label} {{title: $title}})
    {level_label_clause}
    SET n += $props
    """
    tx.run(query,
           title=node['title'],
           props=props)

def add_relationship(tx, node):
    """Link a node to its parent with a ``HAS_CHILD`` relationship.

    Both ends are looked up by title. The labels used for the lookup depend
    on the child's level: Class -> Category for level 1, Taxonomy -> Class for
    level 2, Taxonomy -> Taxonomy otherwise. If either MATCH finds no node,
    the query creates nothing.

    Args:
        tx: Transaction-like object exposing ``run(query, **parameters)``.
        node: Dict with ``level``, ``title`` and ``parent``.
    """
    # (child label, parent label) for the levels that differ from the default.
    labels_by_level = {
        1: ("Class", "Category"),
        2: ("Taxonomy", "Class"),
    }
    child_label, parent_label = labels_by_level.get(
        node['level'], ("Taxonomy", "Taxonomy")
    )

    query = f"""
    MATCH (child:{child_label} {{title: $child_title}})
    MATCH (parent:{parent_label} {{title: $parent_title}})
    MERGE (parent)-[:HAS_CHILD]->(child)
    """
    tx.run(query, child_title=node['title'], parent_title=node['parent'])

with driver.session() as session:

    # Uniqueness constraints first, so the MERGE statements below run against
    # constrained titles.
    for create_constraint in (
        create_unique_index_category,
        create_unique_index_class,
        create_unique_index_taxonomy,
    ):
        session.execute_write(create_constraint)

    # Nodes: every entry is written through its own execute_write call.
    for node in node_data:
        session.execute_write(add_node, node)

    # Relationships: added only after all nodes exist; level-0 entries have no
    # parent and are skipped.
    for node in node_data:
        if not node['level'] > 0:
            continue
        session.execute_write(add_relationship, node)