# Notebook para realizar a limpeza do grafo após seu povoamento automático

In [1]:
from langchain_community.graphs import Neo4jGraph
from dotenv import load_dotenv
import os
import pandas as pd
import matplotlib.pyplot as plt

Conectando ao NEO4J

In [2]:
load_dotenv()

# Neo4j variables
NEO4J_URL = os.getenv("NEO4J_URL")
NEO4J_USERNAME =os.getenv("NEO4J_USERNAME")
NEO4J_PASSWORD = os.getenv("NEO4J_PASSWORD")

#Connecting to the graph
graph = Neo4jGraph(
    url=NEO4J_URL,
    username=NEO4J_USERNAME,
    password=NEO4J_PASSWORD
)

Ajustando as URIs das classes CNPQ

In [None]:
# Relacoes "de" e "para" dos nós CNPQ
result = graph.query(f"""
    MATCH (n:CNPQ)-[rel]->(o)
    return distinct(type(rel)) 
            """)

print (result)

result = graph.query(f"""
    MATCH (o)-[rel]->(n:CNPQ)
    return distinct(type(rel)) 
            """)

print (result)

[{'(type(rel))': 'BFO_0000051'}]
[{'(type(rel))': 'subject'}, {'(type(rel))': 'BFO_0000051'}, {'(type(rel))': 'mentions'}]


In [8]:
# Ajustando as URI. Dada uma URI incorreta, mover todas as relações para a URI correta e deletar o nó com a URI incorreta.

# URI incorreta, URI correta, novo label
lista_correcao = [
    ('tag:stardog:api:health_sciences', 'tag:stardog:api:health_science', 'CIÊNCIAS DA SAÚDE'),
    ('tag:stardog:api:applied_social_sciences', 'tag:stardog:api:applied_social_science', 'CIÊNCIA SOCIAL APLICADA'),
    ('tag:stardog:api:6090000__8_communication', 'tag:stardog:api:communication', 'COMUNICAÇÃO'),
    ('tag:stardog:api:7030000__3_anthropology', 'tag:stardog:api:anthropology	', 'ANTROPOLOGIA'),
    ('tag:stardog:api:9180000__5_social_studies', 'tag:stardog:api:social_study', 'ESTUDOS SOCIAIS'),
    ('tag:stardog:api:8020800__2_modern_foreign_literature', 'tag:stardog:api:modern_foreign_literature', 'LITERATURA ESTRANGEIRA MODERNA')
]

for corr in lista_correcao:

    graph.query("""
    MATCH (n:CNPQ{uri:'""" + corr[0] + """'})-[rel:BFO_0000051]->(o)
    MATCH (m:CNPQ{uri:'""" + corr[1] + """'})
    MERGE (m)-[:BFO_0000051]->(o)
    """)

    graph.query("""
    MATCH (s)-[:subject]->(n:CNPQ{uri:'""" + corr[0] + """'})
    MATCH (m:CNPQ{uri:'""" + corr[1] + """'})
    MERGE (s)-[:subject]->(m)
    """)

    graph.query("""
    MATCH (s)-[:BFO_0000051]->(n:CNPQ{uri:'""" + corr[0] + """'})
    MATCH (m:CNPQ{uri:'""" + corr[1] + """'})
    MERGE (s)-[:BFO_0000051]->(m)
    """)

    graph.query("""
    MATCH (s)-[:mentions]->(n:CNPQ{uri:'""" + corr[0] + """'})
    MATCH (m:CNPQ{uri:'""" + corr[1] + """'})
    MERGE (s)-[:mentions]->(m)
    """)

    graph.query("""
    MATCH (m:CNPQ{uri:'""" + corr[1] + """'})
    SET m.label = '""" + corr[2] + """'
    """)

    graph.query("""
    MATCH (m:CNPQ{uri:'""" + corr[0] + """'})
    DETACH DELETE (m)
    """)

    print (corr)

('tag:stardog:api:health_sciences', 'tag:stardog:api:health_science', 'CIÊNCIAS DA SAÚDE')
('tag:stardog:api:applied_social_sciences', 'tag:stardog:api:applied_social_science', 'CIÊNCIA SOCIAL APLICADA')
('tag:stardog:api:6090000__8_communication', 'tag:stardog:api:communication', 'COMUNICAÇÃO')
('tag:stardog:api:7030000__3_anthropology', 'tag:stardog:api:anthropology\t', 'ANTROPOLOGIA')
('tag:stardog:api:9180000__5_social_studies', 'tag:stardog:api:social_study', 'ESTUDOS SOCIAIS')
('tag:stardog:api:8020800__2_modern_foreign_literature', 'tag:stardog:api:modern_foreign_literature', 'LITERATURA ESTRANGEIRA MODERNA')


In [9]:
# Deletando um nó CNPQ específico que não deveria existir
corr = ['tag:stardog:api:600',
        'tag:stardog:api:8765449414823306929',
        'tag:stardog:api:_600',
        'tag:stardog:api:_7277407233034425144_',
        'tag:stardog:api:_2134759983998029892_'
        ]

for uri in corr:
    graph.query(f"""
    MATCH (n:CNPQ{{uri:'{uri}'}})
    DETACH DELETE (n)
    """)