In [1]:
import getpass
import os

import py2neo

In [31]:
# Connection settings are read from environment variables so the password is
# not stored in the notebook. URI, user and database fall back to the local
# development values; the password is prompted for when NEO4J_PASSWORD is unset.
neo4j_password = os.environ.get("NEO4J_PASSWORD") or getpass.getpass("Neo4j password: ")
graph = py2neo.Graph(
    os.environ.get("NEO4J_URI", "bolt://localhost:7687"),
    user=os.environ.get("NEO4J_USER", "neo4j"),
    password=neo4j_password,
    name=os.environ.get("NEO4J_DATABASE", "tesis"),
)

## Cypher

#### Total de Papers, Chunks y Facts, así como su desglose por paper

In [188]:
# Global totals plus a per-paper breakdown of chunks and facts.
# The Fact match is OPTIONAL so that chunks (and papers) without any fact are
# still counted; with a plain MATCH they were dropped from the totals while the
# per-paper chunkCount still included them, making the two figures disagree.
# collect() skips nulls, so a paper without facts gets an empty fact list.
# Requires the APOC plugin (apoc.coll.flatten / apoc.coll.toSet).
query = '''
MATCH (p:Paper)-[:hasPart]->(c:Chunk)
OPTIONAL MATCH (c)-[:hasPart]->(f:Fact)
WITH p, collect(DISTINCT c) AS paperChunks, collect(DISTINCT f) AS paperFacts
RETURN 
    count(p) AS totalPapersWithChunks,  
    size(apoc.coll.toSet(apoc.coll.flatten(collect(paperChunks)))) AS totalChunks,  
    size(apoc.coll.toSet(apoc.coll.flatten(collect(paperFacts)))) AS totalFacts,
    collect({paperId: p.identifier, chunkCount: size(paperChunks), factCount: size(paperFacts)}) AS papersChunkFactCounts
'''
result = graph.run(query).data()

# Process and format the results (the query returns a single summary row).
for row in result:
    print(f"Total Papers: {row['totalPapersWithChunks']}")
    print(f"Total Chunks: {row['totalChunks']}")
    print(f"Total Facts: {row['totalFacts']}")
    print("Details:")
    for paper in row['papersChunkFactCounts']:
        print(f"  Paper EID: {paper['paperId']} - Chunk Count: {paper['chunkCount']} - Fact Count: {paper['factCount']}")


### Papers relacionados con Algoritm

In [189]:

# Papers with at least one Fact (reached through Paper -> Chunk -> Fact) whose
# subject or object is the Algoritm node with label "Artificial intelligence".
query = '''
MATCH (p:Paper)-[:hasPart]->(c:Chunk)-[:hasPart]->(f:Fact)
MATCH (f)-[:hasSubject|hasObject]->(a:Algoritm {label: "Artificial intelligence"})
RETURN DISTINCT p.identifier AS paperId
'''
result = graph.run(query).data()
print("Papers relacionados con Algoritm (Artificial intelligence): ")
# The enumerate() index was never used, so iterate over the rows directly.
for record in result:
    print(f"Paper EID: {record['paperId']}")

### KeyWords más comunes en España

In [190]:

# Ten most frequent Key nodes among papers whose sdPublisher Journal is
# located in the Country named "Spain", ranked by number of matched papers.
query = '''
MATCH (k:Key)<-[:keyWords]-(p:Paper)-[:sdPublisher]->(j:Journal)-[:location]->(c:Country{name: "Spain"})
RETURN k.name AS KeyWord, COUNT(p) AS Frecuencia
ORDER BY Frecuencia DESC
limit 10
'''
result = graph.run(query).data()
print("KeyWords más comunes en España: ")
# The enumerate() index was never used, so iterate over the rows directly.
for record in result:
    print(f"KeyWords: {record['KeyWord']} - frecuencia: {record['Frecuencia']}")


### GDS

In [29]:

# Drop a previous in-memory projection with the same name (failIfMissing =
# false) so the cell can be re-run; projecting onto an existing name fails.
graph.run("CALL gds.graph.drop('authorPageRankGraph', false)")

# Author-to-author graph: a1 is linked to a2 when a1 -[:author]-> Paper
# -[:creator]-> a2. The previous filter `id(a1) < id(a2)` kept only edges
# pointing from the lower to the higher internal node id; in this directed
# projection that made PageRank scores depend on arbitrary node ids.
# `a1 <> a2` still excludes self-loops but keeps every matched direction.
query = '''
CALL gds.graph.project.cypher(
    'authorPageRankGraph',
    'MATCH (a:Author) RETURN id(a) AS id',
    
    'MATCH (a1:Author)-[:author]->(p:Paper)-[:creator]->(a2:Author)
     WHERE a1 <> a2
     RETURN id(a1) AS source, id(a2) AS target
    '
)

'''
graph.run(query)

# Drop a previous in-memory projection with the same name (failIfMissing =
# false) so the cell can be re-run; projecting onto an existing name fails.
graph.run("CALL gds.graph.drop('journalGraph', false)")

# Native projection over Journal, Paper and Publisher nodes. Both relationship
# types are projected as UNDIRECTED; note that the projected relationship named
# `sdJournal` is built from the stored type `sdPublisher`.
query = '''
CALL gds.graph.project(
  'journalGraph',                     
  ['Journal', 'Paper', 'Publisher'],  
  {
    sdJournal: {
      type: 'sdPublisher',
      orientation: 'UNDIRECTED'       
    },
    publisher: {
      type: 'publisher',
      orientation: 'UNDIRECTED'
    }
  }
)

'''
graph.run(query)

# Drop a previous in-memory projection with the same name (failIfMissing =
# false) so the cell can be re-run; projecting onto an existing name fails.
graph.run("CALL gds.graph.drop('paperReferences', false)")

# Native projection of Paper nodes connected by `references` relationships.
query = '''
CALL gds.graph.project('paperReferences', 'Paper', 'references')
'''
graph.run(query)



# Drop a previous in-memory projection with the same name (failIfMissing =
# false) so the cell can be re-run; projecting onto an existing name fails.
graph.run("CALL gds.graph.drop('paperLabelSimilarityGraph', false)")

# Bipartite projection: Paper -> subject node of each Fact in the paper's chunks.
# The node projection must also contain the subject nodes. Previously it held
# only Paper nodes, and because `validateRelationships: false` makes GDS skip
# relationships whose endpoints are not projected, every Paper -> subject
# relationship was silently discarded.
query = '''
CALL gds.graph.project.cypher(
  'paperLabelSimilarityGraph',
  
  // Proyección de nodos
  '
  MATCH (p:Paper)
  RETURN id(p) AS id
  UNION
  MATCH (:Paper)-[:hasPart]->(:Chunk)-[:hasPart]->(:Fact)-[:hasSubject]->(o)
  RETURN id(o) AS id
  ',
  
  // Proyección de relaciones
  '
  MATCH (p:Paper)-[:hasPart]->(c:Chunk)-[:hasPart]->(f:Fact)-[e:hasSubject]->(o)
  RETURN id(p) AS source, id(o) AS target
  ',

  
  // Configuración
  {
    validateRelationships: false
  }
)
'''
graph.run(query)


In [13]:
# List every graph currently held in the GDS graph catalog together with its
# node and relationship counts, and print the rows as a list of dicts.
query = '''
CALL gds.graph.list()
YIELD graphName, nodeCount, relationshipCount
RETURN graphName, nodeCount, relationshipCount
'''
result = graph.run(query)
print(result.data())

## PageRank de autores

In [101]:
# Stream PageRank over the 'authorPageRankGraph' projection and keep the 20
# highest-scoring authors.
query = '''
 CALL gds.pageRank.stream('authorPageRankGraph')
YIELD nodeId, score
RETURN gds.util.asNode(nodeId).name AS Author, score
ORDER BY score DESC
LIMIT 20

'''
result = graph.run(query)

# Print the ranking. `!s` converts the name with str() before padding, so an
# author node without a `name` property (None) no longer raises TypeError.
for i, record in enumerate(result, start=1):
    print(f"Rango {i:<2} - {record['Author']!s:<33} - score {record['score']:>0.2f}")

### Betweenness

In [119]:
# Stream betweenness centrality over the 'journalGraph' projection, keep only
# Journal nodes and report the 10 highest scores.
query = '''
CALL gds.betweenness.stream('journalGraph')
YIELD nodeId, score
WHERE gds.util.asNode(nodeId):Journal  
RETURN 
  gds.util.asNode(nodeId).identifier AS journalId, 
  gds.util.asNode(nodeId).title AS title, 
  score
ORDER BY score DESC
LIMIT 10

'''
result = graph.run(query)
# `r` is a second name for the same cursor (not a copy); the loop below
# consumes it as well. Kept so the notebook namespace stays unchanged.
r = result
# `!s` converts the identifier with str() before padding, so a Journal without
# an `identifier` property (None) no longer raises TypeError.
for i, record in enumerate(result, start=1):
    print(f"Rango {i:<2} - Id:{record['journalId']!s:<10} - score: {record['score']:>0.2f}")
    print(f"- Nombre: {record['title']}\n")

## Closeness

In [197]:
# Stream closeness centrality over the 'paperReferences' projection.
# Papers are keyed by `identifier` in every other query of this notebook;
# reading `.id` returned a different (presumably unset) property. The column is
# still aliased `id` so the result shape is unchanged.
query = '''
CALL gds.closeness.stream('paperReferences')
YIELD nodeId, score
RETURN gds.util.asNode(nodeId).identifier AS id, score
ORDER BY score DESC

'''
graph.run(query)

### Similitud

In [12]:
# Stream node similarity between papers, most similar pairs first.
# The projection created in this notebook is named 'paperLabelSimilarityGraph';
# the previous name 'similitudPaper' is never projected here, so the call
# failed on a fresh run.
query = '''
CALL gds.nodeSimilarity.stream('paperLabelSimilarityGraph')
YIELD node1, node2, similarity
WITH 
    gds.util.asNode(node1) AS Node1, 
    gds.util.asNode(node2) AS Node2, 
    similarity
RETURN 
    Node1.identifier AS Node1, 
    Node2.identifier AS Node2, 
    similarity
ORDER BY similarity DESC

'''

result = graph.run(query)
result


### Eliminar de memoria

In [30]:

# Free server memory: list every graph in the GDS catalog and drop each one,
# returning the names of the graphs that were dropped.
query = '''
CALL gds.graph.list()
YIELD graphName
CALL gds.graph.drop(graphName)
YIELD graphName AS droppedGraph
RETURN droppedGraph
'''

# Leave the cursor as the cell's last expression so the notebook displays it.
result = graph.run(query)
result


In [35]:
# Request the average shortest-path length over an anonymous projection of
# all nodes ('*') and all relationships ('*').
# NOTE(review): confirm that `gds.alpha.shortestPath.average` exists in the
# installed GDS version and that it accepts an anonymous projection map.
query = '''
CALL gds.alpha.shortestPath.average({
  nodeProjection: '*',
  relationshipProjection: '*'
})
YIELD averageLength
RETURN averageLength

'''

# Leave the cursor as the cell's last expression so the notebook displays it.
result = graph.run(query)
result
