# Node Embeddings with Properties

### Imports
Importieren der benötigten Bibliotheken


In [2]:
import os

import graphdatascience
from neo4j import GraphDatabase



### Connection
Hier wird eine Verbindung zur Neo4j-Datenbank aufgebaut und der Client der Graph Data Science Library initialisiert.

In [9]:
# Connection settings. Each value can be overridden through an environment
# variable so credentials do not have to be edited into the notebook; the
# defaults reproduce the original local setup.
NEO4J_URI = os.environ.get("NEO4J_URI", "neo4j://localhost:7687")
NEO4J_AUTH = (
    os.environ.get("NEO4J_USER", "neo4j"),
    os.environ.get("NEO4J_PASSWORD", "bachelorarbeit"),
)
NEO4J_DATABASE = os.environ.get("NEO4J_DATABASE", "neo4j")

# Graph Data Science client, used for graph catalog and algorithm calls.
gds = graphdatascience.GraphDataScience(NEO4J_URI, auth=NEO4J_AUTH)

# Keep a reference to the driver itself: chaining `.session()` directly onto
# `GraphDatabase.driver(...)` would discard the driver object, making it
# impossible to close it later.
neo4j_driver = GraphDatabase.driver(NEO4J_URI, auth=NEO4J_AUTH)

# NOTE: despite its name, `gdsdriver` is a Session, not a Driver. The name is
# kept because later cells refer to it.
gdsdriver = neo4j_driver.session(database=NEO4J_DATABASE)

In [10]:
def run_query(query, parameters=None):
    """Run a Cypher query on the shared session and return all rows.

    Args:
        query: Cypher query text.
        parameters: Optional dict of query parameters, forwarded to
            ``session.run``.

    Returns:
        A list containing one ``record.data()`` dict per result record.
    """
    # `gdsdriver` is the notebook-wide session. It must not be wrapped in a
    # `with` block here: leaving the block closes the session, so every call
    # after the first one would run against a closed session.
    result = gdsdriver.run(query, parameters)
    # Consume the result fully before returning so callers get plain data.
    return [record.data() for record in result]
    
# Cypher that asks the GDS graph catalog whether a graph named
# 'propertyGraph' exists.
# NOTE(review): this query is not executed in the visible cells, and its graph
# name differs from the one projected below ('propertyGraphWithProperties') --
# confirm which name is intended before using it as a guard.
check_if_exist_query = """
RETURN gds.graph.exists('propertyGraph') 
"""

# Basic Cypher projection: matches HAS_START / HAS_END relationships whose
# source is a Trip node and projects them, undirected, into the in-memory graph
# 'propertyGraphWithProperties'. Source and target nodes carry the year, month,
# day, hour, minute and weekday of `validFrom`, each falling back to 0 when the
# value is missing (coalesce).
# NOTE(review): not executed in the visible cells; `advanced_projection_query`
# is the one that is run.
projection_query = """
MATCH (source)-[r:HAS_START|HAS_END]->(target)
WHERE source:Trip
WITH gds.graph.project(
  'propertyGraphWithProperties',
  source,
  target,
  {
  sourceNodeProperties: source { year: coalesce(source.validFrom.year, 0), month: coalesce(source.validFrom.month, 0), day: coalesce(source.validFrom.day, 0),
  hour: coalesce(source.validFrom.hour, 0), minute: coalesce(source.validFrom.minute, 0), weekday: coalesce(source.validFrom.dayOfWeek, 0)},
  targetNodeProperties: target { year: coalesce(target.validFrom.year, 0), month: coalesce(target.validFrom.month, 0), day: coalesce(target.validFrom.day, 0),
  hour: coalesce(target.validFrom.hour, 0), minute: coalesce(target.validFrom.minute, 0), weekday: coalesce(target.validFrom.dayOfWeek, 0)}
  },
  {undirectedRelationshipTypes: ['*']}
) as g
RETURN g.graphName AS graph, g.nodeCount AS nodes, g.relationshipCount AS rels
"""

# Extended Cypher projection into the in-memory graph
# 'propertyGraphWithProperties'. In addition to the date/time components of
# `validFrom` (0 when missing), every node gets two derived properties:
#   season    -- 1: Dec-Feb, 2: Mar-May, 3: Jun-Aug, 4: Sep-Nov, 0: no month
#   isWeekend -- 1: dayOfWeek 6/7, 0: dayOfWeek 1-5, 2: no dayOfWeek
# The MATCH clause appears exactly once; it was previously pasted twice on the
# same line, which re-matched every row without changing the result.
advanced_projection_query = """
MATCH (source)-[r:HAS_START|HAS_END]->(target)
WHERE source:Trip
WITH gds.graph.project(
  'propertyGraphWithProperties',
  source,
  target,
  {
    sourceNodeProperties: source {
      year: coalesce(source.validFrom.year, 0),
      month: coalesce(source.validFrom.month, 0),
      day: coalesce(source.validFrom.day, 0),
      hour: coalesce(source.validFrom.hour, 0),
      minute: coalesce(source.validFrom.minute, 0),
      weekday: coalesce(source.validFrom.dayOfWeek, 0),
      season: CASE
                WHEN source.validFrom.month IN [12, 1, 2] THEN 1
                WHEN source.validFrom.month IN [3, 4, 5] THEN 2
                WHEN source.validFrom.month IN [6, 7, 8] THEN 3
                WHEN source.validFrom.month IN [9, 10, 11] THEN 4
                ELSE 0
              END,
      isWeekend: CASE
                   WHEN source.validFrom.dayOfWeek IN [6, 7] THEN 1
                   WHEN source.validFrom.dayOfWeek IN [1, 2, 3, 4, 5] THEN 0
                   ELSE 2
                 END
    },
    targetNodeProperties: target {
      year: coalesce(target.validFrom.year, 0),
      month: coalesce(target.validFrom.month, 0),
      day: coalesce(target.validFrom.day, 0),
      hour: coalesce(target.validFrom.hour, 0),
      minute: coalesce(target.validFrom.minute, 0),
      weekday: coalesce(target.validFrom.dayOfWeek, 0),
      season: CASE
                WHEN target.validFrom.month IN [12, 1, 2] THEN 1
                WHEN target.validFrom.month IN [3, 4, 5] THEN 2
                WHEN target.validFrom.month IN [6, 7, 8] THEN 3
                WHEN target.validFrom.month IN [9, 10, 11] THEN 4
                ELSE 0
              END,
      isWeekend: CASE
                   WHEN target.validFrom.dayOfWeek IN [6, 7] THEN 1
                   WHEN target.validFrom.dayOfWeek IN [1, 2, 3, 4, 5] THEN 0
                   ELSE 2
                 END
    }
  },
  {undirectedRelationshipTypes: ['*']}
) AS g
RETURN g.graphName AS graph, g.nodeCount AS nodes, g.relationshipCount AS rels
"""

# Execute the advanced projection. As the last expression of the cell, the
# returned summary rows (graph name, node count, relationship count) are shown
# as the cell output.
run_query(advanced_projection_query)

[{'graph': 'propertyGraphWithProperties', 'nodes': 727085, 'rels': 2906174}]

In [11]:
# Handle to the in-memory graph named "propertyGraphWithProperties".
G = gds.graph.get("propertyGraphWithProperties")

# FastRP settings for the memory estimate, collected in one place so they are
# easy to compare with the actual write run.
fastrp_estimate_config = {
    "writeProperty": "temporalEmbeddingOneIteration",
    "randomSeed": 42,
    "embeddingDimension": 128,
    "propertyRatio": 0.8,
    "featureProperties": ["day", "hour", "minute", "weekday"],
    "iterationWeights": [1.0],
}

# Estimate the memory needed by FastRP in write mode before running it.
result = gds.fastRP.write.estimate(G, **fastrp_estimate_config)

result

requiredMemory                                                1131 MiB
treeView             Memory Estimation: 1131 MiB\n|-- algorithm: 11...
mapView              {'memoryUsage': '1131 MiB', 'name': 'Memory Es...
bytesMin                                                    1186608864
bytesMax                                                    1186608864
nodeCount                                                       727085
relationshipCount                                              2906174
heapPercentageMin                                                  0.3
heapPercentageMax                                                  0.3
Name: 0, dtype: object

In [6]:
# FastRP settings for the write run: a single iteration weight, a fixed seed,
# and four of the projected node properties used as features.
fastrp_write_config = {
    "writeProperty": "temporalEmbeddingOneIteration",
    "randomSeed": 42,
    "embeddingDimension": 128,
    "propertyRatio": 0.8,
    "featureProperties": ["day", "hour", "minute", "weekday"],
    "iterationWeights": [1.0],
}

# Run FastRP in write mode; the embedding is stored under `writeProperty`.
result = gds.fastRP.write(G, **fastrp_write_config)

print(result)

FastRP:   0%|          | 0/100 [00:00<?, ?%/s]

nodeCount                                                           727085
nodePropertiesWritten                                               727085
preProcessingMillis                                                      2
computeMillis                                                         3722
writeMillis                                                           7094
configuration            {'writeProperty': 'temporalEmbeddingOneIterati...
Name: 0, dtype: object


In [12]:
# Release resources. The graph is dropped first because that call still needs
# a live GDS connection.
G.drop()
# `gdsdriver` is the Cypher session opened in the connection cell.
gdsdriver.close()
# Also close the GDS client's own connection, which was otherwise left open.
gds.close()