In [102]:
import yaml
import pandas as pd
from graphdatascience import GraphDataScience

In [103]:
# Read the local YAML config; it supplies the Neo4j username and password
# used below.
with open("config.yaml") as config_file:
    PARAM = yaml.safe_load(config_file)

# Bolt endpoint of the Neo4j instance the GDS client connects to.
url = "bolt://localhost:7687"
gds = GraphDataScience(
    url,
    auth=(PARAM["neo4j_username"], PARAM["neo4j_password"]),
)

In [104]:
# Name under which the projection is stored in the GDS graph catalog.
GRAPH_NAME = "contextGraph"

# Node properties projected for both the "Known" and "Unknown" labels.
FEATURE_PROPERTIES = [
    "protection_status_one_hot",
    "site_area_in_hectares",
    "site_length_km",
    "valuation_methods_one_hot",
    "value_year",
    "beneficiary_unit_one_hot",
]
# Property used as the training target further down; projected for "Known" only.
TARGET_PROPERTY = "int__per_hectare_per_year"

# Labels projected without any node properties.
CONTEXT_LABELS = [
    "Country",
    "Seea",
    "Ecosystem",
    "Ecozone",
    "Biome",
    "Cices_class",
    "Cices_division",
    "Cices_group",
    "Cices_section",
    "Teeb_service",
    "Teeb_subservice",
    "Teeb_category",
    "Subregion",
    "Region",
]

# Relationship types to project; every one is projected as UNDIRECTED.
RELATIONSHIP_TYPES = [
    "IS_LOCATE_IN",
    "IS_AN_SEEA_OF",
    "IS_AN_ECOSYSTEM_OF",
    "ECOSYSTEM_BELONGS_TO",
    "IS_AN_ECOZONE_OF",
    "ECOZONE_BELONGS_TO",
    "IS_A_BIOME_OF",
    "BELONGS_TO_CLASS_OF_CICES",
    "IS_A_GROUP_OF_CICES",
    "IS_A_DIVISION_OF_CICES",
    "IS_A_CLASS_OF_CICES",
    "PROVIDES_SERVICE",
    "PROVIDES_SUBSERVICE",
    "COUNTRY_IS_LOCATE_IN",
    "SUBREGION_IS_LOCATE_IN",
    "IS_A_SUBSERVICE_OF",
    "IS_A_SERVICE_OF",
]

node_projection = {
    "Known": {"properties": FEATURE_PROPERTIES + [TARGET_PROPERTY]},
    "Unknown": {"properties": FEATURE_PROPERTIES},
    **{label: {} for label in CONTEXT_LABELS},
}
relationship_projection = {
    rel_type: {"orientation": "UNDIRECTED"} for rel_type in RELATIONSHIP_TYPES
}

# The graph catalog lives on the Neo4j server, not in the kernel, so a
# projection left over from an earlier run would make `project` fail with a
# name clash. Drop any stale projection first so this cell can be re-run.
if gds.graph.exists(GRAPH_NAME)["exists"]:
    gds.graph.get(GRAPH_NAME).drop()

G, result = gds.graph.project(
    GRAPH_NAME,
    node_projection,
    relationship_projection,
    readConcurrency=4,
)

In [105]:
# Create the node regression training pipeline that the following cells
# configure step by step.
# NOTE(review): presumably this raises if a pipeline with the same name is
# still registered on the server from an earlier run - confirm, and drop the
# old pipeline before re-running if so.
PIPELINE_NAME = "pipe-with-context"
pipe_with_context = gds.nr_pipe(PIPELINE_NAME)

In [106]:
## Alternative (not run): mutate the projected graph with FastRP directly, then stream the resulting embeddings

# gds.fastRP.mutate(G, 
# embeddingDimension= 16,
# mutateProperty='embedding'
# )

# gds.graph.nodeProperties.stream(
#     G,
#     ['embedding'],
#     ["Known", "Unknown"]
# )

In [107]:
# Add a FastRP node-property step that writes a 32-dimensional "embedding"
# property, using the listed labels as context nodes.
# The original list named "Ecosystem" twice; each label is listed once here.
# NOTE(review): Cices_division, Cices_group, Cices_section and Teeb_category
# are projected into the graph but are not listed as context labels -
# confirm this omission is intentional.
pipe_with_context.addNodeProperty(
    "fastRP",
    mutateProperty="embedding",
    embeddingDimension=32,
    randomSeed=42,  # fixed seed so the embeddings are repeatable across runs
    contextNodeLabels=[
        "Country",
        "Seea",
        "Ecosystem",
        "Ecozone",
        "Biome",
        "Cices_class",
        "Teeb_service",
        "Teeb_subservice",
        "Subregion",
        "Region",
    ],
)

name                                                 pipe-with-context
nodePropertySteps    [{'name': 'gds.fastRP.mutate', 'config': {'mut...
featureProperties                                                   []
splitConfig                {'testFraction': 0.3, 'validationFolds': 3}
autoTuningConfig                                     {'maxTrials': 10}
parameterSpace            {'LinearRegression': [], 'RandomForest': []}
Name: 0, dtype: object

In [108]:
# Use the "embedding" node property as the model's only input feature.
feature_properties = ["embedding"]
pipe_with_context.selectFeatures(feature_properties)

name                                                 pipe-with-context
nodePropertySteps    [{'name': 'gds.fastRP.mutate', 'config': {'mut...
featureProperties                                          [embedding]
splitConfig                {'testFraction': 0.3, 'validationFolds': 3}
autoTuningConfig                                     {'maxTrials': 10}
parameterSpace            {'LinearRegression': [], 'RandomForest': []}
Name: 0, dtype: object

In [109]:
# Register one random-forest model candidate with 10 decision trees.
random_forest_config = {"numberOfDecisionTrees": 10}
pipe_with_context.addRandomForest(**random_forest_config)

name                                                 pipe-with-context
nodePropertySteps    [{'name': 'gds.fastRP.mutate', 'config': {'mut...
featureProperties                                          [embedding]
splitConfig                {'testFraction': 0.3, 'validationFolds': 3}
autoTuningConfig                                     {'maxTrials': 10}
parameterSpace       {'LinearRegression': [], 'RandomForest': [{'ma...
Name: 0, dtype: object

In [110]:
# Lower the auto-tuning trial budget to 5 (the pipeline summary printed by
# the earlier cells shows 10).
max_tuning_trials = 5
pipe_with_context.configureAutoTuning(maxTrials=max_tuning_trials)

name                                                 pipe-with-context
nodePropertySteps    [{'name': 'gds.fastRP.mutate', 'config': {'mut...
featureProperties                                          [embedding]
splitConfig                {'testFraction': 0.3, 'validationFolds': 3}
autoTuningConfig                                      {'maxTrials': 5}
parameterSpace       {'LinearRegression': [], 'RandomForest': [{'ma...
Name: 0, dtype: object

In [111]:
# Train the pipeline on the "Known" nodes, predicting the
# "int__per_hectare_per_year" property and evaluating with RMSE.
# NOTE(review): the model is stored under `modelName`; presumably training
# fails if a model with that name already exists from an earlier run -
# confirm, and drop the old model before re-running if so.
model, stats = pipe_with_context.train(
    G,
    targetNodeLabels=["Known"],
    modelName="nr-pipeline-model-contextual",
    targetProperty="int__per_hectare_per_year",
    metrics=["ROOT_MEAN_SQUARED_ERROR"],
    randomSeed=42,  # fixed seed so the split and training are repeatable
    concurrency=4,
)

Node Regression Train Pipeline:   0%|          | 0/100 [00:00<?, ?%/s]

In [112]:
# Print the RMSE entry of the training statistics (test, validation,
# outerTrain and train scores).
rmse_summary = stats["modelInfo"]["metrics"]["ROOT_MEAN_SQUARED_ERROR"]
print(rmse_summary)

{'test': 37724.08777768367, 'validation': {'min': 33075.82858659399, 'max': 46408.0626480512, 'avg': 41661.88175823811}, 'outerTrain': 35632.74546814644, 'train': {'min': 30264.379322638295, 'max': 40411.77798696447, 'avg': 35486.29736719002}}


In [113]:
# Stream predictions from the trained model for the nodes labelled "Unknown".
prediction_config = {
    "modelName": "nr-pipeline-model-contextual",
    "targetNodeLabels": ["Unknown"],
}
predicted = model.predict_stream(G, **prediction_config)
     


In [114]:
# Resolve each predicted node id to its node and copy the node's "name"
# property into a new column.
predicted_nodes = gds.util.asNodes(predicted["nodeId"].to_list())
predicted["name"] = [node["name"] for node in predicted_nodes]

In [115]:
# DataFrame.rename returns a new frame and leaves the original untouched.
# Rebind the result so later cells (including the TSV export) see the
# "FastRP_rf" column and not "predictedValue".
predicted = predicted.rename(columns={"predictedValue": "FastRP_rf"})
predicted

Unnamed: 0,nodeId,FastRP_rf,name
0,23,76859.79227,12270007
1,31,9307.74632,12250001
2,35,75520.16106,12240002
3,41,76859.79227,12230006
4,46,46968.94177,12210004
...,...,...,...
3471,10291,17839.40498,12410007
3472,10293,99936.38500,12400002
3473,10308,2935.39080,12360001
3474,10315,1104.49807,12350007


In [116]:
# Write the predictions table to a tab-separated file, without the index column.
predicted.to_csv(
    path_or_buf="predicted_FastRP.tsv",
    index=False,
    sep="\t",
)