# Environment setup

In [None]:
# Neptune endpoint generated by the CloudFormation stack (placeholder value).
# NOTE(review): not referenced by any other cell visible in this notebook.
endpoint = "from cloudformation"

In [None]:
import neptune_ml_utils as neptune_ml
# Sanity check run before anything else; presumably verifies that Neptune ML is
# enabled for this notebook/cluster (helper is defined in neptune_ml_utils, not shown here).
neptune_ml.check_ml_enabled()

In [None]:
from gremlin_python import statics
from gremlin_python.structure.graph import Graph
from gremlin_python.process.graph_traversal import __
from gremlin_python.process.strategies import *
from gremlin_python.driver.driver_remote_connection import DriverRemoteConnection
from gremlin_python.driver.aiohttp.transport import AiohttpTransport
from gremlin_python.process.traversal import *
from gremlin_python.process.graph_traversal import id_
import os

In [None]:
# Root S3 location under which the export, processed data and training output are written.
s3_bucket_uri = 's3://telcograph'

# Launch the export
## Frame the link prediction task in `export_params`

In [None]:
# Export request: property-graph export ("export-pg") restricted to the user/cell
# nodes and the user_live_cell edges, with a Neptune ML link-prediction target
# declared on the user -[user_live_cell]-> cell edge.
export_params = {
    "command": "export-pg",
    "params": {
        "endpoint": neptune_ml.get_host(),
        "profile": "neptune_ml",
        "useIamAuth": neptune_ml.get_iam(),
        "cloneCluster": False,
        "nodeLabels": ["user", "cell"],
        "edgeLabels": ["user_live_cell"],
    },
    # The export is written below the shared bucket.
    "outputS3Path": f'{s3_bucket_uri}/neptune-export',
    "additionalParams": {
        "neptune_ml": {
            "version": "v2.0",
            "targets": [
                {
                    "edge": ["user", "user_live_cell", "cell"],
                    "type": "link_prediction",
                    # presumably train/validation/test fractions — confirm against the Neptune ML docs
                    "split_rate": [0.8, 0.1, 0.1],
                },
            ],
        },
    },
    "jobSize": "xlarge",
}
# Echo the request so it can be reviewed in the cell output.
export_params

# Data processing/Preparation of graph data for Training

NB: set `--s3-input-uri` in `processing_params` to the S3 folder of the latest export to use.

In [None]:
# Job id used for the data-processing step (--job-id below) and later passed to
# training as --data-processing-id. It can be set to a unique value here;
# otherwise the helper generates one.
training_job_name = neptune_ml.get_training_job_name('link-prediction')

# Arguments for the `%neptune_ml dataprocessing start` magic.
# Every S3 location is derived from s3_bucket_uri so that changing the bucket in
# one place keeps the export input and the processed output consistent; only the
# timestamped export folder has to be updated to point at the latest export.
# The trailing spaces inside the literals are kept from the original string.
processing_params = (
    "\n--config-file-name training-data-configuration.json"
    f"\n--job-id {training_job_name} "
    "\n--instance-type ml.r5.16xlarge"
    f"\n--s3-input-uri {s3_bucket_uri}/neptune-export/20230518_072439"
    f"\n--s3-processed-uri {s3_bucket_uri}/preloading "
)

In [None]:
# Start the Neptune ML data-processing job with the arguments held in processing_params.
# --store-to keeps the response in `processing_results`; --wait presumably blocks
# until the job has finished (confirm against the graph-notebook documentation).
%neptune_ml dataprocessing start --wait --store-to processing_results {processing_params}

# Training 

<div style="background-color:#eeeeee; padding:20px; text-align:left; border-radius:10px; margin-top:10px; margin-bottom:10px; "><b>Information</b>: Link prediction is a more computationally complex model than classification or regression </div>

## Transductive training summary info.

- job_id = "my-train-job-ept-hypmodv5"
- Hyperparam config: s3://telcograph/preloading/link-pre-preloading-2023-05-18-07-32-9000000/preloading-output/model-hpo-configuration.json 
- versionID:  Ezh_iKdzXyDCM5ZxZxufYcSeVkcD7_5c
- Tag: transductive
- Data processing link-prediction-1684395141
- training params 

training_params=f"""
--job-id {job_id} 
--data-processing-id {training_job_name}
--instance-type ml.g4dn.16xlarge
--s3-output-uri {str(s3_bucket_uri)}/training
--max-hpo-number 9
--max-hpo-parallel 3 """

## Inductive training summary info 

- job_id = "my-train-job-ept-hypmodv7"
- Hyperparam config: s3://telcograph/preloading/link-pre-preloading-2023-05-18-07-32-9000000/preloading-output/model-hpo-configuration.json 
- versionID: N7PW.8pN.qbVC1w7OlKDc2qYU8QGEPRO
- Tag: inductive- prop edge set to FALSE

In [None]:
# Id of the training job to launch; the data-processing job id is echoed below
# so both ids can be checked before training starts.
job_id = 'my-train-job-inductive-hypmodv8'
training_job_name

In [None]:
# Arguments for the `%neptune_ml training start` magic, assembled piece by piece.
# Each fragment starts with the newline that separated the options in the
# original literal; the trailing spaces are part of the original string too.
training_params = (
    f"\n--job-id {job_id} "
    f"\n--data-processing-id {training_job_name}"
    "\n--instance-type ml.g4dn.16xlarge"
    f"\n--s3-output-uri {s3_bucket_uri!s}/training"
    "\n--max-hpo-number 2"
    "\n--max-hpo-parallel 2 "
)

In [None]:
# Echo the assembled training arguments so they can be reviewed in the cell output.
training_params

In [None]:
# Start the Neptune ML training job with the arguments held in training_params.
# --store-to keeps the response in `training_results`; --wait presumably blocks
# until training has finished (confirm against the graph-notebook documentation).
%neptune_ml training start --wait --store-to training_results {training_params}

# Inference 

## Endpoint creation 

In [None]:
# Arguments for the `%neptune_ml endpoint create` magic: the endpoint reuses the
# training job id both as its own id and as the model-training job to deploy.
endpoint_params = (
    f"\n--id {job_id}"
    f"\n--model-training-job-id {job_id}"
)
endpoint_params

In [None]:
# Create the inference endpoint with the arguments held in endpoint_params.
# --store-to keeps the response in `endpoint_results`, which the following cells read;
# --wait presumably blocks until the endpoint is ready (confirm against the graph-notebook documentation).
%neptune_ml endpoint create --wait --store-to endpoint_results {endpoint_params}

In [None]:
# Show the endpoint name found in the response stored by the endpoint-creation magic.
endpoint_results['endpoint']['name']

In [None]:
# Keep the endpoint name under the variable that the Gremlin inference query
# injects as ${endpoint_inductive}.
endpoint_inductive = endpoint_results["endpoint"]["name"]

# Inductive Mode: Querying using Gremlin

### Inductive prediction with Gremlin queries 

### Load a new user and some of its edges

In [None]:
# NOTE(review): with graph-notebook loaded, this presumably opens the Neptune bulk-load
# form used to load the new user and its edges — confirm; no arguments are passed here.
%load

In [None]:
%%gremlin
g.V().hasId("user_20000")

In [None]:
%%gremlin
g.V().hasId("user_20000").
  outE().hasLabel("user_live_cell").
  inV().
  valueMap(true, "name").
  groupCount().
  unfold().
  order().by(values, desc)

In [None]:
%%gremlin
g.with("Neptune#ml.endpoint", "${endpoint_inductive}").
  with("Neptune#ml.limit", 10).
  V().hasId('user_20000').
  out('user_live_cell').
  with("Neptune#ml.prediction").
  with("Neptune#ml.inductiveInference").
  hasLabel('cell').
  id()

<div style="background-color:#eeeeee; padding:20px; text-align:left; border-radius:10px; margin-top:10px; margin-bottom:10px; "><b>Information</b>: Both cell_34832 and cell_19832 are linked to user_20000 in the ground truth. </div>
