<div class="header">
  <img src="img/kg_logo_white_side.png" alt="logo" style="width: 300px;"/>
  <h1>01 Elliptic GNN Training</h1>
</div>

In [1]:
# Import config file
import yaml
import warnings
warnings.filterwarnings('ignore')

# Load the notebook configuration from config.yaml in the working directory.
# safe_load builds only plain YAML types (mappings, lists, scalars) and never
# constructs arbitrary Python objects from tags in the file.
# NOTE(review): if config.yaml relies on python-specific YAML tags, safe_load
# will reject them — confirm the file contains plain data only.
with open("config.yaml", "r", encoding="utf-8") as f:
    config = yaml.safe_load(f)

# Project identifier; passed to infer_embeddings further down.
project_id = config['project']['project_id']

### Purpose: Train the GNN on the Elliptic dataset to generate graph embeddings for downstream ML tasks
<img src="img/GNN_01.png" style="width: 1500px;"/>

## DASK DATASET PREPROCESSING

<img src="img/GNN_02.png" style="width: 600px;"/>

In [2]:
from src.elliptic_dask_preprocessing import elliptic_preprocessing
# Build the graph object used by every later cell (counts, query, training,
# inference). Per the recorded output below, this call also reloads the
# `account_features` table and rewrites the Feast feature-repo file, so
# re-running the cell has side effects outside the notebook.
elliptic_graph = elliptic_preprocessing()

Table deleted: katana-clusters-beta.fsi_elliptic.account_features
Loaded 203769 rows and 95 columns to katana-clusters-beta.fsi_elliptic.account_features
Updated elliptic_feast/feature_repo/elliptic_features.py with baseline feature view


          0/? [?op/s]

          0/? [?op/s]

### VISUALIZATION

In [3]:
# Report the size of the graph. Each count is fetched and printed in turn so
# the output order matches a fetch-then-print sequence per metric.
node_count = elliptic_graph.num_nodes()
print(f"Number of nodes: {node_count:,}")

edge_count = elliptic_graph.num_edges()
print(f"Number of edges: {edge_count:,}")

          0/? [?op/s]

Number of nodes: 203,769


          0/? [?op/s]

Number of edges: 468,710


In [4]:
%%time
elliptic_graph.query("MATCH (n)-->(m) RETURN * LIMIT 200",contextualize=True).visualize()

          0/? [?op/s]

          0/? [?op/s]

CPU times: user 4.03 s, sys: 289 ms, total: 4.32 s
Wall time: 1min 4s


GraphVizWidget(edges=[{'from': 25344, 'to': 281474976735846, 'label': 'rev_transaction', 'type': 'rev_transact…

## GNN IMPLEMENTATION

#### INITIALIZING GNN PIPELINE

In [5]:
from src.katana_enterprise import GNNEmbeddingPipeline
import sys,os
# Add the demo's root directory to the import path. The location can be
# overridden with the ELLIPTIC_DEMO_ROOT environment variable; the fallback is
# the absolute path this notebook was originally run against.
# NOTE(review): this runs after the `src.katana_enterprise` import above, so it
# cannot be what makes that import resolve — confirm what actually needs it.
demo_root = (
    os.environ.get("ELLIPTIC_DEMO_ROOT")
    or "/home/anuhyabs_katanagraph_com/solutions/fsi/demos/elliptic"
)
# Guard so that re-running this cell does not append duplicate entries.
if demo_root not in sys.path:
    sys.path.append(demo_root)

# Pipeline object used by the training and inference cells below; configured
# for supervised node classification with embed_dim=16.
elliptic_node_classification_job = GNNEmbeddingPipeline(
    display_name="elliptic_gnn_embedding_train_job",
    optimization_prediction_type="node_classification",
    embed_dim=16,
    supervised=True,
)

#### GNN TRAINING

<img src="img/GNN_03.png" style="width: 1000px;"/>

In [6]:
%%time
elliptic_node_classification_job.train(
    elliptic_graph,
    target_property_name="target",
    graph_analytics_features=["page_rank", "betweenness_centrality"],
    budget_milli_node_hours=8000,
    model_display_name="elliptic_gnn_embed",
    disable_early_stopping=False,
    sync=True,
    split_ratio=[0.8, 0.15, 0.05],
)

          0/? [?op/s]

          0/? [?op/s]

          0/? [?op/s]


Host 0 output:
Tensorboard log directory: gs://katana-internal1/tensorboard/elliptic_demo_e2e_497b08928741451d8eabcfeb2118d68b
Validation data test_score: {'validation_metric': 0.9756031134946118}
Test data test_score: {'validation_metric': 0.9814737685222267}

Host 1 output:
Tensorboard log directory: gs://katana-internal1/tensorboard/elliptic_demo_e2e_497b08928741451d8eabcfeb2118d68b
Validation data test_score: {'validation_metric': 0.9878996546219551}
Test data test_score: {'validation_metric': 0.9878013616339607}

Host 2 output:
Tensorboard log directory: gs://katana-internal1/tensorboard/elliptic_demo_e2e_497b08928741451d8eabcfeb2118d68b
Validation data test_score: {'validation_metric': 0.9837837751422626}
Test data test_score: {'validation_metric': 0.9826174116033394}

Host 3 output:
Tensorboard log directory: gs://katana-internal1/tensorboard/elliptic_demo_e2e_497b08928741451d8eabcfeb2118d68b
Validation data test_score: {'validation_metric': 0.9704226417631928}
Test data test_s


Host 0 errors:

Host 1 errors:

Host 2 errors:

Host 3 errors:


#### GNN INFERENCING

<img src="img/GNN_04.png" style="width: 1000px;"/>

In [7]:
# Run embedding inference over the graph for the configured project. The
# returned identifier is printed and written to config.yaml by later cells.
pipeline_uri = elliptic_node_classification_job.infer_embeddings(elliptic_graph, project_id)

          0/? [?op/s]


Host 0 output:
Table created: katana-clusters-beta.fsi_elliptic.elliptic_embeddings_497b08928741451d8eabcfeb2118d68b
Loaded 203769 rows and 18 columns to katana-clusters-beta.fsi_elliptic.elliptic_embeddings_497b08928741451d8eabcfeb2118d68b

Host 1 output:
Table created: katana-clusters-beta.fsi_elliptic.elliptic_embeddings_497b08928741451d8eabcfeb2118d68b
Loaded 203769 rows and 18 columns to katana-clusters-beta.fsi_elliptic.elliptic_embeddings_497b08928741451d8eabcfeb2118d68b

Host 2 output:
Table created: katana-clusters-beta.fsi_elliptic.elliptic_embeddings_497b08928741451d8eabcfeb2118d68b
Loaded 203769 rows and 18 columns to katana-clusters-beta.fsi_elliptic.elliptic_embeddings_497b08928741451d8eabcfeb2118d68b

Host 3 output:
Table created: katana-clusters-beta.fsi_elliptic.elliptic_embeddings_497b08928741451d8eabcfeb2118d68b
Loaded 203769 rows and 18 columns to katana-clusters-beta.fsi_elliptic.elliptic_embeddings_497b08928741451d8eabcfeb2118d68b


          0/? [?op/s]


Host 0 output:
Table created: katana-clusters-beta.fsi_elliptic.account_mapping_497b08928741451d8eabcfeb2118d68b
Loaded 203769 rows and 6 columns to katana-clusters-beta.fsi_elliptic.account_mapping_497b08928741451d8eabcfeb2118d68b

Host 1 output:
Table created: katana-clusters-beta.fsi_elliptic.account_mapping_497b08928741451d8eabcfeb2118d68b
Loaded 203769 rows and 6 columns to katana-clusters-beta.fsi_elliptic.account_mapping_497b08928741451d8eabcfeb2118d68b

Host 2 output:
Table created: katana-clusters-beta.fsi_elliptic.account_mapping_497b08928741451d8eabcfeb2118d68b
Loaded 203769 rows and 6 columns to katana-clusters-beta.fsi_elliptic.account_mapping_497b08928741451d8eabcfeb2118d68b

Host 3 output:
Table created: katana-clusters-beta.fsi_elliptic.account_mapping_497b08928741451d8eabcfeb2118d68b
Loaded 203769 rows and 6 columns to katana-clusters-beta.fsi_elliptic.account_mapping_497b08928741451d8eabcfeb2118d68b


In [8]:
# Show the identifier returned by infer_embeddings in the previous cell.
print(pipeline_uri)

497b08928741451d8eabcfeb2118d68b


In [9]:
# Record the pipeline identifier under model.pipeline_uri in config.yaml.
# A missing or empty `model` section (or a missing key) is treated as "not set"
# rather than raising KeyError/TypeError.
model_config = config.get('model') or {}
if model_config.get('pipeline_uri') != pipeline_uri:
    model_config['pipeline_uri'] = pipeline_uri
    config['model'] = model_config
    # Rewrite the file only when the value actually changed, so an unchanged
    # config is never truncated and re-dumped.
    with open('config.yaml', 'w', encoding='utf-8') as f:
        # yaml.dump returns None when a stream is supplied, so its result is
        # deliberately NOT assigned back to `config`; doing so would leave
        # `config` as None and break any re-run of this cell.
        yaml.dump(config, stream=f,
                  default_flow_style=False, sort_keys=False)