In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from tqdm import tqdm

from NEExT import NEExT

%reload_ext autoreload
%autoreload 2

In [2]:
# Directory that holds the dataset's CSV files. Change this single value to
# point the notebook at another dataset laid out with the same file names.
DATA_DIR = "../data/ABCDO-full"

# Input files, passed to the CSV reader as the edges, node-to-graph mapping
# and node features paths respectively.
edge_file_path = f"{DATA_DIR}/edges.csv"
node_graph_mapping_file_path = f"{DATA_DIR}/graph_mapping.csv"
features_file_path = f"{DATA_DIR}/features.csv"

In [None]:
from NEExT.collections import EgonetCollection
from NEExT.features import NodeFeatures, StructuralNodeFeatures
from NEExT.io import GraphIO
from NEExT.ml_models import MLModels

# Name handed to `egonet_target` when the egonets are built below.
target = "is_outlier"
# NOTE(review): `sample_size` is not referenced anywhere in this cell —
# confirm whether a later cell still needs it.
sample_size = 5

# --- Load the full graphs from the three CSV inputs ------------------------
graph_io = GraphIO()
graph_collection = graph_io.read_from_csv(
    graph_type="igraph",
    edges_path=edge_file_path,
    node_graph_mapping_path=node_graph_mapping_file_path,
    node_features_path=features_file_path,
)

# --- Derive egonets from the loaded graphs ---------------------------------
# The call's return value is not captured; later cells read the result from
# `subgraph_collection` itself.
subgraph_collection = EgonetCollection()
subgraph_collection.create_egonets_from_graphs(
    graph_collection=graph_collection,
    egonet_algorithm="k_hop_egonet",
    max_hop_length=1,
    egonet_target=target,
    skip_features=["community_id"],
)

In [None]:
# Two feature extractors over the same egonet collection: one configured with
# the "all" structural feature list, one with a single named node feature.
structural_node_features = StructuralNodeFeatures(
    feature_list=["all"],
    feature_vector_length=3,
    n_jobs=8,
    graph_collection=subgraph_collection,
)
node_features = NodeFeatures(subgraph_collection, feature_list=["random_community_feature"])

# Both extractors are constructed before either is run, and the structural one
# runs first. The computes are kept as separate statements so a failure in the
# second does not discard the result of the first.
structural_features = structural_node_features.compute()
features = node_features.compute()

Computing structural node features:   0%|          | 0/1000 [00:00<?, ?it/s]

In [6]:
from NEExT.builders import EmbeddingBuilder

# Embedding strategies to compare; they are evaluated in this order.
embedding_strategies = [
    "separate_embedding",
    "combined_embedding",
    "structural_embedding",
    "merge_egonet_node_features",
    "only_egonet_node_features",
]

for strategy in embedding_strategies:
    # Build embeddings for the egonets using the current strategy.
    emb_builder = EmbeddingBuilder(
        subgraph_collection,
        strategy=strategy,
        structural_features=structural_features,
        features=features,
    )
    # NOTE(review): the meaning of the positional arguments (10, 6) is not
    # visible here — presumably embedding dimensions; confirm against
    # EmbeddingBuilder.compute and prefer keyword arguments if available.
    embeddings = emb_builder.compute(10, 6)

    # Train a classifier on those embeddings.
    ml_models = MLModels(
        graph_collection=subgraph_collection,
        embedding=embeddings,
        model_type="classifier",
    )
    results = ml_models.compute()

    # Report the mean of the "accuracy" entries for this strategy.
    mean_accuracy = np.mean(results["accuracy"])
    print(strategy, f"Model trained with average accuracy: {mean_accuracy:.4f}")

separate_embedding Model trained with average accuracy: 0.9580
combined_embedding Model trained with average accuracy: 0.9560
structural_embedding Model trained with average accuracy: 0.9580
merge_egonet_node_features Model trained with average accuracy: 0.9767
only_egonet_node_features Model trained with average accuracy: 0.9453
