In [3]:
import pandas as pd

from stellargraph import StellarGraph
from stellargraph.data import UnsupervisedSampler, EdgeSplitter

2022-02-05 17:33:03.748639: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-02-05 17:33:03.755345: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-02-05 17:33:03.755575: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-02-05 17:33:03.756272: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropri

# Load the data

In [1]:
from shared.schema import DatasetSchema, GraphSchema
from shared.graph.loading import pd_from_entity_schema

def stellargraph_from_graph_schema(
        schema: GraphSchema,
        explicit_label: bool = True,
        explicit_timestamp: bool = True,
        unix_timestamp: bool = False,
        prefix_id: bool = False,
) -> StellarGraph:
    """Build a ``StellarGraph`` from the node and edge entities of ``schema``.

    Every entry of ``schema.nodes`` and ``schema.edges`` is loaded into a
    DataFrame with ``pd_from_entity_schema``; only ``feat_*`` columns and the
    ``label`` column are requested as properties.

    Args:
        schema: Graph schema exposing ``nodes`` and ``edges`` mappings from a
            type label to an entity schema.
        explicit_label: Forwarded unchanged to ``pd_from_entity_schema``.
        explicit_timestamp: Forwarded unchanged to ``pd_from_entity_schema``.
        unix_timestamp: Forwarded unchanged to ``pd_from_entity_schema``.
        prefix_id: Forwarded unchanged to ``pd_from_entity_schema``.

    Returns:
        A ``StellarGraph`` whose node frames are indexed by ``id`` (with the
        ``label`` column removed) and whose edge frames use ``src`` / ``dst``
        as source / target columns.
    """
    from stellargraph import StellarGraph

    def include_properties(cs):
        # Keep only the feature columns and the label column.
        return [c for c in cs if c.startswith('feat_') or c == 'label']

    def load_frame(entity_schema):
        # Single place for the loader options shared by nodes and edges.
        return pd_from_entity_schema(
            entity_schema,
            explicit_label=explicit_label,
            explicit_timestamp=explicit_timestamp,
            include_properties=include_properties,
            unix_timestamp=unix_timestamp,
            prefix_id=prefix_id,
        )

    nodes_dfs = {
        label: load_frame(entity_schema).set_index('id').drop(columns=['type']).sort_index()
        for label, entity_schema in schema.nodes.items()
    }

    # The index of each edge frame is used as the edge ID, so IDs must not
    # repeat across edge types. Each frame therefore gets its own contiguous
    # ID range [cursor, cursor + len(df)).
    edges_dfs = {}
    cursor = 0
    for label, entity_schema in schema.edges.items():
        df = (
            load_frame(entity_schema)
            .reset_index()
            .drop(columns=['type'])
            .drop_duplicates(subset=['src', 'dst', 'timestamp'])
            # drop_duplicates leaves gaps in the index; without renumbering,
            # the largest ID of this frame could exceed cursor + len(df) - 1
            # and collide with IDs assigned to the next edge type.
            .reset_index(drop=True)
        )
        df.index += cursor
        cursor += len(df)
        edges_dfs[label] = df

    graph = StellarGraph(
        # The label column is dropped so it is not passed on as a node feature.
        nodes={k: df.drop(columns=['label']) for k, df in nodes_dfs.items()},
        edges=edges_dfs,
        source_column='src',
        target_column='dst',
    )

    return graph

In [2]:
# Load the schema of the 'star-wars' dataset and derive the graph schema from it.
DATASET = DatasetSchema.load_schema('star-wars')
schema = GraphSchema.from_dataset(DATASET)

In [3]:
# Build the full graph using the helper's default options.
graph = stellargraph_from_graph_schema(schema)

[2022-02-05 17:59:24,096][tensorflow][DEBUG] Falling back to TensorFlow client; we recommended you install the Cloud TPU client directly with pip install cloud-tpu-client.
[2022-02-05 17:59:24,204][h5py._conv][DEBUG] Creating converter from 7 to 5
[2022-02-05 17:59:24,205][h5py._conv][DEBUG] Creating converter from 5 to 7
[2022-02-05 17:59:24,206][h5py._conv][DEBUG] Creating converter from 7 to 5
[2022-02-05 17:59:24,207][h5py._conv][DEBUG] Creating converter from 5 to 7


2022-02-05 17:59:24.888469: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-02-05 17:59:24.894632: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-02-05 17:59:24.894871: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-02-05 17:59:24.895638: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropri

# Split dataset

In [10]:
import pandas as pd
from sklearn.model_selection import train_test_split
from stellargraph.data import EdgeSplitter

In [7]:
# Sample test link examples from the full graph (p=0.05). `graph_sub_test` is
# the graph returned alongside those examples by the splitter.
# A fixed seed makes the sampled edges identical on every Restart & Run All.
edge_splitter_test = EdgeSplitter(graph)
graph_sub_test, examples_test, labels_test = edge_splitter_test.train_test_split(
    p=0.05, method="global", seed=42
)

print(graph_sub_test.info())

** Sampled 103 positive and 103 negative edges. **
StellarGraph: Undirected multigraph
 Nodes: 113, Edges: 1975

 Node types:
  Character: [113]
    Features: float32 vector, length 32
    Edge types: Character-INTERACTIONS->Character, Character-MENTIONS->Character

 Edge types:
    Character-MENTIONS->Character: [1068]
        Weights: all 1 (default)
        Features: none
    Character-INTERACTIONS->Character: [907]
        Weights: all 1 (default)
        Features: none


In [8]:
# Fractions of the sampled link examples used for training and validation.
train_size = 0.75
val_size = 0.25

# Sample link examples from the test graph (p=0.1); `graph_train` is the graph
# returned alongside them. The seed keeps the sampled edges reproducible.
edge_splitter_train = EdgeSplitter(graph_sub_test)
graph_train, examples, labels = edge_splitter_train.train_test_split(
    p=0.1, method="global", seed=42
)

# Split the sampled examples into training and validation sets; `random_state`
# fixes the shuffle so the split is stable across runs.
(
    examples_train,
    examples_val,
    labels_train,
    labels_val,
) = train_test_split(
    examples, labels, train_size=train_size, test_size=val_size, random_state=42
)

print(graph_train.info())

** Sampled 197 positive and 197 negative edges. **
StellarGraph: Undirected multigraph
 Nodes: 113, Edges: 1778

 Node types:
  Character: [113]
    Features: float32 vector, length 32
    Edge types: Character-INTERACTIONS->Character, Character-MENTIONS->Character

 Edge types:
    Character-MENTIONS->Character: [976]
        Weights: all 1 (default)
        Features: none
    Character-INTERACTIONS->Character: [802]
        Weights: all 1 (default)
        Features: none


In [12]:
# Overview of the three example sets: how many link examples each contains,
# which graph they are hidden from and which graph they were picked from.
pd.DataFrame(
    {
        "Split": ["Training Set", "Validation Set", "Test set"],
        "Number of Examples": [
            len(examples_train),
            len(examples_val),
            len(examples_test),
        ],
        "Hidden from": ["Train Graph", "Train Graph", "Test Graph"],
        "Picked from": ["Test Graph", "Test Graph", "Full Graph"],
    }
).set_index("Split")

Unnamed: 0_level_0,Number of Examples,Hidden from,Picked from
Split,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Training Set,295,Train Graph,Test Graph
Validation Set,99,Train Graph,Test Graph
Test set,206,Test Graph,Full Graph


# Train Attri2Vec

In [13]:
from tensorflow import keras

from stellargraph.data import UnsupervisedSampler
from stellargraph.mapper import Attri2VecLinkGenerator
from stellargraph.layer import Attri2Vec, link_classification

In [14]:
# Training hyperparameters.
batch_size = 30  # batch size given to Attri2VecLinkGenerator
epochs = 100  # not used in the cells shown here; presumably the number of training epochs -- TODO confirm
walk_length = 5  # `length` argument of UnsupervisedSampler
walk_number = 2  # `number_of_walks` argument of UnsupervisedSampler

In [16]:
# Random-walk sampler over every node of the training graph, configured with
# the walk length and number of walks set above.
# NOTE(review): no seed is passed here, so sampling may differ between runs -- confirm whether that is intended.
unsupervised_samples = UnsupervisedSampler(
    graph_train, nodes=list(graph_train.nodes()), length=walk_length, number_of_walks=walk_number
)

# Link generator over the training graph with the configured batch size.
generator = Attri2VecLinkGenerator(graph_train, batch_size)

In [17]:
# A single layer of size 128.
layer_sizes = [128]

# NOTE(review): despite its name, `hinsage` holds an Attri2Vec model; the name
# is kept because later cells may reference it.
hinsage = Attri2Vec(layer_sizes=layer_sizes, generator=generator, bias=False, normalize=None)
# Input and output tensors of the model.
x_inp, x_out = hinsage.in_out_tensors()