In [1]:
import pandas as pd
import os

import stellargraph as sg
from stellargraph.mapper import DirectedGraphSAGENodeGenerator
from stellargraph.layer import DirectedGraphSAGE

from tensorflow.keras import layers, optimizers, losses, metrics, Model
from sklearn import preprocessing, feature_extraction, model_selection
from stellargraph import datasets
from IPython.display import display, HTML
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
# Create the Cora dataset handle and render its bundled description as HTML.
dataset = datasets.Cora()
display(HTML(dataset.description))
# directed=True keeps the citation direction (paper -cites-> paper), so G is a
# directed graph; node_subjects holds the subject label of each paper node.
G, node_subjects = dataset.load(directed=True)

In [3]:
# Print a text summary of the graph: node/edge counts, node and edge types,
# and the node feature dimensionality.
print(G.info())

StellarDiGraph: Directed multigraph
 Nodes: 2708, Edges: 5429

 Node types:
  paper: [2708]
    Features: float32 vector, length 1433
    Edge types: paper-cites->paper

 Edge types:
    paper-cites->paper: [5429]
        Weights: all 1 (default)


In [4]:
# Show the distinct subject labels; these are the classes the labels are later
# encoded into as training/test targets.
set(node_subjects)

{'Case_Based',
 'Genetic_Algorithms',
 'Neural_Networks',
 'Probabilistic_Methods',
 'Reinforcement_Learning',
 'Rule_Learning',
 'Theory'}

In [11]:
# Split the labelled nodes: 10% for training, the remainder (test_size=None means
# "complement of train_size") for testing. Stratifying on the subject keeps the
# class proportions the same in both splits.
# random_state pins the shuffle so the split -- and every result derived from
# it -- is reproducible under Restart Kernel -> Run All.
train_subjects, test_subjects = model_selection.train_test_split(
    node_subjects,
    train_size=0.1,
    test_size=None,
    stratify=node_subjects,
    random_state=42,
)
train_subjects

33325                  Theory
35905         Neural_Networks
1122460       Neural_Networks
1153853    Genetic_Algorithms
1152308            Case_Based
                  ...        
646900     Genetic_Algorithms
74749                  Theory
763181        Neural_Networks
987197                 Theory
12337         Neural_Networks
Name: subject, Length: 270, dtype: object

In [7]:
# Inspect the held-out labels (the nodes not selected for training).
test_subjects

641956           Neural_Networks
1104851                   Theory
136766             Rule_Learning
632796     Probabilistic_Methods
162664     Probabilistic_Methods
                   ...          
608326        Genetic_Algorithms
1120169                   Theory
1114336               Case_Based
65074      Probabilistic_Methods
82920         Genetic_Algorithms
Name: subject, Length: 2438, dtype: object

In [8]:

# Turn the string subject labels into a 0/1 indicator matrix with one column
# per class. The encoder learns its class list from the training labels only
# and is then applied unchanged to both splits, so columns line up.
target_encoding = preprocessing.LabelBinarizer().fit(train_subjects)
train_targets = target_encoding.transform(train_subjects)
test_targets = target_encoding.transform(test_subjects)

In [9]:
# Inspect the encoded training targets (a 0/1 array, one row per training node).
train_targets

array([[0, 0, 0, ..., 0, 0, 1],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 1, 0, 0],
       [0, 0, 1, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]])