In [41]:
import numpy as np
import tensorflow as tf
import time
from itertools import islice
from sklearn.metrics import f1_score

from dataloader.cora import load_cora
from minibatch import build_batch_from_nodes as build_batch
from graphsage import GraphSageSupervised as GraphSage

In [3]:


#### NN parameters
# Passed to build_batch when minibatches are built; its length is the third
# argument given to GraphSage (presumably one sample size per hop/layer — confirm in minibatch.py).
SAMPLE_SIZES = [5, 5] # implicit number of layers
# Second positional argument given to GraphSage
# (presumably the hidden embedding width — confirm in graphsage.py).
INTERNAL_DIM = 128
#### training parameters
# Number of nodes drawn per minibatch by generate_training_minibatch.
BATCH_SIZE = 256
# NOTE(review): not referenced in any visible cell — presumably consumed by a
# training loop elsewhere in the notebook.
TRAINING_STEPS = 100
# Step size handed to tf.keras.optimizers.SGD.
LEARNING_RATE = 0.5


In [4]:
 # Load the Cora data set. Per the output of the following cell: 2708 nodes, a
 # (2708, 1433) float32 feature matrix, a column of int64 labels and 7 classes.
 # neigh_dict is indexed by node id and returns an integer array
 # (presumably that node's neighbour ids — confirm in dataloader/cora.py).
 num_nodes, raw_features, labels, num_classes, neigh_dict = load_cora()

In [5]:
num_nodes, raw_features.shape,raw_features, labels, num_classes,

(2708,
 (2708, 1433),
 array([[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]], dtype=float32),
 array([[0],
        [1],
        [2],
        ...,
        [5],
        [6],
        [0]], dtype=int64),
 7)

In [None]:
graphsage = GraphSage(raw_features, INTERNAL_DIM, len(SAMPLE_SIZES), num_classes)


In [6]:
all_nodes = np.random.permutation(num_nodes)

In [7]:
all_nodes

array([2648, 2253,  413, ...,   75, 1576, 1306])

In [8]:
train_nodes = all_nodes[:2048]
test_nodes = all_nodes[2048:]

In [13]:
list(reversed([1,2,3]) )

[3, 2, 1]

In [14]:
neigh_dict[5]

array([ 490, 2164])

In [37]:
# Toy example: three 100-dimensional 0/1 indicator rows, stacked into a
# (3, 100) matrix that the following cells mask and slice.
v1, v2, v3 = (np.zeros(100) for _ in range(3))

# Switch on the chosen positions of each row.
v1[[2, 4, 5]] = 1
v2[[20, 40, 50]] = 1
v3[[12, 15, 50, 60, 13]] = 1

v = np.stack((v1, v2, v3))
v

array([[0., 0., 1., 0., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 0., 1.,
        0., 0.

In [38]:
# Column mask: True for every column in which at least one row of v is non-zero.
# The builtin bool is used as the dtype because the np.bool alias was deprecated
# in NumPy 1.20 and removed in 1.24 (AttributeError); builtin bool works on
# every NumPy version.
mask = np.any(v.astype(bool), axis=0)

In [39]:
mask


array([False, False,  True, False,  True,  True, False, False, False,
       False, False, False,  True,  True, False,  True, False, False,
       False, False,  True, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False,  True, False, False, False, False,
       False, False, False, False, False,  True, False, False, False,
       False, False, False, False, False, False,  True, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False])

In [41]:
adj_mat = v[:,mask]
adj_mat

array([[1., 1., 1., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 1., 1., 1., 0.],
       [0., 0., 0., 1., 1., 1., 0., 0., 1., 1.]])

In [42]:
np.sum(adj_mat, axis=1, keepdims=True)

array([[3.],
       [3.],
       [5.]])

In [48]:
np.arange(mask.size)

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
       34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
       51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67,
       68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84,
       85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99])

In [49]:
np.arange(mask.size)[mask]

array([ 2,  4,  5, 12, 13, 15, 20, 40, 50, 60])

In [None]:
# The notebook only defines SAMPLE_SIZES (upper case); the lower-case name
# raised NameError. Materialise the iterator so the cell displays the reversed
# sizes rather than an opaque iterator object.
list(reversed(SAMPLE_SIZES))

In [50]:
[1,2,3].pop()

3

In [52]:
islice(([1,2],[2,3]),0,2)

<itertools.islice at 0x1c4a5e41c28>

In [55]:
# Build the supervised GraphSage model from the raw features; the third
# argument is the number of entries in SAMPLE_SIZES.
graphsage = GraphSage(
    raw_features,
    INTERNAL_DIM,
    len(SAMPLE_SIZES),
    num_classes,
)

# Shuffle the node ids, then take the first 2048 for training and the
# remainder for testing.
all_nodes = np.random.permutation(num_nodes)
train_nodes, test_nodes = all_nodes[:2048], all_nodes[2048:]

# training

# Build the training data set:
# batch = MiniBatch(src_nodes, dstsrc2srcs, dstsrc2dsts, dif_mats)
# together with the labels that correspond to the training nodes.
def generate_training_minibatch(nodes_for_training, all_labels, batch_size):
    """Yield an endless stream of ``(batch, labels)`` training pairs.

    On every step, ``batch_size`` distinct entries are drawn at random from
    ``nodes_for_training`` (sampling without replacement, e.g. [4, 6, 34, 98]).
    The minibatch for those nodes is built with ``build_batch`` using the
    notebook-level ``neigh_dict`` and ``SAMPLE_SIZES``, and is paired with
    ``all_labels`` indexed by the same nodes.

    Args:
        nodes_for_training: 1-D collection of node ids to sample from.
        all_labels: array indexable by an array of node ids.
        batch_size: number of nodes per minibatch; ``np.random.choice`` raises
            ``ValueError`` if it exceeds the number of available nodes.

    Yields:
        tuple: ``(batch, labels)`` for the freshly sampled nodes.
    """
    while True:
        # Randomly pick batch_size distinct training nodes for this step.
        sampled = np.random.choice(nodes_for_training, size=batch_size, replace=False)
        yield build_batch(sampled, neigh_dict, SAMPLE_SIZES), all_labels[sampled]

# Loss whose targets are integer class ids.
# NOTE(review): the default from_logits=False expects probabilities — confirm
# that the model's output layer applies a softmax.
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy()
# Plain SGD with the configured step size.
optimizer = tf.keras.optimizers.SGD(learning_rate=LEARNING_RATE)

# Endless stream of (batch, labels) pairs sampled from the training nodes.
minibatch_generator = generate_training_minibatch(
    nodes_for_training=train_nodes,
    all_labels=labels,
    batch_size=BATCH_SIZE,
)



In [1]:
# for inputs, inputs_labels in islice(minibatch_generator, 0, 1):
#     print(inputs)
#     print('*****','\n',len(inputs_labels))

In [92]:
def foo():
    """Endless generator of random integers from 1 to 5 inclusive.

    Prints "starting..." once, when the generator is first advanced, and then
    yields ``np.random.randint(5) + 1`` on every step.
    """
    print("starting...")
    while True:
        yield np.random.randint(5) + 1
# Calling foo() only creates the generator object; its body (including the
# "starting..." print) does not run until the first next(g), which is why this
# cell's output shows only the row of asterisks.
g = foo()      
# print()
# t = next(g)
print("*"*20)
# print(next(g))

********************


In [96]:
for i in islice(g,4,5):
    print(i)

4


In [8]:
np.array([(k, v) for k in neigh_dict for v in neigh_dict[k]])

array([[ 163, 1536],
       [ 163, 2563],
       [ 163, 2564],
       ...,
       [1885, 1886],
       [1887, 2258],
       [1887, 1902]])

In [12]:
adj_mat_dict = neigh_dict
# One (key, value) row for every entry stored under each key of the dict.
edges = np.array(
    [(src, dst) for src, dsts in adj_mat_dict.items() for dst in dsts]
)
# Node list: the dict's keys, in iteration order.
nodes = np.array(list(adj_mat_dict))

In [43]:
edges[[1,2,3],:].transpose()

array([[ 163,  163,  163],
       [2563, 2564, 1030]])

In [27]:
batchA,batchB = np.random.randint(1,9,(2,5))

In [28]:
batchA

array([2, 7, 5, 5, 2])

In [29]:
batchB

array([8, 5, 2, 3, 6])

In [31]:
np.concatenate([[1,2],[3,6,8,7]])

array([1, 2, 3, 6, 8, 7])

In [35]:
t1 = tf.ones_like([1,2,34,6])
t1

<tf.Tensor: shape=(4,), dtype=int32, numpy=array([1, 1, 1, 1])>

In [48]:
t = np.random.randint(1,10,(2,5))
# t = tf.reduce_sum(t,axis=1)
t = tf.cast(t,tf.float32)

In [49]:
t

<tf.Tensor: shape=(2, 5), dtype=float32, numpy=
array([[3., 3., 2., 3., 5.],
       [9., 5., 9., 3., 2.]], dtype=float32)>

In [46]:
tf.ones_like(t)

<tf.Tensor: shape=(2,), dtype=float32, numpy=array([1., 1.], dtype=float32)>

In [50]:
# Element-wise sigmoid cross-entropy of the logits in t against all-ones labels.
tf.nn.sigmoid_cross_entropy_with_logits(labels=tf.ones_like(t), logits=t)

<tf.Tensor: shape=(2, 5), dtype=float32, numpy=
array([[4.8587352e-02, 4.8587352e-02, 1.2692800e-01, 4.8587352e-02,
        6.7153485e-03],
       [1.2340219e-04, 6.7153485e-03, 1.2340219e-04, 4.8587352e-02,
        1.2692800e-01]], dtype=float32)>