In [82]:
import init

import pandas as pd
import numpy as np
from sklearn.cluster import DBSCAN
from sklearn.preprocessing import normalize

from dataset.graph_loader import GraphLoader

In [3]:
loader = GraphLoader()

loader.run()



Graph(num_nodes=24258, num_edges=24258,
      ndata_schemes={'feat': Scheme(shape=(379,), dtype=torch.float32)}
      edata_schemes={})


  y = column_or_1d(y, warn=True)


In [4]:
loader.nodes_embedded_tensor

tensor([[   51,     0,    53,    52,    51,     0],
        [   52,    51,     0,    53,    52,    51],
        [   53,    52,    51,     0,    53,    52],
        ...,
        [19950, 19947, 19946, 20007,     0,    53],
        [19951, 19950, 19947, 19946, 20007,     0],
        [19952, 19950, 19947, 19946, 20007,     0]])

In [6]:
idx = loader.nodes_embedded_tensor[10]
idx

tensor([ 9,  8, 52, 51,  0, 53])

In [12]:
loader.nodes_features_tensor[idx]

tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]], grad_fn=<IndexBackward0>)

In [10]:
loader.nodes_features_tensor[idx].shape

torch.Size([6, 379])

In [14]:
loader.nodes_features_tensor[loader.nodes_embedded_tensor].shape

torch.Size([24258, 6, 379])

In [13]:
loader.nodes_features_tensor[loader.nodes_embedded_tensor]

tensor([[[0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.]],

        [[0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.]],

        [[0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.]],

        ...,

        [[0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0

## Nodes Features (tags + attributes)

In [70]:
feat_tags_attrs = loader.nodes_features_tensor
feat_tags_attrs

tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]], requires_grad=True)

In [71]:
X_feat_tags_attrs = feat_tags_attrs.detach().numpy()
X_feat_tags_attrs

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]], dtype=float32)

In [87]:
X_feat_tags_attrs = normalize(X_feat_tags_attrs, norm='l1', axis=1)
X_feat_tags_attrs.sum(axis=1)

array([1., 1., 1., ..., 1., 1., 1.], dtype=float32)

## Nodes Features (node2vec)

In [88]:
feat_node2vec = loader.nodes_features_tensor[loader.nodes_embedded_tensor].sum(dim=1)
feat_node2vec.shape

torch.Size([24258, 379])

In [89]:
X_feat_node2vec = feat_node2vec.detach().numpy()
X_feat_node2vec.shape

(24258, 379)

In [90]:
X_feat_node2vec = normalize(X_feat_node2vec, norm='l1', axis=1)
X_feat_node2vec.sum(axis=1)

array([1., 1., 1., ..., 1., 1., 1.], dtype=float32)

## Nodes Features (all: tags + attributes + node2vec)

In [92]:
X = np.concatenate((X_feat_tags_attrs, X_feat_node2vec), axis=1)
X.shape

(24258, 758)

In [95]:
X = normalize(X, norm='l2', axis=1)
X.sum(axis=1)

array([1.8569534, 2.4673972, 2.8560996, ..., 2.4622717, 2.4557624,
       2.4557624], dtype=float32)

## DBSCAN Clustering

In [96]:
dbscan = DBSCAN()
dbscan.fit(X)

In [99]:
labels = pd.Series(dbscan.labels_)
labels_counts = labels.value_counts()
labels_counts[labels_counts.index > -1]

20     3260
10      830
15      830
12      830
11      830
       ... 
236       5
222       5
238       5
212       5
241       5
Length: 256, dtype: int64

In [118]:
for i in labels_counts[labels_counts.index > -1].index.tolist():
    top_label = labels_counts.index.values[i]
    top_label

    top_nodes_idx = labels[labels == top_label].index.values
    top_nodes_idx

    nodes = np.array(loader.nodes)[top_nodes_idx]
    nodes = sorted(nodes, key=lambda x: x.get('graph_id'))
    print(np.array(nodes[:20]))
    print('\n\n')

[{'id': 54, 'parent_id': 7, 'graph_id': 0, 'features': [('tag', 'footer'), ('class', 'footer')]}
 {'id': 52, 'parent_id': 7, 'graph_id': 1, 'features': [('tag', 'footer'), ('class', 'footer')]}
 {'id': 25, 'parent_id': 7, 'graph_id': 2, 'features': [('tag', 'footer'), ('class', 'footer')]}
 {'id': 53, 'parent_id': 7, 'graph_id': 3, 'features': [('tag', 'footer'), ('class', 'footer')]}
 {'id': 132, 'parent_id': 7, 'graph_id': 4, 'features': [('tag', 'footer'), ('class', 'footer')]}
 {'id': 56, 'parent_id': 7, 'graph_id': 5, 'features': [('tag', 'footer'), ('class', 'footer')]}
 {'id': 53, 'parent_id': 7, 'graph_id': 6, 'features': [('tag', 'footer'), ('class', 'footer')]}
 {'id': 68, 'parent_id': 7, 'graph_id': 7, 'features': [('tag', 'footer'), ('class', 'footer')]}
 {'id': 25, 'parent_id': 7, 'graph_id': 8, 'features': [('tag', 'footer'), ('class', 'footer')]}
 {'id': 55, 'parent_id': 7, 'graph_id': 9, 'features': [('tag', 'footer'), ('class', 'footer')]}
 {'id': 25, 'parent_id': 7, '

[{'id': 37, 'parent_id': 36, 'graph_id': 0, 'features': [('tag', 'a'), ('class', 'tag'), ('style', 'font-size: 26px'), ('href', '/tag/inspirational/')]}
 {'id': 35, 'parent_id': 34, 'graph_id': 1, 'features': [('tag', 'a'), ('class', 'tag'), ('style', 'font-size: 26px'), ('href', '/tag/inspirational/')]}
 {'id': 36, 'parent_id': 35, 'graph_id': 3, 'features': [('tag', 'a'), ('class', 'tag'), ('style', 'font-size: 26px'), ('href', '/tag/inspirational/')]}
 {'id': 115, 'parent_id': 114, 'graph_id': 4, 'features': [('tag', 'a'), ('class', 'tag'), ('style', 'font-size: 26px'), ('href', '/tag/inspirational/')]}
 {'id': 39, 'parent_id': 38, 'graph_id': 5, 'features': [('tag', 'a'), ('class', 'tag'), ('style', 'font-size: 26px'), ('href', '/tag/inspirational/')]}
 {'id': 36, 'parent_id': 35, 'graph_id': 6, 'features': [('tag', 'a'), ('class', 'tag'), ('style', 'font-size: 26px'), ('href', '/tag/inspirational/')]}
 {'id': 51, 'parent_id': 50, 'graph_id': 7, 'features': [('tag', 'a'), ('class',

[{'id': 45, 'parent_id': 44, 'graph_id': 0, 'features': [('tag', 'a'), ('class', 'tag'), ('style', 'font-size: 14px'), ('href', '/tag/reading/')]}
 {'id': 43, 'parent_id': 42, 'graph_id': 1, 'features': [('tag', 'a'), ('class', 'tag'), ('style', 'font-size: 14px'), ('href', '/tag/reading/')]}
 {'id': 44, 'parent_id': 43, 'graph_id': 3, 'features': [('tag', 'a'), ('class', 'tag'), ('style', 'font-size: 14px'), ('href', '/tag/reading/')]}
 {'id': 123, 'parent_id': 122, 'graph_id': 4, 'features': [('tag', 'a'), ('class', 'tag'), ('style', 'font-size: 14px'), ('href', '/tag/reading/')]}
 {'id': 47, 'parent_id': 46, 'graph_id': 5, 'features': [('tag', 'a'), ('class', 'tag'), ('style', 'font-size: 14px'), ('href', '/tag/reading/')]}
 {'id': 44, 'parent_id': 43, 'graph_id': 6, 'features': [('tag', 'a'), ('class', 'tag'), ('style', 'font-size: 14px'), ('href', '/tag/reading/')]}
 {'id': 59, 'parent_id': 58, 'graph_id': 7, 'features': [('tag', 'a'), ('class', 'tag'), ('style', 'font-size: 14px'

[{'id': 48, 'parent_id': 47, 'graph_id': 36, 'features': [('tag', 'meta'), ('class', 'keywords'), ('itemprop', 'keywords'), ('content', 'love')]}
 {'id': 110, 'parent_id': 109, 'graph_id': 36, 'features': [('tag', 'meta'), ('class', 'keywords'), ('itemprop', 'keywords'), ('content', 'love')]}
 {'id': 26, 'parent_id': 25, 'graph_id': 38, 'features': [('tag', 'meta'), ('class', 'keywords'), ('itemprop', 'keywords'), ('content', 'love')]}
 {'id': 71, 'parent_id': 70, 'graph_id': 58, 'features': [('tag', 'meta'), ('class', 'keywords'), ('itemprop', 'keywords'), ('content', 'love')]}
 {'id': 104, 'parent_id': 103, 'graph_id': 58, 'features': [('tag', 'meta'), ('class', 'keywords'), ('itemprop', 'keywords'), ('content', 'love')]}
 {'id': 48, 'parent_id': 47, 'graph_id': 93, 'features': [('tag', 'meta'), ('class', 'keywords'), ('itemprop', 'keywords'), ('content', 'love')]}
 {'id': 102, 'parent_id': 101, 'graph_id': 93, 'features': [('tag', 'meta'), ('class', 'keywords'), ('itemprop', 'keywor

[{'id': 69, 'parent_id': 65, 'graph_id': 4, 'features': [('tag', 'a'), ('class', 'tag'), ('href', '/tag/yourself/page/1/')]}
 {'id': 96, 'parent_id': 92, 'graph_id': 45, 'features': [('tag', 'a'), ('class', 'tag'), ('href', '/tag/yourself/page/1/')]}
 {'id': 115, 'parent_id': 111, 'graph_id': 45, 'features': [('tag', 'a'), ('class', 'tag'), ('href', '/tag/yourself/page/1/')]}
 {'id': 29, 'parent_id': 25, 'graph_id': 76, 'features': [('tag', 'a'), ('class', 'tag'), ('href', '/tag/yourself/page/1/')]}
 {'id': 29, 'parent_id': 25, 'graph_id': 153, 'features': [('tag', 'a'), ('class', 'tag'), ('href', '/tag/yourself/page/1/')]}
 {'id': 107, 'parent_id': 103, 'graph_id': 174, 'features': [('tag', 'a'), ('class', 'tag'), ('href', '/tag/yourself/page/1/')]}
 {'id': 69, 'parent_id': 65, 'graph_id': 199, 'features': [('tag', 'a'), ('class', 'tag'), ('href', '/tag/yourself/page/1/')]}
 {'id': 96, 'parent_id': 92, 'graph_id': 203, 'features': [('tag', 'a'), ('class', 'tag'), ('href', '/tag/yourse

[{'id': 32, 'parent_id': 25, 'graph_id': 15, 'features': [('tag', 'a'), ('class', 'tag'), ('href', '/tag/writers/page/1/')]}
 {'id': 126, 'parent_id': 119, 'graph_id': 16, 'features': [('tag', 'a'), ('class', 'tag'), ('href', '/tag/writers/page/1/')]}
 {'id': 32, 'parent_id': 25, 'graph_id': 77, 'features': [('tag', 'a'), ('class', 'tag'), ('href', '/tag/writers/page/1/')]}
 {'id': 32, 'parent_id': 25, 'graph_id': 127, 'features': [('tag', 'a'), ('class', 'tag'), ('href', '/tag/writers/page/1/')]}
 {'id': 53, 'parent_id': 46, 'graph_id': 194, 'features': [('tag', 'a'), ('class', 'tag'), ('href', '/tag/writers/page/1/')]}
 {'id': 126, 'parent_id': 119, 'graph_id': 195, 'features': [('tag', 'a'), ('class', 'tag'), ('href', '/tag/writers/page/1/')]}
 {'id': 32, 'parent_id': 25, 'graph_id': 196, 'features': [('tag', 'a'), ('class', 'tag'), ('href', '/tag/writers/page/1/')]}
 {'id': 126, 'parent_id': 119, 'graph_id': 209, 'features': [('tag', 'a'), ('class', 'tag'), ('href', '/tag/writers/p

[{'id': 20, 'parent_id': 18, 'graph_id': 2, 'features': [('tag', 'span'), ('class', 'author-born-date')]}
 {'id': 20, 'parent_id': 18, 'graph_id': 8, 'features': [('tag', 'span'), ('class', 'author-born-date')]}
 {'id': 20, 'parent_id': 18, 'graph_id': 10, 'features': [('tag', 'span'), ('class', 'author-born-date')]}
 {'id': 20, 'parent_id': 18, 'graph_id': 11, 'features': [('tag', 'span'), ('class', 'author-born-date')]}
 {'id': 20, 'parent_id': 18, 'graph_id': 14, 'features': [('tag', 'span'), ('class', 'author-born-date')]}
 {'id': 20, 'parent_id': 18, 'graph_id': 17, 'features': [('tag', 'span'), ('class', 'author-born-date')]}
 {'id': 20, 'parent_id': 18, 'graph_id': 23, 'features': [('tag', 'span'), ('class', 'author-born-date')]}
 {'id': 20, 'parent_id': 18, 'graph_id': 26, 'features': [('tag', 'span'), ('class', 'author-born-date')]}
 {'id': 20, 'parent_id': 18, 'graph_id': 50, 'features': [('tag', 'span'), ('class', 'author-born-date')]}
 {'id': 20, 'parent_id': 18, 'graph_id'

[{'id': 24, 'parent_id': 22, 'graph_id': 9, 'features': [('tag', 'a'), ('href', '/author/George-R-R-Martin')]}
 {'id': 43, 'parent_id': 41, 'graph_id': 20, 'features': [('tag', 'a'), ('href', '/author/George-R-R-Martin')]}
 {'id': 22, 'parent_id': 20, 'graph_id': 58, 'features': [('tag', 'a'), ('href', '/author/George-R-R-Martin')]}
 {'id': 43, 'parent_id': 41, 'graph_id': 61, 'features': [('tag', 'a'), ('href', '/author/George-R-R-Martin')]}
 {'id': 24, 'parent_id': 22, 'graph_id': 68, 'features': [('tag', 'a'), ('href', '/author/George-R-R-Martin')]}
 {'id': 24, 'parent_id': 22, 'graph_id': 107, 'features': [('tag', 'a'), ('href', '/author/George-R-R-Martin')]}
 {'id': 24, 'parent_id': 22, 'graph_id': 118, 'features': [('tag', 'a'), ('href', '/author/George-R-R-Martin')]}
 {'id': 24, 'parent_id': 22, 'graph_id': 123, 'features': [('tag', 'a'), ('href', '/author/George-R-R-Martin')]}
 {'id': 24, 'parent_id': 22, 'graph_id': 126, 'features': [('tag', 'a'), ('href', '/author/George-R-R-

[{'id': 104, 'parent_id': 103, 'graph_id': 4, 'features': [('tag', 'li'), ('class', 'previous')]}
 {'id': 122, 'parent_id': 121, 'graph_id': 18, 'features': [('tag', 'li'), ('class', 'previous')]}
 {'id': 59, 'parent_id': 58, 'graph_id': 38, 'features': [('tag', 'li'), ('class', 'previous')]}
 {'id': 108, 'parent_id': 107, 'graph_id': 58, 'features': [('tag', 'li'), ('class', 'previous')]}
 {'id': 107, 'parent_id': 106, 'graph_id': 94, 'features': [('tag', 'li'), ('class', 'previous')]}
 {'id': 31, 'parent_id': 30, 'graph_id': 107, 'features': [('tag', 'li'), ('class', 'previous')]}
 {'id': 128, 'parent_id': 127, 'graph_id': 138, 'features': [('tag', 'li'), ('class', 'previous')]}
 {'id': 42, 'parent_id': 41, 'graph_id': 147, 'features': [('tag', 'li'), ('class', 'previous')]}
 {'id': 59, 'parent_id': 58, 'graph_id': 151, 'features': [('tag', 'li'), ('class', 'previous')]}
 {'id': 55, 'parent_id': 54, 'graph_id': 153, 'features': [('tag', 'li'), ('class', 'previous')]}
 {'id': 113, 'pa

[{'id': 98, 'parent_id': 96, 'graph_id': 4, 'features': [('tag', 'a'), ('href', '/author/Bob-Marley')]}
 {'id': 24, 'parent_id': 22, 'graph_id': 31, 'features': [('tag', 'a'), ('href', '/author/Bob-Marley')]}
 {'id': 46, 'parent_id': 44, 'graph_id': 36, 'features': [('tag', 'a'), ('href', '/author/Bob-Marley')]}
 {'id': 46, 'parent_id': 44, 'graph_id': 93, 'features': [('tag', 'a'), ('href', '/author/Bob-Marley')]}
 {'id': 53, 'parent_id': 51, 'graph_id': 138, 'features': [('tag', 'a'), ('href', '/author/Bob-Marley')]}
 {'id': 46, 'parent_id': 44, 'graph_id': 152, 'features': [('tag', 'a'), ('href', '/author/Bob-Marley')]}
 {'id': 49, 'parent_id': 47, 'graph_id': 160, 'features': [('tag', 'a'), ('href', '/author/Bob-Marley')]}
 {'id': 40, 'parent_id': 38, 'graph_id': 176, 'features': [('tag', 'a'), ('href', '/author/Bob-Marley')]}
 {'id': 98, 'parent_id': 96, 'graph_id': 199, 'features': [('tag', 'a'), ('href', '/author/Bob-Marley')]}
 {'id': 24, 'parent_id': 22, 'graph_id': 223, 'feat

[{'id': 86, 'parent_id': 85, 'graph_id': 37, 'features': [('tag', 'meta'), ('class', 'keywords'), ('itemprop', 'keywords'), ('content', 'death,inspirational')]}
 {'id': 86, 'parent_id': 85, 'graph_id': 96, 'features': [('tag', 'meta'), ('class', 'keywords'), ('itemprop', 'keywords'), ('content', 'death,inspirational')]}
 {'id': 86, 'parent_id': 85, 'graph_id': 263, 'features': [('tag', 'meta'), ('class', 'keywords'), ('itemprop', 'keywords'), ('content', 'death,inspirational')]}
 {'id': 26, 'parent_id': 25, 'graph_id': 330, 'features': [('tag', 'meta'), ('class', 'keywords'), ('itemprop', 'keywords'), ('content', 'death,inspirational')]}
 {'id': 59, 'parent_id': 58, 'graph_id': 363, 'features': [('tag', 'meta'), ('class', 'keywords'), ('itemprop', 'keywords'), ('content', 'death,inspirational')]}]



[{'id': 74, 'parent_id': 72, 'graph_id': 4, 'features': [('tag', 'a'), ('href', '/author/Albert-Einstein')]}
 {'id': 90, 'parent_id': 88, 'graph_id': 4, 'features': [('tag', 'a'), ('href',

[{'id': 47, 'parent_id': 42, 'graph_id': 38, 'features': [('tag', 'a'), ('class', 'tag'), ('href', '/tag/women/page/1/')]}
 {'id': 39, 'parent_id': 34, 'graph_id': 147, 'features': [('tag', 'a'), ('class', 'tag'), ('href', '/tag/women/page/1/')]}
 {'id': 47, 'parent_id': 42, 'graph_id': 151, 'features': [('tag', 'a'), ('class', 'tag'), ('href', '/tag/women/page/1/')]}
 {'id': 36, 'parent_id': 31, 'graph_id': 157, 'features': [('tag', 'a'), ('class', 'tag'), ('href', '/tag/women/page/1/')]}
 {'id': 30, 'parent_id': 25, 'graph_id': 220, 'features': [('tag', 'a'), ('class', 'tag'), ('href', '/tag/women/page/1/')]}
 {'id': 30, 'parent_id': 25, 'graph_id': 253, 'features': [('tag', 'a'), ('class', 'tag'), ('href', '/tag/women/page/1/')]}
 {'id': 39, 'parent_id': 34, 'graph_id': 302, 'features': [('tag', 'a'), ('class', 'tag'), ('href', '/tag/women/page/1/')]}
 {'id': 36, 'parent_id': 31, 'graph_id': 320, 'features': [('tag', 'a'), ('class', 'tag'), ('href', '/tag/women/page/1/')]}
 {'id': 3

[{'id': 37, 'parent_id': 35, 'graph_id': 20, 'features': [('tag', 'a'), ('class', 'tag'), ('href', '/tag/misattributed-to-c-s-lewis/page/1/')]}
 {'id': 79, 'parent_id': 77, 'graph_id': 176, 'features': [('tag', 'a'), ('class', 'tag'), ('href', '/tag/misattributed-to-c-s-lewis/page/1/')]}
 {'id': 27, 'parent_id': 25, 'graph_id': 228, 'features': [('tag', 'a'), ('class', 'tag'), ('href', '/tag/misattributed-to-c-s-lewis/page/1/')]}
 {'id': 37, 'parent_id': 35, 'graph_id': 231, 'features': [('tag', 'a'), ('class', 'tag'), ('href', '/tag/misattributed-to-c-s-lewis/page/1/')]}
 {'id': 37, 'parent_id': 35, 'graph_id': 262, 'features': [('tag', 'a'), ('class', 'tag'), ('href', '/tag/misattributed-to-c-s-lewis/page/1/')]}]



[{'id': 17, 'parent_id': 16, 'graph_id': 45, 'features': [('tag', 'a'), ('href', '/tag/life/page/1/')]}
 {'id': 17, 'parent_id': 16, 'graph_id': 203, 'features': [('tag', 'a'), ('href', '/tag/life/page/1/')]}
 {'id': 49, 'parent_id': 48, 'graph_id': 204, 'features': [('ta

[{'id': 52, 'parent_id': 50, 'graph_id': 38, 'features': [('tag', 'a'), ('href', '/author/J-M-Barrie')]}
 {'id': 24, 'parent_id': 22, 'graph_id': 73, 'features': [('tag', 'a'), ('href', '/author/J-M-Barrie')]}
 {'id': 52, 'parent_id': 50, 'graph_id': 151, 'features': [('tag', 'a'), ('href', '/author/J-M-Barrie')]}
 {'id': 39, 'parent_id': 37, 'graph_id': 236, 'features': [('tag', 'a'), ('href', '/author/J-M-Barrie')]}
 {'id': 39, 'parent_id': 37, 'graph_id': 269, 'features': [('tag', 'a'), ('href', '/author/J-M-Barrie')]}
 {'id': 24, 'parent_id': 22, 'graph_id': 279, 'features': [('tag', 'a'), ('href', '/author/J-M-Barrie')]}]



[{'id': 38, 'parent_id': 37, 'graph_id': 45, 'features': [('tag', 'meta'), ('class', 'keywords'), ('itemprop', 'keywords'), ('content', 'life,love')]}
 {'id': 86, 'parent_id': 85, 'graph_id': 53, 'features': [('tag', 'meta'), ('class', 'keywords'), ('itemprop', 'keywords'), ('content', 'life,love')]}
 {'id': 26, 'parent_id': 25, 'graph_id': 152, 'features': [(

[{'id': 79, 'parent_id': 75, 'graph_id': 29, 'features': [('tag', 'a'), ('class', 'tag'), ('href', '/tag/value/page/1/')]}
 {'id': 29, 'parent_id': 25, 'graph_id': 30, 'features': [('tag', 'a'), ('class', 'tag'), ('href', '/tag/value/page/1/')]}
 {'id': 29, 'parent_id': 25, 'graph_id': 109, 'features': [('tag', 'a'), ('class', 'tag'), ('href', '/tag/value/page/1/')]}
 {'id': 29, 'parent_id': 25, 'graph_id': 170, 'features': [('tag', 'a'), ('class', 'tag'), ('href', '/tag/value/page/1/')]}
 {'id': 79, 'parent_id': 75, 'graph_id': 222, 'features': [('tag', 'a'), ('class', 'tag'), ('href', '/tag/value/page/1/')]}]



[{'id': 93, 'parent_id': 92, 'graph_id': 36, 'features': [('tag', 'meta'), ('class', 'keywords'), ('itemprop', 'keywords'), ('content', 'girls,love')]}
 {'id': 53, 'parent_id': 52, 'graph_id': 58, 'features': [('tag', 'meta'), ('class', 'keywords'), ('itemprop', 'keywords'), ('content', 'girls,love')]}
 {'id': 53, 'parent_id': 52, 'graph_id': 239, 'features': [('tag', 'meta')