In [2]:
from torch_geometric.datasets import Planetoid

# Load the CiteSeer Planetoid dataset, stored under the given root directory.
dataset = Planetoid(name='CiteSeer', root='/Dataset/PyG/CiteSeer/Raw')
# Index the dataset instead of reading `dataset.data`: the latter exposes the
# internal collated storage, which recent PyG versions warn against accessing.
g = dataset[0]
g

Data(x=[3327, 3703], edge_index=[2, 9104], y=[3327], train_mask=[3327], val_mask=[3327], test_mask=[3327])

In [3]:
from collections import Counter 


def count_label(label, total):
    """Print how the samples in ``label`` are distributed over classes.

    Two lines are written to stdout:

    1. ``"<number of classes>类，<N> (<N * 100 / total>%)"`` where ``N`` is the
       number of samples in ``label``.
    2. A comma-separated list of ``"<class>: <count> (<share of N>%)"`` entries
       ordered by descending count (ties keep first-seen order).

    All percentages are truncated to integers, so they need not sum to 100.

    Args:
        label: Class ids. Either an object exposing ``tolist()`` (the notebook
            passes tensors) or any plain iterable of hashable values.
        total: Reference size used as the denominator of the header
            percentage. A value of 0 is reported as ``0%`` instead of raising
            ``ZeroDivisionError``.

    Returns:
        None. The summary is only printed.
    """
    values = label.tolist() if hasattr(label, "tolist") else list(label)
    counter = Counter(values)
    n_samples = len(values)

    # Guard the header ratio: an empty reference set would divide by zero.
    coverage = int(n_samples * 100 / total) if total else 0
    print(f"{len(counter)}类，{n_samples} ({coverage}%)")

    # most_common() sorts by descending count and is stable for ties, which
    # matches a stable sort on the negated count. When there are no samples
    # the comprehension is empty, so n_samples is never used as a divisor.
    parts = [
        f"{lb}: {cnt} ({int(cnt * 100 / n_samples)}%)"
        for lb, cnt in counter.most_common()
    ]
    print(', '.join(parts))


# Pull the pieces of the loaded graph object into standalone variables.
num_nodes = g.num_nodes
feat, label = g.x, g.y
# Split the edge index into a pair of rows; dgl.graph() later receives this tuple.
edge_index = (g.edge_index[0], g.edge_index[1])
train_mask, val_mask, test_mask = g.train_mask, g.val_mask, g.test_mask

# Class distribution of the whole graph first, then of each split,
# always relative to the total number of nodes.
count_label(label, num_nodes)
for split_mask in (train_mask, val_mask, test_mask):
    count_label(label[split_mask], num_nodes)

6类，3327 (100%)
3: 701 (21%), 2: 668 (20%), 4: 596 (17%), 1: 590 (17%), 5: 508 (15%), 0: 264 (7%)
6类，120 (3%)
3: 20 (16%), 1: 20 (16%), 5: 20 (16%), 0: 20 (16%), 2: 20 (16%), 4: 20 (16%)
6类，500 (15%)
2: 116 (23%), 3: 106 (21%), 4: 94 (18%), 1: 86 (17%), 5: 69 (13%), 0: 29 (5%)
6类，1000 (30%)
3: 231 (23%), 1: 182 (18%), 2: 181 (18%), 4: 169 (16%), 5: 160 (16%), 0: 77 (7%)


In [4]:
import dgl 

# NOTE: this rebinds `g`; up to this point `g` was the object loaded from the
# Planetoid dataset, from here on it is the DGL graph.
g = dgl.graph(edge_index, num_nodes=num_nodes)

# Attach the node-level tensors under the same keys, in the same order.
node_fields = {
    'feat': feat,
    'label': label,
    'train_mask': train_mask,
    'val_mask': val_mask,
    'test_mask': test_mask,
}
for field_name, field_value in node_fields.items():
    g.ndata[field_name] = field_value

g

Graph(num_nodes=3327, num_edges=9104,
      ndata_schemes={'feat': Scheme(shape=(3703,), dtype=torch.float32), 'label': Scheme(shape=(), dtype=torch.int64), 'train_mask': Scheme(shape=(), dtype=torch.bool), 'val_mask': Scheme(shape=(), dtype=torch.bool), 'test_mask': Scheme(shape=(), dtype=torch.bool)}
      edata_schemes={})

In [5]:
import pickle 
from pathlib import Path

# Make the graph bidirected; copy_ndata=True carries the node data over
# to the new graph.
g = dgl.to_bidirected(g, copy_ndata=True)
# Remove existing self-loops before adding them, so no node gets a duplicate.
g = dgl.add_self_loop(dgl.remove_self_loop(g))

out_path = Path('/Dataset/PyG/CiteSeer/Processed/CiteSeer.dglg.pkl')
# Opening a file for writing does not create missing parent directories;
# create them first so the cell also works on a fresh machine.
out_path.parent.mkdir(parents=True, exist_ok=True)
with out_path.open('wb') as fp:
    pickle.dump(g, fp)

g 

Graph(num_nodes=3327, num_edges=12431,
      ndata_schemes={'feat': Scheme(shape=(3703,), dtype=torch.float32), 'label': Scheme(shape=(), dtype=torch.int64), 'train_mask': Scheme(shape=(), dtype=torch.bool), 'val_mask': Scheme(shape=(), dtype=torch.bool), 'test_mask': Scheme(shape=(), dtype=torch.bool)}
      edata_schemes={})