In [2]:
from torch_geometric.datasets import Planetoid

# Load the Cora citation dataset through PyG's Planetoid loader.
dataset = Planetoid(name='Cora', root='/Dataset/PyG/Cora/Raw')
# Index the dataset rather than reading `dataset.data`: that attribute is the
# dataset's internal collated storage, and accessing it directly is
# deprecated in recent PyG releases. Cora holds a single graph, so item 0
# is the whole dataset.
g = dataset[0]
g

Data(x=[2708, 1433], edge_index=[2, 10556], y=[2708], train_mask=[2708], val_mask=[2708], test_mask=[2708])

In [3]:
from collections import Counter 


def count_label(label, total):
    """Print the class distribution of ``label`` to stdout.

    Two lines are printed:

    1. The number of distinct classes, the number of labels, and the labels'
       share of ``total`` as an integer percentage (truncated, not rounded).
    2. Every class as ``class: count (percent%)``, most frequent first.
       Classes with equal counts keep the order in which they first appear.
       Percentages are relative to the number of labels and truncated.

    Args:
        label: Object supporting ``len()`` and ``.tolist()``; ``tolist()``
            must return a flat sequence of hashable class ids.
        total: Denominator for the percentage on the first line. Must be
            non-zero, otherwise the division raises ``ZeroDivisionError``.

    Returns:
        None.
    """
    n_labels = len(label)
    class_counts = Counter(label.tolist())

    print(f"{len(class_counts)}类，{n_labels} ({int(n_labels * 100 / total)}%)")

    n_counted = sum(class_counts.values())
    # most_common() sorts by descending count and leaves ties in insertion
    # (first-seen) order.
    parts = [
        f"{cls}: {cnt} ({int(cnt * 100 / n_counted)}%)"
        for cls, cnt in class_counts.most_common()
    ]

    print(', '.join(parts))


# Pull the pieces of the PyG graph out into plain variables.
num_nodes = g.num_nodes
feat, label = g.x, g.y
# Iterating the edge_index tensor yields its rows; keep them as a tuple.
edge_index = tuple(g.edge_index)
train_mask, val_mask, test_mask = g.train_mask, g.val_mask, g.test_mask

# Label distribution over all nodes, then over the train / val / test nodes.
# Percentages on the first printed line are always relative to num_nodes.
count_label(label, num_nodes)
for split_mask in (train_mask, val_mask, test_mask):
    count_label(label[split_mask], num_nodes)

7类，2708 (100%)
3: 818 (30%), 4: 426 (15%), 2: 418 (15%), 0: 351 (12%), 5: 298 (11%), 1: 217 (8%), 6: 180 (6%)
7类，140 (5%)
3: 20 (14%), 4: 20 (14%), 0: 20 (14%), 2: 20 (14%), 1: 20 (14%), 5: 20 (14%), 6: 20 (14%)
7类，500 (18%)
3: 158 (31%), 4: 81 (16%), 2: 78 (15%), 0: 61 (12%), 5: 57 (11%), 1: 36 (7%), 6: 29 (5%)
7类，1000 (36%)
3: 319 (31%), 4: 149 (14%), 2: 144 (14%), 0: 130 (13%), 5: 103 (10%), 1: 91 (9%), 6: 64 (6%)


In [4]:
import dgl 

# Rebuild the graph as a DGL graph from the edge tuple.
# Note: from here on `g` is the DGL graph, no longer the PyG Data object.
g = dgl.graph(edge_index, num_nodes=num_nodes)

# Attach the node tensors; the pair order fixes the key order in ndata.
for key, tensor in (
    ('feat', feat),
    ('label', label),
    ('train_mask', train_mask),
    ('val_mask', val_mask),
    ('test_mask', test_mask),
):
    g.ndata[key] = tensor

g

Graph(num_nodes=2708, num_edges=10556,
      ndata_schemes={'feat': Scheme(shape=(1433,), dtype=torch.float32), 'label': Scheme(shape=(), dtype=torch.int64), 'train_mask': Scheme(shape=(), dtype=torch.bool), 'val_mask': Scheme(shape=(), dtype=torch.bool), 'test_mask': Scheme(shape=(), dtype=torch.bool)}
      edata_schemes={})

In [5]:
import os
import pickle

# Add the reverse of every edge (node data is carried over), then drop any
# existing self-loops before adding them back so each node has exactly one.
g = dgl.to_bidirected(g, copy_ndata=True)
g = dgl.add_self_loop(dgl.remove_self_loop(g))

out_path = '/Dataset/PyG/Cora/Processed/Cora.dglg.pkl'
# open(..., 'wb') does not create parent directories; make sure the target
# directory exists so the cell also works on a fresh machine.
os.makedirs(os.path.dirname(out_path), exist_ok=True)
with open(out_path, 'wb') as fp:
    pickle.dump(g, fp)

g

Graph(num_nodes=2708, num_edges=13264,
      ndata_schemes={'feat': Scheme(shape=(1433,), dtype=torch.float32), 'label': Scheme(shape=(), dtype=torch.int64), 'train_mask': Scheme(shape=(), dtype=torch.bool), 'val_mask': Scheme(shape=(), dtype=torch.bool), 'test_mask': Scheme(shape=(), dtype=torch.bool)}
      edata_schemes={})