# PyG的Data

In [86]:
import torch
from torch_geometric.data import Data
DATA_PATH = "../datas/"

# Tiny 3-node graph. Each row below is one (source, target) edge; the list is
# transposed so that edge_index ends up with shape [2, num_edges].
edge_index = torch.tensor(
    [[0, 1],
     [1, 0],
     [1, 2],
     [2, 1]],
    dtype=torch.long,
).t().contiguous()

# One scalar feature per node, stored as a [num_nodes, 1] float tensor.
x = torch.tensor([-1, 0, 1], dtype=torch.float).unsqueeze(-1)

data = Data(x=x, edge_index=edge_index)
data

Data(x=[3, 1], edge_index=[2, 4])

In [87]:
# Walk through the basic accessors of the `data` object built in the previous
# cell, printing one result per separator line.

# `keys` is read as a plain attribute in the run captured in this notebook.
# NOTE(review): newer PyG releases expose it as a method instead - confirm
# against the installed version. Handling both keeps the printed list of
# attribute names the same either way.
print(data.keys() if callable(data.keys) else data.keys)
print("-" * 50)
# Dictionary-style access to a stored attribute.
print(data['x'])
print("-" * 50)
# Iterating over the object yields (attribute name, value) pairs.
for key, item in data:
    print(f'{key} found in data')
print("-" * 50)
# Membership test by attribute name.
print('edge_attr' in data)
print("-" * 50)
print(data.num_nodes)
print("-" * 50)
print(data.num_edges)
print("-" * 50)
print(data.num_node_features)
print("-" * 50)
print(data.has_isolated_nodes())
print("-" * 50)
print(data.has_self_loops())
print("-" * 50)
print(data.is_directed())
print("-" * 50)

# Transfer the data object to the GPU when one is available; otherwise stay on
# the CPU so this cell does not raise on machines without CUDA.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
data = data.to(device)

['x', 'edge_index']
--------------------------------------------------
tensor([[-1.],
        [ 0.],
        [ 1.]])
--------------------------------------------------
x found in data
edge_index found in data
--------------------------------------------------
False
--------------------------------------------------
3
--------------------------------------------------
4
--------------------------------------------------
1
--------------------------------------------------
False
--------------------------------------------------
False
--------------------------------------------------
False
--------------------------------------------------


# 通用基准数据集

In [88]:
from torch_geometric.datasets import TUDataset

# Load the ENZYMES benchmark from the TU collection into DATA_PATH/ENZYMES.
dataset = TUDataset(root=f"{DATA_PATH}ENZYMES", name="ENZYMES")

# Dataset-level summary.
print(f"length of ENZYMES:{len(dataset)}")
print(f"num classes:{dataset.num_classes}")
print(f"num node features:{dataset.num_node_features}")

# Integer indexing returns a single graph; slicing returns a sub-dataset.
print(dataset[0])
print(dataset[:20])  # slice of the dataset
print(dataset[0].is_undirected())
# print(dataset.__dict__)  # uncomment to inspect the dataset's attributes

length of ENZYMES:600
num classes:6
num node features:3
Data(edge_index=[2, 168], x=[37, 3], y=[1])
ENZYMES(20)
True


In [89]:
# Randomly reorder the dataset. Both forms below are executed, so the dataset
# ends up being permuted twice in this cell.
dataset = dataset.shuffle() # shuffle the dataset
# According to the original note, the call above is equivalent to:
perm = torch.randperm(len(dataset))
dataset = dataset[perm]

In [90]:
# Disabled Planetoid/Cora example: the code is wrapped in a bare string literal,
# so none of it executes. Because that string is the cell's last expression,
# Jupyter echoes it back as the cell output.
# NOTE(review): consider turning this into `#` comments or a real code cell.
'''from torch_geometric.datasets import Planetoid

dataset = Planetoid(root=DATA_PATH + 'Cora', name='Cora')
print("length of Cora:{}".format(len(dataset)))
print("num classes:{}".format(dataset.num_classes))
print("num node features:{}".format(dataset.num_node_features))'''

'from torch_geometric.datasets import Planetoid\n\ndataset = Planetoid(root=DATA_PATH + \'Cora\', name=\'Cora\')\nprint("length of Cora:{}".format(len(dataset)))\nprint("num classes:{}".format(dataset.num_classes))\nprint("num node features:{}".format(dataset.num_node_features))'

# mini-batch

In [91]:
from torch_geometric.datasets import TUDataset
from torch_geometric.loader import DataLoader

# Reload ENZYMES with use_node_attr=True and wrap it in a shuffling loader
# that yields mini-batches of 32 graphs.
dataset = TUDataset(
    root=f"{DATA_PATH}ENZYMES",
    name="ENZYMES",
    use_node_attr=True,
)
loader = DataLoader(dataset, batch_size=32, shuffle=True)

# Only the first mini-batch is needed for inspection.
batch = next(iter(loader))
print(batch)

# Two individual graphs, for comparison with the batched object above.
print(dataset[0])
print(dataset[200])

# Attribute names held by the batch, read from its internal `_store` mapping.
print(list(vars(batch)["_store"].keys()))

DataBatch(edge_index=[2, 4086], x=[1081, 21], y=[32], batch=[1081], ptr=[33])
Data(edge_index=[2, 168], x=[37, 21], y=[1])
Data(edge_index=[2, 106], x=[29, 21], y=[1])
['edge_index', 'x', 'y', 'batch', 'ptr']
