# Introduction by Example
https://pytorch-geometric.readthedocs.io/en/latest/get_started/introduction.html

# Data Handling of Graphs
`torch_geometric.data.Data`
* `data.x` : `[num_nodes, num_node_features]`
node feature matrix
* `data.edge_index` : `[2, num_edges]`
graph connectivity in COO format
* `data.edge_attr` : `[num_edges, num_edge_features]`
edge feature matrix
* `data.y`
target to train against
  * `[num_nodes, *]`
node-level target
  * `[1, *]`
graph-level target
* `data.pos` : `[num_nodes, num_dimensions]`
node position matrix

In [6]:
import torch
from torch_geometric.data import Data

# Undirected path graph 0 - 1 - 2: every edge is listed once per direction.
# Column k of edge_index holds the two endpoints of the k-th directed edge,
# so the tensor has shape [2, 4].
edge_index = torch.tensor(
    [(0, 1, 1, 2),
     (1, 0, 2, 1)],
    dtype=torch.long,
)
# One scalar feature per node, giving a [3, 1] float matrix.
x = torch.tensor([-1, 0, 1], dtype=torch.float).unsqueeze(-1)

data = Data(x=x, edge_index=edge_index)
data

Data(x=[3, 1], edge_index=[2, 4])

In [11]:
import torch
from torch_geometric.data import Data

# Same graph as before, but the edges are written as a list of index pairs,
# which produces a [4, 2] tensor instead of the [2, 4] layout.
edge_index = torch.tensor(
    [(0, 1), (1, 0), (1, 2), (2, 1)],
    dtype=torch.long,
)
# One scalar feature per node, giving a [3, 1] float matrix.
x = torch.tensor([-1, 0, 1], dtype=torch.float).unsqueeze(-1)

# Transpose to the [2, num_edges] layout; contiguous() materialises the
# transposed view as a tensor with its own contiguous memory.
data = Data(x=x, edge_index=edge_index.t().contiguous())
data

Data(x=[3, 1], edge_index=[2, 4])

In [13]:
# List the names of the attributes stored on this graph object.
data.keys

['edge_index', 'x']

In [14]:
# Stored attributes can also be read by key, like a dictionary.
data['x']

tensor([[-1.],
        [ 0.],
        [ 1.]])

In [15]:
# Iterating over the object yields (attribute name, value) pairs;
# only the name is used here.
for key, item in data:
    print(f'{key} found in data')

x found in data
edge_index found in data


In [16]:
# Number of nodes in the graph.
data.num_nodes

3

In [17]:
# Number of entries (columns) in edge_index. Each undirected edge is stored
# once per direction, so the two undirected edges count as four.
data.num_edges

4

In [18]:
# Number of features per node (second dimension of x).
data.num_node_features

1

In [19]:
# Check whether any node has no incident edge.
data.has_isolated_nodes()

False

In [20]:
# Check whether any edge connects a node to itself.
data.has_self_loops()

False

In [21]:
# Check whether the graph is directed. It is not here, because every edge
# is stored together with its reverse.
data.is_directed()

False

In [24]:
# Pick the target device (CPU here) and move the graph to it.
# The result of .to() is assigned back to `data`.
device = torch.device('cpu')
data = data.to(device)

# Common Benchmark Datasets

In [25]:
# Load the ENZYMES dataset from the TU collection. On first use the files
# are downloaded and processed under the given root directory.
from torch_geometric.datasets import TUDataset
dataset = TUDataset(root='/tmp/ENZYMES', name='ENZYMES')
dataset

Downloading https://www.chrsmrrs.com/graphkerneldatasets/ENZYMES.zip
Extracting /tmp/ENZYMES/ENZYMES/ENZYMES.zip
Processing...
Done!


ENZYMES(600)

In [26]:
# Number of graphs in the dataset.
len(dataset)

600

In [27]:
# Number of target classes.
dataset.num_classes

6

In [28]:
# Number of features per node.
dataset.num_node_features

3

In [29]:
# Indexing the dataset with an integer returns a single graph.
data = dataset[0]
# Check that this first graph is undirected.
data.is_undirected()

True

In [30]:
# Slicing returns a smaller dataset: the first 540 graphs are used for training.
train_dataset = dataset[:540]
train_dataset

ENZYMES(540)

In [31]:
# The remaining 60 graphs (index 540 onward) form the test set.
test_dataset = dataset[540:]
test_dataset

ENZYMES(60)

In [33]:
# Randomly reorder the graphs; shuffle() returns the shuffled dataset.
# NOTE(review): train_dataset/test_dataset were sliced in earlier cells,
# before this shuffle, so they are not affected by it.
dataset = dataset.shuffle()
dataset

ENZYMES(600)

In [34]:
# Draw a random permutation of all dataset indices.
perm = torch.randperm(len(dataset))
perm

tensor([235, 370, 131, 266,  92, 367, 175, 363, 155, 591, 209, 100, 204, 551,
        104, 471,  54, 407, 499,   9, 394, 503, 513, 336, 221, 422, 459, 534,
         56,  50, 375, 395, 154, 553, 292,  18, 558, 409, 296,  66, 187, 564,
        380,  10, 365, 396,  72, 164, 200, 599, 105, 218, 203,  79, 208, 338,
         97, 364, 561, 508, 173, 552, 478, 103, 141, 451, 491, 210, 220,  71,
        465, 229, 423, 116, 185, 449, 191,  12, 186,  35,  99, 412, 510,  33,
        523,  32, 139, 206,   3, 440, 581, 541, 302, 337, 299, 245, 306, 377,
        354, 556, 489, 501, 343, 352, 429,  73,   8, 488, 318,   7, 219, 507,
        568, 515,   6,  17,  96, 531, 215, 550, 557, 147, 542,  15, 469, 481,
         19, 234,  29, 190, 198, 425, 156, 357,   4, 569, 316, 311, 460, 140,
         31, 142, 335, 315, 447, 297, 340,  22, 575, 383, 184, 356, 202, 538,
        445, 477, 362, 182, 239, 464, 232, 328, 502, 146, 262, 570,  52, 572,
         47, 434,  34, 243, 165, 265,  68, 415, 421, 441, 587, 4

In [35]:
# Indexing with the permutation tensor reorders the dataset by hand,
# which has the same effect as calling shuffle().
dataset = dataset[perm]
dataset

ENZYMES(600)

In [40]:
from torch_geometric.datasets import Planetoid

# Load the Cora citation dataset; files are downloaded under root on first use.
dataset = Planetoid(root='/tmp/Cora', name='Cora')

# The dataset holds a single graph, so its length is 1.
len(dataset)

Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.x
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.tx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.allx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.y
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.ty
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.ally
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.graph
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.test.index
Processing...
Done!


1

In [41]:
# Number of target classes.
dataset.num_classes

7

In [42]:
# Number of features per node.
dataset.num_node_features

1433

In [43]:
# Fetch the dataset's only graph. Besides x, edge_index and y it carries
# per-node train/val/test masks.
data = dataset[0]
data

Data(x=[2708, 1433], edge_index=[2, 10556], y=[2708], train_mask=[2708], val_mask=[2708], test_mask=[2708])

In [44]:
# Check that the graph is undirected.
data.is_undirected()

True

In [45]:
# Count the nodes selected by the training mask.
data.train_mask.sum().item()

140

In [46]:
# Count the nodes selected by the validation mask.
data.val_mask.sum().item()

500

In [48]:
# Count the nodes selected by the test mask.
data.test_mask.sum().item()

1000

# Mini-batches

In [49]:
from torch_geometric.datasets import TUDataset
from torch_geometric.loader import DataLoader

# Reload ENZYMES, this time including the additional node attributes.
dataset = TUDataset(root='/tmp/ENZYMES', name='ENZYMES', use_node_attr=True)
# Group the graphs into shuffled mini-batches of up to 32 graphs each.
loader = DataLoader(dataset, batch_size=32, shuffle=True)

for batch in loader:
    # A bare expression inside a loop body is evaluated and discarded, so the
    # values must be printed explicitly to be shown.
    print(batch)
    print(batch.num_graphs)