# DataLoader

A DataLoader is a convenient way to iterate over a dataset in mini-batches. It takes a dataset object and a batch size as input and returns an iterable over the dataset; each iteration yields one batch containing up to `batch_size` samples (here, graphs), where the batch size is chosen by the user. The DataLoader also provides additional features such as shuffling the data at every epoch and loading batches in parallel with multiple worker processes (the `num_workers` argument).

In [5]:
from torch_geometric.utils import scatter
from torch_geometric.datasets import TUDataset
from torch_geometric.loader import DataLoader

# Load the ENZYMES collection from TUDataset (stored under /tmp/ENZYMES),
# requesting the additional node attributes.
dataset = TUDataset(
    root='/tmp/ENZYMES',
    name='ENZYMES',
    use_node_attr=True,
)

# Serve the dataset in shuffled mini-batches of 32 graphs.
loader = DataLoader(dataset, batch_size=32, shuffle=True)

for data in loader:
    # Summary of the collated mini-batch, then the number of graphs in it.
    print(data)
    print(data.num_graphs)

    # Mean-reduce the node features along dim 0, grouped by `data.batch`
    # (presumably the graph id of every node), giving one row per graph.
    node_features, graph_index = data.x, data.batch
    x = scatter(node_features, graph_index, dim=0, reduce='mean')
    print(x.size())

# Show the dataset's underlying collated storage object.
# NOTE(review): recent PyG releases may warn on direct `.data` access --
# confirm against the installed version.
print(dataset.data)

DataBatch(edge_index=[2, 4080], x=[1119, 21], y=[32], batch=[1119], ptr=[33])
32
torch.Size([32, 21])
DataBatch(edge_index=[2, 3512], x=[922, 21], y=[32], batch=[922], ptr=[33])
32
torch.Size([32, 21])
DataBatch(edge_index=[2, 4062], x=[1059, 21], y=[32], batch=[1059], ptr=[33])
32
torch.Size([32, 21])
DataBatch(edge_index=[2, 3724], x=[935, 21], y=[32], batch=[935], ptr=[33])
32
torch.Size([32, 21])
DataBatch(edge_index=[2, 4076], x=[1041, 21], y=[32], batch=[1041], ptr=[33])
32
torch.Size([32, 21])
DataBatch(edge_index=[2, 3906], x=[1001, 21], y=[32], batch=[1001], ptr=[33])
32
torch.Size([32, 21])
DataBatch(edge_index=[2, 4236], x=[1192, 21], y=[32], batch=[1192], ptr=[33])
32
torch.Size([32, 21])
DataBatch(edge_index=[2, 4102], x=[1023, 21], y=[32], batch=[1023], ptr=[33])
32
torch.Size([32, 21])
DataBatch(edge_index=[2, 4236], x=[1149, 21], y=[32], batch=[1149], ptr=[33])
32
torch.Size([32, 21])
DataBatch(edge_index=[2, 3932], x=[1010, 21], y=[32], batch=[1010], ptr=[33])
32
torch