# Imports

In [31]:
import os

import torch
import torch_geometric.transforms as T

from torch.utils.data import Dataset

from torch_geometric.datasets import Planetoid, TUDataset
from torch_geometric.loader import NeighborSampler
from torch_geometric.loader import DataLoader

import torch.multiprocessing
# Select the 'file_system' strategy for sharing tensors between processes.
# NOTE(review): presumably needed because the NeighborSampler loaders further down
# run with num_workers=8 and the default strategy can run out of file descriptors
# - confirm against the environment this notebook is run in.
torch.multiprocessing.set_sharing_strategy('file_system')


In [24]:
# Relative paths: DATA_PATH is handed to the dataset constructors as their root
# directory; MODEL_PATH is presumably where trained models are stored.
DATA_PATH, MODEL_PATH = './dataset', './models'

# Prefer the GPU whenever CUDA is usable, otherwise stay on the CPU.
if torch.cuda.is_available():
    DEVICE = torch.device('cuda')
else:
    DEVICE = torch.device('cpu')

# Planetoid - Cora

In [49]:
# Load the "Cora" dataset of the Planetoid collection, rooted at DATA_PATH, with the
# NormalizeFeatures transform attached.
dataset = Planetoid(DATA_PATH, "Cora", transform=T.NormalizeFeatures())

# Only the last expression of a cell is rendered, so an intermediate value has to be
# passed to display() (available as a builtin inside IPython/Jupyter kernels).
display(dataset)
# expected: Cora()

# Last expression of the cell -> rendered as the cell output.
dataset.num_features
# expected: 1433

1433

In [50]:
# Take the first element of the dataset; the loaders built further down sample from
# this object's edge_index.
data = dataset[0]

# A bare expression in the middle of a cell is evaluated and discarded; display()
# (a builtin inside IPython/Jupyter kernels) makes the summary actually show up.
display(data)
# expected: Data(x=[2708, 1433], edge_index=[2, 10556], y=[2708], train_mask=[2708], val_mask=[2708], test_mask=[2708])

# Last expression of the cell -> rendered as the cell output.
data.num_features
# expected: 1433

1433

In [51]:
# Show the concrete dataset class. Mid-cell expressions are not rendered on their own,
# so route it through display() (a builtin inside IPython/Jupyter kernels).
display(type(dataset))
# expected: torch_geometric.datasets.planetoid.Planetoid

# Check the dataset against torch.utils.data.Dataset (the `Dataset` imported at the
# top of the notebook). As the last expression, this is the rendered cell output.
isinstance(dataset, Dataset)
# expected: True

True

In [52]:
# Wrap the current dataset in the torch_geometric DataLoader, 128 items per batch.
dataloader = DataLoader(
    dataset,
    batch_size=128,
)

# Number of batches the loader yields (1 in the recorded output below).
len(dataloader)

1

In [20]:
# Neighbor-sampling loaders over the whole graph (node_idx=None -> every node is a
# seed node). Both loaders share one configuration and differ only in shuffling.
#
# num_nodes is passed explicitly: without it the sampler derives the node count from
# the largest index present in edge_index, which would leave out any trailing nodes
# that have no edges.
sampler_kwargs = dict(
    node_idx=None,
    sizes=[10, 10, 25],       # neighbors sampled per hop (three hops)
    batch_size=128,           # seed nodes per mini-batch
    num_workers=8,
    num_nodes=data.num_nodes,
)

train_loader = NeighborSampler(data.edge_index, shuffle=True, **sampler_kwargs)
test_loader = NeighborSampler(data.edge_index, shuffle=False, **sampler_kwargs)

In [29]:
# Peek at one sampled mini-batch. Pulling a single item from a fresh iterator avoids
# running the multi-worker sampler over every remaining batch just to print the first.
# As the recorded output shows, an item is a tuple of
# (batch size, node-id tensor, list of per-hop EdgeIndex records).
batch = next(iter(train_loader))
print(batch)


(128, tensor([2342, 1291, 1635,  ..., 2700, 2571, 1018]), [EdgeIndex(edge_index=tensor([[ 128,  129,  130,  ..., 1645,  498, 1243],
        [   0,    1,    1,  ..., 1243, 1244, 1244]]), e_id=tensor([ 9588,  5014,  5015,  ...,  7732, 10322, 10323]), size=(2010, 1245)), EdgeIndex(edge_index=tensor([[ 128,  129,  130,  ..., 1244,  127, 1243],
        [   0,    1,    1,  ...,  498,  499,  499]]), e_id=tensor([9588, 5014, 5015,  ..., 3181, 7701, 7702]), size=(1245, 500)), EdgeIndex(edge_index=tensor([[128, 129, 130, 131, 132, 133, 134, 135, 136,  28, 137, 138, 139, 140,
         141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154,
         155, 156, 157, 158, 159, 160, 161, 162,  46, 163, 164, 165, 166, 167,
         168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181,
         182, 183, 184, 185, 186,  79, 185, 187, 188, 189, 190, 191,  87, 192,
         193, 194, 139, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205,
         206,   4,  32, 207, 208,

# TUDataset - PTC MR

In [53]:
# Load the 'PTC_MR' dataset of the TUDataset collection, rooted at DATA_PATH.
# Note: this rebinds `dataset`, which held the Planetoid dataset in the cells above.
dataset = TUDataset(DATA_PATH, name='PTC_MR')

# Only the last expression of a cell is rendered, so an intermediate value has to be
# passed to display() (available as a builtin inside IPython/Jupyter kernels).
display(dataset)
# expected: PTC_MR(344)

# Last expression of the cell -> rendered as the cell output.
dataset.num_features
# expected: 18

18

In [39]:
# Inspect the first two graphs. The first lookup sits mid-cell and would be evaluated
# and thrown away, so it is shown through display() (a builtin inside IPython/Jupyter
# kernels); the second is the cell's last expression and is rendered automatically.
display(dataset[0])
# expected: Data(edge_index=[2, 2], x=[2, 18], edge_attr=[2, 4], y=[1])

dataset[1]
# expected: Data(edge_index=[2, 6], x=[4, 18], edge_attr=[6, 4], y=[1])

Data(edge_index=[2, 6], x=[4, 18], edge_attr=[6, 4], y=[1])

In [43]:
# Batch the graphs 128 at a time with the torch_geometric DataLoader.
dataloader = DataLoader(
    dataset,
    batch_size=128,
)

# Number of batches the loader yields (3 in the recorded output: 344 graphs at 128 per batch).
len(dataloader)

3

In [47]:
# Print the summary of every mini-batch. No batch index is needed here, so the loader
# is iterated directly rather than through enumerate().
for batch in dataloader:
    print(batch)

DataBatch(edge_index=[2, 3606], x=[1755, 18], edge_attr=[3606, 4], y=[128], batch=[1755], ptr=[129])
DataBatch(edge_index=[2, 4248], x=[2049, 18], edge_attr=[4248, 4], y=[128], batch=[2049], ptr=[129])
DataBatch(edge_index=[2, 2254], x=[1111, 18], edge_attr=[2254, 4], y=[88], batch=[1111], ptr=[89])
