<a href="https://colab.research.google.com/github/adithyamauryakr/pytorchtutorials/blob/main/dataset_dataloader.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
pip install torch_geometric

Collecting torch_geometric
  Downloading torch_geometric-2.6.1-py3-none-any.whl.metadata (63 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/63.1 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m63.1/63.1 kB[0m [31m1.7 MB/s[0m eta [36m0:00:00[0m
Downloading torch_geometric-2.6.1-py3-none-any.whl (1.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m19.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: torch_geometric
Successfully installed torch_geometric-2.6.1


In [3]:
import torch_geometric
from torch_geometric.datasets import Planetoid

In [4]:
dataset = Planetoid(root='tutorial1', name='Cora')

Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.x
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.tx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.allx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.y
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.ty
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.ally
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.graph
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.test.index
Processing...
Done!


### Cora: different papers and how they are cited among them

Dataset properties:

In [5]:
print(dataset)
print('no of graphs: \t\t', len(dataset))
print('no of clases: \t\t', dataset.num_classes)
print('no of node features:\t', dataset.num_node_features)
print('no of edge features: \t', dataset.num_edge_features)

Cora()
no of graphs: 		 1
no of clases: 		 7
no of node features:	 1433
no of edge features: 	 0


Dataset shapes

In [6]:
print(dataset.data)

Data(x=[2708, 1433], edge_index=[2, 10556], y=[2708], train_mask=[2708], val_mask=[2708], test_mask=[2708])




In [9]:
print('edge index: \t\t', dataset.edge_index.shape)
print(dataset.data.edge_index)
print('\n')
print(
    'train_mask: \t\t', dataset.train_mask.shape,
    '\n',
    dataset.data.train_mask,
    '\n',
    'x:\t\t', dataset.data.x.shape,
    '\n',
    dataset.data.x,
    '\n',
    'y:\t\t', dataset.data.y.shape,
    '\n',
    dataset.data.y
)

edge index: 		 torch.Size([2, 10556])
tensor([[ 633, 1862, 2582,  ...,  598, 1473, 2706],
        [   0,    0,    0,  ..., 2707, 2707, 2707]])


train_mask: 		 torch.Size([2708]) 
 tensor([ True,  True,  True,  ..., False, False, False]) 
 x:		 torch.Size([2708, 1433]) 
 tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]]) 
 y:		 torch.Size([2708]) 
 tensor([3, 4, 4,  ..., 3, 3, 3])


In [10]:
import os.path as osp

import torch
import torch.nn.functional as F
from torch_geometric.nn import SAGEConv

In [11]:
data = dataset[0]

In [12]:
class Net(torch.nn.Module):
  def __init__(self):
    super(Net, self).__init__()

    self.conv = SAGEConv(dataset.num_features,
                         dataset.num_classes,
                         aggr='max') # max, mean, add ...

  def forward(self):
    x = self.conv(data.x, data.edge_index)
    return F.log_softmax(x, dim=1)

In [15]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model, data = Net().to(device), data.to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)

In [20]:
def train():
  model.train()
  optimizer.zero_grad()
  F.nll_loss(model()[data.train_mask], data.y[data.train_mask]).backward()
  optimizer.step()

def test():
  model.eval()
  logits, accs = model(), []

  for _, mask in data('train_mask', 'val_mask', 'test_mask'):
    pred = logits[mask].max(1)[1]
    acc = pred.eq(data.y[mask]).sum().item()/mask.sum().item()
    accs.append(acc)
  return accs


In [21]:
  best_val_acc = test_acc = 0

  for epoch in range(1, 100):
    train()
    _, val_acc, tmp_test_acc = test()

    if val_acc > best_val_acc:
      best_val_acc = val_acc
      test_acc = tmp_test_acc

    log = 'Epoch: {:03d}, Val: {:.4f}, Test: {:.4f}'

    if epoch % 10 == 0:
      print(log.format(epoch, best_val_acc, test_acc))

Epoch: 010, Val: 0.7320, Test: 0.7300
Epoch: 020, Val: 0.7320, Test: 0.7300
Epoch: 030, Val: 0.7320, Test: 0.7300
Epoch: 040, Val: 0.7320, Test: 0.7300
Epoch: 050, Val: 0.7320, Test: 0.7300
Epoch: 060, Val: 0.7320, Test: 0.7300
Epoch: 070, Val: 0.7320, Test: 0.7300
Epoch: 080, Val: 0.7320, Test: 0.7300
Epoch: 090, Val: 0.7320, Test: 0.7300
