<a href="https://colab.research.google.com/github/ankit-singh973/Graph_Neural_Network/blob/main/1_SageConv_cora.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## **PyTorch Geometric**

In [None]:
!pip install torch_sparse
!pip install torch-scatter

In [1]:
pip install torch_geometric

Collecting torch_geometric
  Downloading torch_geometric-2.5.0-py3-none-any.whl (1.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m21.9 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: torch_geometric
Successfully installed torch_geometric-2.5.0


In [2]:
import torch_geometric
import pandas as pd
from torch_geometric.datasets import Planetoid

# **Load dataset**

In [3]:
dataset = Planetoid(root = "/content/drive/MyDrive/PyTorch_Graph/data1", name = "Cora")

### **Dataset properties**

In [4]:
print(dataset)
print("number of graphs:\t\t", len(dataset)) # we have one huge graph
print("number of classes:\t\t", dataset.num_classes)
print("number of node features:\t",dataset.num_node_features)
print("number of edge features:\t", dataset.num_edge_features)

Cora()
number of graphs:		 1
number of classes:		 7
number of node features:	 1433
number of edge features:	 0


### **Dataset Shape**

In [5]:
print(dataset.data)

Data(x=[2708, 1433], edge_index=[2, 10556], y=[2708], train_mask=[2708], val_mask=[2708], test_mask=[2708])




In [6]:
print("shape of edge_index:\t\t", dataset.data.edge_index.shape)
print("edge_index: ", dataset.data.edge_index)

shape of edge_index:		 torch.Size([2, 10556])
edge_index:  tensor([[   0,    0,    0,  ..., 2707, 2707, 2707],
        [ 633, 1862, 2582,  ...,  598, 1473, 2706]])


In [7]:
print("shape of train_mask:\t\t", dataset.data.train_mask.shape)
print("train_mask:",dataset.data.train_mask)

shape of train_mask:		 torch.Size([2708])
train_mask: tensor([ True,  True,  True,  ..., False, False, False])


### **X is node feature**

In [8]:
print("shape of x:\t\t", dataset.data.x.shape)
print("X_dataset:",dataset.data.x)

shape of x:		 torch.Size([2708, 1433])
X_dataset: tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]])


### **Y is node label**

In [9]:
print("shape of y: ", dataset.data.y.shape)
print("Y_dataset: ", dataset.data.y)

shape of y:  torch.Size([2708])
Y_dataset:  tensor([3, 4, 4,  ..., 3, 3, 3])


In [10]:
dataset.data.y.unique()

tensor([0, 1, 2, 3, 4, 5, 6])

In [11]:
import os.path as osp
import torch
import torch.nn.functional as F
from torch_geometric.nn import SAGEConv

In [12]:
data = dataset[0]

In [13]:
print(dataset.num_features)

1433


In [14]:
print(dataset.num_classes)

7


## **Here we are going to define our neural network**

In [15]:
class Net(torch.nn.Module):
  def __init__(self):
    super(Net, self).__init__()

# SAGEConv is a convulation layer, it requires a input layer and a output layer
    self.conv = SAGEConv(dataset.num_features, #number of iutput node will be 1433
                         dataset.num_classes, #number of output node will be 7
                         aggr = "max") #aggregation = max, mean, min, add, etc.

  def forward(self):
    x = self.conv(data.x, data.edge_index)
    return F.log_softmax(x, dim=1)

In [19]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model, data = Net().to(device), data.to(device)
optimizer = torch.optim.Adam(model.parameters(), lr = 0.001, weight_decay = 5e-4)

In [20]:
# TO TRAIN THE MODEL
def train():
  model.train()
  optimizer.zero_grad()
  F.nll_loss(model()[data.train_mask], data.y[data.train_mask]).backward()
  optimizer.step()


# TO TEST
def test():
  model.eval()
  logits, accs = model(), []
  for _, mask in data("train_mask", "val_mask", "test_mask"):
    pred = logits[mask].max(1)[1]
    acc = pred.eq(data.y[mask]).sum().item() / mask.sum().item()
    accs.append(acc)
  return accs

In [22]:
best_val_acc = test_acc = 0
for epoch in range(1, 100):
  train()
  _, val_acc, tmp_test_acc = test()
  if val_acc > best_val_acc:
    best_val_acc = val_acc
    test_acc = tmp_test_acc

  log = "Epoch: {:03d}, Val: {:.4f}, Test: {:.4f}"

  if epoch % 10 == 0:
    print(log.format(epoch, best_val_acc, test_acc))



Epoch: 010, Val: 0.7120, Test: 0.7080
Epoch: 020, Val: 0.7120, Test: 0.7080
Epoch: 030, Val: 0.7120, Test: 0.7080
Epoch: 040, Val: 0.7120, Test: 0.7080
Epoch: 050, Val: 0.7120, Test: 0.7080
Epoch: 060, Val: 0.7140, Test: 0.7080
Epoch: 070, Val: 0.7140, Test: 0.7080
Epoch: 080, Val: 0.7140, Test: 0.7080
Epoch: 090, Val: 0.7140, Test: 0.7080
