## Testing if anything works

In [1]:
import torch
# Check whether this PyTorch build can see a CUDA-capable GPU.
torch.cuda.is_available()

False

In [2]:
# Show the installed PyTorch version.
torch.__version__

'1.9.0'

In [3]:
# Smoke test: create a random 5x3 tensor to confirm basic tensor creation works.
torch.rand(5,3)

tensor([[0.6383, 0.5959, 0.6232],
        [0.1191, 0.8039, 0.6905],
        [0.1223, 0.7219, 0.2641],
        [0.1298, 0.1866, 0.8986],
        [0.2556, 0.1959, 0.9379]])

In [4]:
# Ask the shell for the version of the `python` found on PATH.
# NOTE(review): this may not be the interpreter the kernel runs on - confirm if it matters.
!python --version

Python 3.8.8


In [5]:
import torch_geometric as tg

In [6]:
import torch_sparse as ts

In [7]:
# Query the NVIDIA driver tool for visible GPUs (the command is missing on machines without it).
!nvidia-smi

zsh:1: command not found: nvidia-smi


In [8]:
from torch_geometric.data import Data

# Toy graph: three nodes in a chain (0 - 1 - 2). Each undirected edge is
# written twice, once per direction; every column is one directed edge.
edge_index = torch.tensor(
    [
        [0, 1, 1, 2],
        [1, 0, 2, 1],
    ],
    dtype=torch.int64,
)
# One scalar feature per node.
x = torch.tensor([[-1], [0], [1]], dtype=torch.float32)

data = Data(x=x, edge_index=edge_index)

In [9]:
# Same node features as the toy graph, but with no edges supplied.
disconnect = Data(x=x)

In [11]:
# Transfer data object to GPU.
# device = torch.device('cuda')
# data = data.to(device)

### Trying with real data

In [12]:
from torch_geometric.datasets import TUDataset

# Load a TU graph-classification benchmark into data/TUDataset.
# The alternative below selects ENZYMES instead of PROTEINS.
# dataset = TUDataset(root='data/TUDataset', name='ENZYMES')
dataset = TUDataset(root='data/TUDataset', name='PROTEINS')

Downloading https://www.chrsmrrs.com/graphkerneldatasets/PROTEINS.zip
Extracting data/TUDataset/PROTEINS/PROTEINS.zip
Processing...
Done!


In [17]:
# Inspect the collated storage behind the dataset: node features, edges and
# labels of all graphs held together in a single Data object.
dataset.data

Data(x=[43471, 3], edge_index=[2, 162088], y=[1113])

In [None]:
# for data in dataset:
#     if data.y!=1:
#         print('new class')

In [None]:
from torch_geometric.loader import DataLoader as DataL
# Serve the dataset in order, 101 graphs per mini-batch.
loader = DataL(dataset, batch_size=101, shuffle=False)

# Each printed batch reports the dimensions of: the edge index, the node
# features, the target variable, the batch vector, and ptr (the indices used
# to split the batch back into its individual graphs).
for batch in loader:
    print(batch)

In [None]:
# Peek at the last batch left over from the loop above: its ptr vector and
# the first 22 rows of node features.
batch.ptr, batch.x[:22]

In [None]:
import torch.nn.functional as F
from torch.nn import Linear
from torch_geometric.nn import GCNConv, global_mean_pool

class BaseGCN(torch.nn.Module):
    """Minimal two-layer GCN for graph-level classification.

    Layer sizes are taken from the notebook-level ``dataset``: the input width
    is ``dataset.num_node_features`` and the output width is
    ``dataset.num_classes``, with a fixed hidden width of 16.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = GCNConv(dataset.num_node_features, 16)
        self.conv2 = GCNConv(16, dataset.num_classes)

    def forward(self, data):
        """Return per-graph log-probabilities for a (batched) graph object.

        Args:
            data: object exposing ``x``, ``edge_index`` and, when several
                graphs are batched together, ``batch`` (the graph id of every
                node).

        Returns:
            Tensor with one row per graph and one column per class, holding
            log-probabilities.
        """
        x, edge_index = data.x, data.edge_index

        batch = getattr(data, 'batch', None)
        if batch is None:
            # A single un-batched graph: every node belongs to graph 0.
            batch = x.new_zeros(x.size(0), dtype=torch.long)

        x = self.conv1(x, edge_index)
        x = F.relu(x)
        x = F.dropout(x, training=self.training)
        x = self.conv2(x, edge_index)

        # Pooling needs the node-to-graph assignment vector, not the batch
        # size, to average node embeddings per graph.
        x = global_mean_pool(x, batch)

        # Normalise over the class dimension of the [num_graphs, num_classes]
        # output.
        return F.log_softmax(x, dim=1)
    
# Define our GCN class as a pytorch Module
class GCN(torch.nn.Module):
    """Three GCN layers, mean-pool readout and a linear classification head.

    Input and output widths come from the notebook-level ``dataset``
    (``num_node_features`` and ``num_classes``).

    Args:
        hidden_channels: width shared by all three graph-convolution layers.
    """

    def __init__(self, hidden_channels):
        super().__init__()
        # This is a plain torch.nn.Module that stacks three GCNConv layers.
        self.conv1 = GCNConv(dataset.num_node_features, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, hidden_channels)
        self.conv3 = GCNConv(hidden_channels, hidden_channels)
        # Linear head mapping the pooled graph embedding to class scores.
        self.lin = Linear(hidden_channels, dataset.num_classes)

    def forward(self, x, edge_index, batch):
        """Return unnormalised class scores, one row per graph in the batch."""
        # 1. Node embeddings: ReLU after the first two convolutions only.
        hidden = F.relu(self.conv1(x, edge_index))
        hidden = F.relu(self.conv2(hidden, edge_index))
        hidden = self.conv3(hidden, edge_index)

        # 2. Readout: average node embeddings per graph
        #    -> [batch_size, hidden_channels]
        pooled = global_mean_pool(hidden, batch)

        # 3. Classifier: dropout (active in training mode only), then linear.
        pooled = F.dropout(pooled, p=0.5, training=self.training)
        return self.lin(pooled)
    
# Build the classifier with 64 hidden channels in every GCN layer.
model = GCN(hidden_channels=64)

In [None]:
# device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# model = GCN(64).to(device)
# # data = dataset.to(device)
# optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)

# # model.train()
# # for epoch in range(200):
# #     for batch in loader:
# #         optimizer.zero_grad()
# #         out = model(batch)
# #         loss = F.nll_loss(out, batch.y)
# #         loss.backward()
# #         optimizer.step()

In [None]:
# Loss for the raw class scores produced by the model.
criterion = torch.nn.CrossEntropyLoss()

# Random 80/20 train/test split over whole graphs.
dataset = dataset.shuffle()
split_idx = int(len(dataset) * 0.8)
train_data = dataset[:split_idx]
test_data = dataset[split_idx:]

# Reshuffle the training graphs every epoch. The evaluation loader keeps a
# fixed order: accuracy does not depend on it, and skipping the shuffle avoids
# drawing from the global RNG during evaluation.
train_loader = DataL(train_data, batch_size=64, shuffle=True)
test_loader = DataL(test_data, batch_size=64, shuffle=False)

optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
# Initialize our train function
def train():
    """Run one optimisation pass over ``train_loader``.

    Uses the notebook-level ``model``, ``criterion``, ``optimizer`` and
    ``train_loader``; updates the model parameters in place and returns
    nothing.
    """
    model.train()  # switch to training mode before the pass

    for data in train_loader:  # Iterate in batches over the training dataset.
        # Clear gradients before the forward pass, so gradients left behind by
        # an interrupted earlier run cannot leak into this update.
        optimizer.zero_grad()
        out = model(data.x, data.edge_index, data.batch)  # Forward pass.
        loss = criterion(out, data.y)  # Compute the loss.
        loss.backward()  # Derive gradients.
        optimizer.step()  # Update parameters based on gradients.
 # Define our test function
def test(loader):
    """Return the classification accuracy of ``model`` on ``loader``.

    Args:
        loader: iterable of batches exposing ``x``, ``edge_index``, ``batch``
            and ``y``, with a ``dataset`` attribute whose length is the number
            of graphs served.

    Returns:
        Fraction of graphs whose arg-max prediction equals the label.
    """
    model.eval()  # switch to evaluation mode

    correct = 0
    # No gradients are needed for evaluation; disabling autograd avoids
    # building a graph for every batch.
    with torch.no_grad():
        for data in loader:  # Iterate in batches over the given split.
            out = model(data.x, data.edge_index, data.batch)
            pred = out.argmax(dim=1)  # Class with the highest score.
            correct += int((pred == data.y).sum())  # Count matches.
    return correct / len(loader.dataset)  # Ratio of correct predictions.
# Per-epoch accuracy history for the training and test splits.
tr_acc = []
te_acc = []

# Train for 200 epochs, measuring accuracy on both splits after each one.
for epoch in range(200):
    train()

    train_acc = test(train_loader)
    tr_acc.append(train_acc)

    test_acc = test(test_loader)
    te_acc.append(test_acc)

    # Report progress on every tenth epoch (epochs are shown 1-based).
    if not (epoch + 1) % 10:
        print(f'Epoch: {epoch+1:03d}, Train Acc: {train_acc:.4f}, Test Acc: {test_acc:.4f}')


In [None]:
import matplotlib.pyplot as plt
# Accuracy after each epoch for both splits; labelled so the figure can be
# read on its own.
fig, ax = plt.subplots()
ax.plot(tr_acc, label='train')
ax.plot(te_acc, label='test')
ax.set(title='Accuracy per epoch', xlabel='Epoch', ylabel='Accuracy')
ax.legend();

In [None]:
# Number of graphs in the test and training splits.
len(test_data), len(train_data) 

In [None]:
# Mean label of the test split (the share of class 1 when labels are 0/1).
# `test_data.data` is the storage shared with the full dataset, so the labels
# are collected by iterating the subset instead of reading `.data.y`.
test_labels = torch.cat([graph.y for graph in test_data])
test_labels.float().mean()