In [1]:
# NN related libraries
import torch 
import torch.nn as nn 
from torch.optim import Adam
from torch.utils.tensorboard import SummaryWriter

import sys 
sys.path.append('./')

# from the code 
from model.GAT import GAT
from utils.layers import GAT_layer

# data related 
from torch_geometric.datasets import Planetoid
import torch_geometric.transforms as T

import time
import os



In [2]:
dataset = Planetoid(root='/tmp/Cora', name='Cora',transform=T.NormalizeFeatures())
print(f'Dataset: {dataset}:')
print('======================')
print(f'Number of graphs: {len(dataset)}')
print(f'Number of features: {dataset.num_features}')
print(f'Number of classes: {dataset.num_classes}')

Dataset: Cora():
Number of graphs: 1
Number of features: 1433
Number of classes: 7


In [3]:
edge_index=dataset[0].edge_index
nodes_features=dataset[0].x
nodes_labels=dataset[0].y

#parameters_GAT_network={'num_features_per_layer'={}}



### Bellow we just test the GAT_layer to check that is returning what we expect

In [5]:
layer_test=GAT_layer(nodes_features.shape[0],nodes_features.shape[1],8,8,nn.ELU)

In [6]:
layer_test((nodes_features,edge_index))

(tensor([[ 0.1441,  0.0197, -0.0162,  ...,  0.0000,  0.0000,  0.0000],
         [ 0.0667,  0.1129, -0.0322,  ...,  0.0610,  0.0526, -0.0135],
         [ 0.0410,  0.0206, -0.0472,  ...,  0.0064, -0.0332, -0.0613],
         ...,
         [-0.0078,  0.1516, -0.0268,  ...,  0.0000,  0.0000,  0.0000],
         [ 0.5963,  0.0873, -0.3285,  ..., -0.0198,  0.1006, -0.1098],
         [ 0.0622,  0.2269, -0.0720,  ..., -0.0416,  0.0839,  0.1467]],
        grad_fn=<ViewBackward0>),
 tensor([[   0,    0,    0,  ..., 2707, 2707, 2707],
         [ 633, 1862, 2582,  ...,  598, 1473, 2706]]))

### Bellow we test the GAT network. We put two layers, similar to the model used in the experiment in the original paper, which is the one we'll use latter for training

In [11]:
model_test=GAT(2,nodes_features.shape[0],[nodes_features.shape[1],8,7],[8,1])

In [12]:
out_features=model_test((nodes_features,edge_index))

In [13]:
out_features[0][0]

tensor([0.1758, 0.2000, 0.1018, 0.1263, 0.1113, 0.1947, 0.0901],
       grad_fn=<SelectBackward0>)

In [14]:
#let's check that the output has the right shape to pass to the loss function
loss=nn.CrossEntropyLoss()
loss(out_features[0],nodes_labels)

tensor(1.9661, grad_fn=<NllLossBackward0>)

## Training the network in the cora dataset

### First we define the hyperparameters of the network

In [4]:
C=7 # number of classes of the cora dataset
params_network={'num_layers':2,
               'num_nodes':nodes_features.shape[0],
                'num_features_per_layer':[nodes_features.shape[1],8,C],
                'num_heads_per_layer':[8,1],
                 'num_epochs':500
               }


### Divide the dataset in training, validation and test

In [10]:
# indices of each set according to the masks given in the dataset (i.e we use the same assignation as in the original paper)
training_set_indices=(dataset[0].train_mask).nonzero(as_tuple=False).flatten()
test_set_indices=(dataset[0].test_mask).nonzero(as_tuple=False).flatten()
val_set_indices=(dataset[0].val_mask).nonzero(as_tuple=False).flatten()
print('The training dataset starts in node {:} and comprises {:} nodes'.format(training_set_indices[0].numpy(),training_set_indices.shape[0]))
print('The validation dataset starts in node {:} and comprises {:} nodes'.format(test_set_indices[0].numpy(),test_set_indices.shape[0]))
print('The test dataset starts in node {:} and comprises {:} nodes'.format(val_set_indices[0].numpy(),val_set_indices.shape[0]))


The training dataset starts in node 0 and comprises 140 nodes
The validation dataset starts in node 1708 and comprises 1000 nodes
The test dataset starts in node 140 and comprises 500 nodes


In [11]:
#Extract the labels for the training set
nodes_labels_training_set=nodes_labels.index_select(0,training_set_indices)
#validation
nodes_labels_validation_set=nodes_labels.index_select(0,val_set_indices)
#test
nodes_labels_test_set=nodes_labels.index_select(0,test_set_indices)

Now we have everything we need in order to start the training process. Let's define the model and run the learning process

In [5]:
model=GAT(params_network['num_layers'],params_network['num_nodes'],params_network['num_features_per_layer'],params_network['num_heads_per_layer'])

In [25]:
# Let's run the training loop 
def train_gat(params_network,num_epochs=10000,val_lapse=1000,perform_test='True'):
    #device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    device=torch.device('mps')
    time_start=time.time()
    model_gat=GAT(params_network['num_layers'],params_network['num_nodes'],params_network['num_features_per_layer'],params_network['num_heads_per_layer']).to(device)
    
    loss_fn = nn.CrossEntropyLoss()
    optimizer = Adam(model_gat.parameters(), lr=0.01,weight_decay=0.0005) # weight decay corresponds to the L2 penalty, which in the original implementation is chosen to the value we put here
    
    graph_data=(nodes_features,edge_index)
    for epoch in range(num_epochs):
        print(epoch)
        
        model_gat.train() #set model in training mode
        
        # We do a forward pass of the model and extract the unnormalized logits for the training set 
        # shape = (N, C) where N is the number of nodes in the split (train/val/test) and C is the number of classes
        nodes_unnormalized_out_train = model_gat(graph_data)[0].index_select(0,training_set_indices)
        
        loss=loss_fn(nodes_unnormalized_out_train,nodes_labels_training_set)
        
        #Optimizer backward evaluation
        
        optimizer.zero_grad()  
        loss.backward()  
        optimizer.step()  
        
        # Compute the accuracy

        # Finds the index of maximum (unnormalized) score for every node and that's the class prediction for that node.
        # Compare those to true (ground truth) labels and find the fraction of correct predictions -> accuracy metric.
        predictions = torch.argmax(nodes_unnormalized_out_train, dim=-1)
        accuracy = torch.sum(torch.eq(predictions, nodes_labels_training_set).long()).item() / len(nodes_labels_training_set)
        
        #TensorBoard summary writter 
        writer=SummaryWriter()
        
        writer.add_scalar('Loss/train',loss.item(),epoch)
        writer.add_scalar('Accuracy/train',accuracy,epoch)
        
        print(f'time elapsed={(time.time()-time_start):.2f} [s]')
        print(f'accuracy test={accuracy:.3f}')
        if (epoch+1)%val_lapse==0:
            with torch.no_grad():
                nodes_unnormalized_out_val = model_gat(graph_data)[0].index_select(0,val_set_indices)
                loss_val=loss_fn(nodes_unnormalized_out_val,nodes_labels_validation_set)
                predictions = torch.argmax(nodes_unnormalized_out_val, dim=-1)
                accuracy = torch.sum(torch.eq(predictions, nodes_labels_validation_set).long()).item() / len(nodes_labels_validation_set)
        
                
                writer.add_scalar('Loss/validation',loss_val.item(),epoch)
                writer.add_scalar('Accuracy/validation',accuracy,epoch)
                print(f'GAT training: time elapsed= {(time.time() - time_start):.2f} [s] | epoch={epoch + 1} | val acc={accuracy}')
        
    if perform_test:
        with torch.no_grad():
            nodes_unnormalized_out_test = model_gat(graph_data)[0].index_select(0,test_set_indices)
            loss_test=loss_fn(nodes_unnormalized_out_test,nodes_labels_test_set)
            predictions = torch.argmax(nodes_unnormalized_out_test, dim=-1)
            accuracy = torch.sum(torch.eq(predictions, nodes_labels_test_set).long()).item() / len(nodes_labels_test_set)
        print(f'Test accuracy = {test_acc}')
    torch.save({'state_dict':model.state_dict},os.path.join(os.path.dirname('model'), 'model','saved_model','model_gat_trained.pt'))
    writer.close()
        
        

        


        
        
    
    
    
    
    

In [26]:
train_gat(params_network,num_epochs=1001)

RuntimeError: The MPS backend is supported on MacOS 12.3+.Current OS version can be queried using `sw_vers`

In [24]:
torch.mps.is_available

AttributeError: module 'torch' has no attribute 'mps'

In [7]:
os.path.join(os.path.dirname('model'), 'model','saved_model')

'model/saved_model'

In [24]:
os.path.dirname('model')

''