In [8]:
from Model import *
import torch as th,torch
import numpy as np
from dgl.contrib.data import load_data
import csv

# <B>aifb dataset

In [40]:
data = load_data(dataset='aifb')
num_nodes = data.num_nodes
num_rels = data.num_rels
num_classes = data.num_classes
labels = data.labels
train_idx = data.train_idx
# split training and validation set
val_idx = train_idx[:len(train_idx) // 5]
train_idx = train_idx[len(train_idx) // 5:]

# edge type and normalization factor
edge_type = torch.from_numpy(data.edge_type)
edge_norm = torch.from_numpy(data.edge_norm).unsqueeze(1)

labels = torch.from_numpy(labels).view(-1)


Loading dataset aifb
Number of nodes:  8285
Number of edges:  66371
Number of relations:  91
Number of classes:  4
removing nodes that are more than 3 hops away


In [41]:
# create graph
g = DGLGraph()
g.add_nodes(num_nodes)
g.add_edges(data.edge_src, data.edge_dst)
g.edata.update({'type': edge_type.long(), 'norm': edge_norm})
inputs = torch.arange(num_nodes).reshape(-1,1)

In [42]:
#aifb
RNN_input_size = num_nodes
RNN_hidden_size = 40
RGCN_input_size = 30
RGCN_hidden_size = 20
Num_classes = 4
Num_rels = 91
dropout = 0.5
activation = F.relu
sequence_length = 1
Num_bases=0
lr = 0.01 # learning rate
l2norm = 0 # L2 norm coefficient

n_epochs = 25 # epochs to train

In [43]:
model = Model(RNN_input_size,
                     RNN_hidden_size,
                     RGCN_input_size,
                     RGCN_hidden_size,
                     Num_classes,
                     Num_rels,
                     Num_bases=Num_bases,
                     Num_hidden_layers=0,
                     dropout=dropout)

In [18]:
optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=l2norm)
criterion = nn.CrossEntropyLoss()
print("start training...")
model.train()
for epoch in range(n_epochs+1):
    optimizer.zero_grad()
    logits = model.forward(g,inputs,sequence_length)
    loss = criterion(logits[train_idx], labels[train_idx].long())
    loss.backward()

    optimizer.step()
    if not epoch % 5:
        train_acc = torch.sum(logits[train_idx].argmax(dim=1) == labels[train_idx].long())
        train_acc = train_acc.item() / len(train_idx)
        val_loss = F.cross_entropy(logits[val_idx], labels[val_idx].long())
        val_acc = torch.sum(logits[val_idx].argmax(dim=1) == labels[val_idx].long())
        val_acc = val_acc.item() / len(val_idx)
        print("Epoch {:05d} | ".format(epoch) +
              "Train Accuracy: {:.4f} | Train Loss: {:.4f} | ".format(
                  train_acc, loss.item()) +
              "Validation Accuracy: {:.4f} | Validation loss: {:.4f}".format(
                  val_acc, val_loss.item()))

start training...
Epoch 00000 | Train Accuracy: 0.2768 | Train Loss: 1.3819 | Validation Accuracy: 0.2857 | Validation loss: 1.3844
Epoch 00005 | Train Accuracy: 0.8482 | Train Loss: 0.4807 | Validation Accuracy: 0.9286 | Validation loss: 0.4686
Epoch 00010 | Train Accuracy: 0.9732 | Train Loss: 0.1165 | Validation Accuracy: 0.9643 | Validation loss: 0.2175
Epoch 00015 | Train Accuracy: 0.9911 | Train Loss: 0.0181 | Validation Accuracy: 0.9643 | Validation loss: 0.3424
Epoch 00020 | Train Accuracy: 1.0000 | Train Loss: 0.0004 | Validation Accuracy: 0.9643 | Validation loss: 0.5597
Epoch 00025 | Train Accuracy: 1.0000 | Train Loss: 0.0001 | Validation Accuracy: 0.9643 | Validation loss: 0.7580


# <b>bgs dataset

In [45]:
data = load_data(dataset='bgs')
num_nodes = data.num_nodes
num_rels = data.num_rels
num_classes = data.num_classes
labels = data.labels
train_idx = data.train_idx
# split training and validation set
val_idx = train_idx[:len(train_idx) // 5]
train_idx = train_idx[len(train_idx) // 5:]

# edge type and normalization factor
edge_type = torch.from_numpy(data.edge_type)
edge_norm = torch.from_numpy(data.edge_norm).unsqueeze(1)

labels = torch.from_numpy(labels).view(-1)

Loading dataset bgs
Number of nodes:  333845
Number of edges:  2166243
Number of relations:  207
Number of classes:  2
removing nodes that are more than 3 hops away


In [46]:
# create graph
g_mutag = DGLGraph()
g_mutag.add_nodes(num_nodes)
g_mutag.add_edges(data.edge_src, data.edge_dst)
g_mutag.edata.update({'type': edge_type.long(), 'norm': edge_norm})
inputs = torch.arange(num_nodes).reshape(-1,1)

In [47]:
# mutag
RNN_input_size = num_nodes
RNN_hidden_size = 40
RGCN_input_size = 30
RGCN_hidden_size = 16
Num_classes = num_classes
Num_rels = num_rels
Num_bases=40
dropout = 0.5
activation = F.relu
sequence_length = 1
n_epochs = 25 # epochs to train
lr = 0.01 # learning rate
l2norm = 0 # L2 norm coefficient

In [48]:
model_mutag = Model(RNN_input_size,
                     RNN_hidden_size,
                     RGCN_input_size,
                     RGCN_hidden_size,
                     Num_classes,
                     Num_rels,
                     Num_bases=Num_bases,
                     Num_hidden_layers=0,
                     dropout=dropout)

In [11]:
optimizer = torch.optim.Adam(model_mutag.parameters(), lr=lr, weight_decay=l2norm)
criterion = nn.CrossEntropyLoss()
print("start training...")
model_mutag.train()
for epoch in range(n_epochs):
    optimizer.zero_grad()
    logits = model_mutag.forward(g_mutag,inputs,sequence_length)
    loss = criterion(logits[train_idx], labels[train_idx].long())
    loss.backward()

    optimizer.step()

    train_acc = torch.sum(logits[train_idx].argmax(dim=1) == labels[train_idx].long())
    train_acc = train_acc.item() / len(train_idx)
    val_loss = F.cross_entropy(logits[val_idx], labels[val_idx].long())
    val_acc = torch.sum(logits[val_idx].argmax(dim=1) == labels[val_idx].long())
    val_acc = val_acc.item() / len(val_idx)
    print("Epoch {:05d} | ".format(epoch) +
          "Train Accuracy: {:.4f} | Train Loss: {:.4f} | ".format(
              train_acc, loss.item()) +
          "Validation Accuracy: {:.4f} | Validation loss: {:.4f}".format(
              val_acc, val_loss.item()))

start training...
Epoch 00000 | Train Accuracy: 0.4362 | Train Loss: 0.7028 | Validation Accuracy: 0.3478 | Validation loss: 0.7013
Epoch 00001 | Train Accuracy: 0.6489 | Train Loss: 1.0428 | Validation Accuracy: 0.7391 | Validation loss: 0.7068
Epoch 00002 | Train Accuracy: 0.6277 | Train Loss: 0.6322 | Validation Accuracy: 0.6957 | Validation loss: 0.6055
Epoch 00003 | Train Accuracy: 0.7872 | Train Loss: 0.6315 | Validation Accuracy: 0.6957 | Validation loss: 0.6552
Epoch 00004 | Train Accuracy: 0.6915 | Train Loss: 0.5927 | Validation Accuracy: 0.6522 | Validation loss: 0.6322
Epoch 00005 | Train Accuracy: 0.7447 | Train Loss: 0.5288 | Validation Accuracy: 0.6957 | Validation loss: 0.5864
Epoch 00006 | Train Accuracy: 0.8085 | Train Loss: 0.4472 | Validation Accuracy: 0.7391 | Validation loss: 0.5131
Epoch 00007 | Train Accuracy: 0.7979 | Train Loss: 0.3722 | Validation Accuracy: 0.7826 | Validation loss: 0.4476
Epoch 00008 | Train Accuracy: 0.8298 | Train Loss: 0.2968 | Validation

KeyboardInterrupt: 

# <b>mutag

In [31]:
data = load_data(dataset='mutag')
num_nodes = data.num_nodes
num_rels = data.num_rels
num_classes = data.num_classes
labels = data.labels
train_idx = data.train_idx
# split training and validation set
val_idx = train_idx[:len(train_idx) // 5]
train_idx = train_idx[len(train_idx) // 5:]

# edge type and normalization factor
edge_type = torch.from_numpy(data.edge_type)
edge_norm = torch.from_numpy(data.edge_norm).unsqueeze(1)

labels = torch.from_numpy(labels).view(-1)

Loading dataset mutag
Number of nodes:  23644
Number of edges:  172098
Number of relations:  47
Number of classes:  2
removing nodes that are more than 3 hops away


In [36]:
# create graph
g = DGLGraph()
g.add_nodes(num_nodes)
g.add_edges(data.edge_src, data.edge_dst)
g.edata.update({'type': edge_type.long(), 'norm': edge_norm})
inputs = torch.arange(num_nodes).reshape(-1,1)

In [37]:

RNN_input_size = num_nodes
RNN_hidden_size = 50
RGCN_input_size = 40
RGCN_hidden_size = 20
Num_classes = num_classes
Num_rels = num_rels
dropout = 0.5
activation = F.relu
sequence_length = 1
Num_bases=30
lr = 0.01 # learning rate
l2norm = 5e-4 # L2 norm coefficient
n_epochs = 30 # epochs to train


In [38]:
model = Model(RNN_input_size,
                     RNN_hidden_size,
                     RGCN_input_size,
                     RGCN_hidden_size,
                     Num_classes,
                     Num_rels,
                     Num_bases=Num_bases,
                     Num_hidden_layers=0,
                     dropout=dropout)

In [None]:
optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=l2norm)
criterion = nn.CrossEntropyLoss()
print("start training...")
model.train()
for epoch in range(n_epochs):
    optimizer.zero_grad()
    logits = model.forward(g,inputs,sequence_length)
    loss = criterion(logits[train_idx], labels[train_idx].long())
    loss.backward()

    optimizer.step()

    train_acc = torch.sum(logits[train_idx].argmax(dim=1) == labels[train_idx].long())
    train_acc = train_acc.item() / len(train_idx)
    val_loss = F.cross_entropy(logits[val_idx], labels[val_idx].long())
    val_acc = torch.sum(logits[val_idx].argmax(dim=1) == labels[val_idx].long())
    val_acc = val_acc.item() / len(val_idx)
    print("Epoch {:05d} | ".format(epoch) +
          "Train Accuracy: {:.4f} | Train Loss: {:.4f} | ".format(
              train_acc, loss.item()) +
          "Validation Accuracy: {:.4f} | Validation loss: {:.4f}".format(
              val_acc, val_loss.item()))

In [13]:
def domodel190420(dataset = 'mutag'):
    data = load_data(dataset=dataset)
    num_nodes = data.num_nodes
    num_rels = data.num_rels
    num_classes = data.num_classes
    labels = data.labels
    train_idx = data.train_idx
    # split training and validation set
    val_idx = train_idx[:len(train_idx) // 5]
    train_idx = train_idx[len(train_idx) // 5:]

    # edge type and normalization factor
    edge_type = torch.from_numpy(data.edge_type)
    edge_norm = torch.from_numpy(data.edge_norm).unsqueeze(1)

    labels = torch.from_numpy(labels).view(-1)

    # create graph
    g = DGLGraph()
    g.add_nodes(num_nodes)
    g.add_edges(data.edge_src, data.edge_dst)
    g.edata.update({'type': edge_type.long(), 'norm': edge_norm})
    inputs = torch.arange(num_nodes).reshape(-1,1)

    RNN_Hidden_Size = []
    RGCN_Input_Size = []
    RGCN_Hidden_Size = []
    DROUPOUT = []
    Num_Bases = []
    Val_Acc = []
    Numbase = [40]
    if dataset == 'mutag':
        Numbase = [0,30]
    elif dataset == 'aifb':
        Numbase = [0]
    for RNN_hidden_size in [20,30,40,50]:
        for RGCN_input_size in [10,20,30,40]:
            for RGCN_hidden_size in [10,20,30,40]:
                for dropout in [0,0.1,0.2,0.3,0.4,0.5]:
                    for Num_bases in Numbase:
                        RNN_Hidden_Size.append(RNN_hidden_size)
                        RGCN_Input_Size.append(RGCN_input_size)
                        RGCN_Hidden_Size.append(RGCN_hidden_size)
                        RNN_input_size = num_nodes
                        DROUPOUT.append(dropout)
                        Num_Bases.append(Num_bases)
                        # RNN_hidden_size = 50
                        # RGCN_input_size = 40
                        # RGCN_hidden_size = 20
                        Num_classes = num_classes
                        Num_rels = num_rels
                        #dropout = 0.5
                        activation = F.relu
                        sequence_length = 1
                        #Num_bases=30
                        lr = 0.01 # learning rate
                        l2norm = 5e-4 # L2 norm coefficient
                        n_epochs = 50 # epochs to train

                        model = Model(RNN_input_size,
                                             RNN_hidden_size,
                                             RGCN_input_size,
                                             RGCN_hidden_size,
                                             Num_classes,
                                             Num_rels,
                                             Num_bases=Num_bases,
                                             Num_hidden_layers=0,
                                             dropout=dropout)

                        optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=l2norm)
                        criterion = nn.CrossEntropyLoss()
                        print("start training...")
                        model.train()
                        for epoch in range(n_epochs):
                            optimizer.zero_grad()
                            logits = model.forward(g,inputs,sequence_length)
                            loss = criterion(logits[train_idx], labels[train_idx].long())
                            loss.backward()

                            optimizer.step()
                            train_acc = torch.sum(logits[train_idx].argmax(dim=1) == labels[train_idx].long())
                            train_acc = train_acc.item() / len(train_idx)
                            if train_acc == 1:
                                break
                        val_loss = F.cross_entropy(logits[val_idx], labels[val_idx].long())
                        val_acc = torch.sum(logits[val_idx].argmax(dim=1) == labels[val_idx].long())
                        val_acc = val_acc.item() / len(val_idx)
                        print("Epoch {:05d} | ".format(epoch) +
                                "Train Accuracy: {:.4f} | Train Loss: {:.4f} | ".format(
                                    train_acc, loss.item()) +
                                "Validation Accuracy: {:.4f} | Validation loss: {:.4f}".format(
                                    val_acc, val_loss.item()))
                        Val_Acc.append(val_acc)
    c=[]
    for i in range(len(RNN_Hidden_Size)):
        c.append([RNN_Hidden_Size[i],RGCN_Input_Size[i],RGCN_Hidden_Size[i],DROUPOUT[i],Num_Bases[i],Val_Acc[i]])
    with open('result.csv', 'w', newline='') as csvfile:
        writer = csv.writer(csvfile)
        for row in c:
            writer.writerow(row)

In [14]:
datasets = ['aifb','mutag','bgs','am']
domodel190420('aifb')

Loading dataset aifb
Number of nodes:  8285
Number of edges:  66371
Number of relations:  91
Number of classes:  4
removing nodes that are more than 3 hops away
start training...




Epoch 00039 | Train Accuracy: 1.0000 | Train Loss: 0.0270 | Validation Accuracy: 0.9286 | Validation loss: 0.3041
