In [1]:
! pip install dgl           # For CPU Build
! pip install dgl-cu101     # For CUDA 10.1 Build

# Do not forget to put manually colab in gpu mode

Collecting dgl
[?25l  Downloading https://files.pythonhosted.org/packages/46/62/da7146c0e46f93dd1f17cccea3010def155a1f479c0b036b604e952f321f/dgl-0.5.3-cp36-cp36m-manylinux1_x86_64.whl (3.6MB)
[K     |████████████████████████████████| 3.6MB 19.3MB/s 
Installing collected packages: dgl
Successfully installed dgl-0.5.3
Collecting dgl-cu101
[?25l  Downloading https://files.pythonhosted.org/packages/77/08/ea2d56e85eba1c22a14fa0f9b3c4ca8b43bf07de34e454d4e23632b376ea/dgl_cu101-0.5.3-cp36-cp36m-manylinux1_x86_64.whl (25.0MB)
[K     |████████████████████████████████| 25.0MB 133kB/s 
Installing collected packages: dgl-cu101
Successfully installed dgl-cu101-0.5.3


### Setting the environnement

In [None]:
import argparse
from os import path

import numpy as np
import torch
import torch.nn.functional as F
from dgl import batch
from dgl.data.ppi import LegacyPPIDataset
from dgl.nn.pytorch import GraphConv
from sklearn.metrics import f1_score
from torch import nn, optim
from torch.utils.data import DataLoader


JUPYTER = True

MODEL_STATE_FILE = 0
if JUPYTER:
    import os
    path = ""
    MODEL_STATE_FILE = path.join((os.path.abspath(''), "/model_state.pth"))
    MODEL_STATE_FILE = "model_state.pth"
else:
    MODEL_STATE_FILE = path.join(path.dirname(path.abspath(__file__)), "model_state.pth")


### Architecture

In [2]:
class BasicGraphModel(nn.Module):

    def __init__(self, g, n_layers, input_size, hidden_size, output_size, nonlinearity):
        super().__init__()

        self.g = g
        self.layers = nn.ModuleList()
        self.layers.append(
            GraphConv(input_size, hidden_size, activation=nonlinearity))
        for i in range(n_layers - 1):
            self.layers.append(
                GraphConv(hidden_size, hidden_size, activation=nonlinearity))
        self.layers.append(GraphConv(hidden_size, output_size))

    def forward(self, inputs):
        outputs = inputs
        for i, layer in enumerate(self.layers):
            outputs = layer(self.g, outputs)
        return outputs



DGL backend not selected or invalid.  Assuming PyTorch for now.
Using backend: pytorch


Setting the default backend to "pytorch". You can change it in the ~/.dgl/config.json file or export the DGLBACKEND environment variable.  Valid options are: pytorch, mxnet, tensorflow (all lowercase)


### Main, Train, Test

In [None]:

def main(args):
    # create the dataset
    train_dataset, test_dataset = LegacyPPIDataset(
        mode="train"), LegacyPPIDataset(mode="test")
    train_dataloader = DataLoader(
        train_dataset, batch_size=args.batch_size, collate_fn=collate_fn)
    test_dataloader = DataLoader(
        test_dataset, batch_size=args.batch_size, collate_fn=collate_fn)
    n_features, n_classes = train_dataset.features.shape[1], train_dataset.labels.shape[1]

    # create the model, loss function and optimizer
    device = torch.device("cpu" if args.gpu < 0 else "cuda:" + str(args.gpu))
    model = BasicGraphModel(g=train_dataset.graph, n_layers=2, input_size=n_features,
                            hidden_size=256, output_size=n_classes, nonlinearity=F.elu).to(device)
    loss_fcn = nn.BCEWithLogitsLoss()
    optimizer = torch.optim.Adam(model.parameters())

    # train and test
    if args.mode == "train":
        train(model, loss_fcn, device, optimizer,
              train_dataloader, test_dataset)
        torch.save(model.state_dict(), MODEL_STATE_FILE)
    model.load_state_dict(torch.load(MODEL_STATE_FILE))
    return test(model, loss_fcn, device, test_dataloader)


def train(model, loss_fcn, device, optimizer, train_dataloader, test_dataset):
    for epoch in range(args.epochs):
        model.train()
        losses = []
        for batch, data in enumerate(train_dataloader):
            subgraph, features, labels = data
            features = features.to(device)
            labels = labels.to(device)
            model.g = subgraph
            for layer in model.layers:
                layer.g = subgraph
            logits = model(features.float())
            loss = loss_fcn(logits, labels.float())
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            losses.append(loss.item())
        loss_data = np.array(losses).mean()
        print("Epoch {:05d} | Loss: {:.4f}".format(epoch + 1, loss_data))

        if epoch % 5 == 0:
            scores = []
            for batch, test_data in enumerate(test_dataset):
                subgraph, features, labels = test_data
                features = features.clone().detach().to(device)
                labels = labels.clone().detach().to(device)
                score, _ = evaluate(features.float(), model,
                                    subgraph, labels.float(), loss_fcn)
                scores.append(score)
            print("F1-Score: {:.4f} ".format(np.array(scores).mean()))


def test(model, loss_fcn, device, test_dataloader):
    test_scores = []
    for batch, test_data in enumerate(test_dataloader):
        subgraph, features, labels = test_data
        features = features.to(device)
        labels = labels.to(device)
        test_scores.append(
            evaluate(features, model, subgraph, labels.float(), loss_fcn)[0])
    mean_scores = np.array(test_scores).mean()
    print("F1-Score: {:.4f}".format(np.array(test_scores).mean()))
    return mean_scores


def evaluate(features, model, subgraph, labels, loss_fcn):
    with torch.no_grad():
        model.eval()
        model.g = subgraph
        for layer in model.layers:
            layer.g = subgraph
        output = model(features.float())
        loss_data = loss_fcn(output, labels.float())
        predict = np.where(output.data.cpu().numpy() >= 0.5, 1, 0)
        score = f1_score(labels.data.cpu().numpy(), predict, average="micro")
        return score, loss_data.item()


def collate_fn(sample):
    graphs, features, labels = map(list, zip(*sample))
    graph = batch(graphs)
    features = torch.from_numpy(np.concatenate(features))
    labels = torch.from_numpy(np.concatenate(labels))
    return graph, features, labels


### Execution

In [8]:
if not JUPYTER:
    # We are in a notebook and not in a python file    
    parser = argparse.ArgumentParser()
    parser.add_argument("--mode",  choices=["train", "test"], default="train")
    parser.add_argument("--gpu", type=int, default=-1,
                        help="GPU to use. Set -1 to use CPU.")
    parser.add_argument("--epochs", type=int, default=250)
    parser.add_argument("--batch-size", type=int, default=2)

    args = parser.parse_args()
    main(args)
else:
    class Args:
        mode = "train"
        # "cpu" if args.gpu < 0 else "cuda:" + str(args.gpu)
        gpu = -1
        epochs = 20
        batch_size = 2

    args=Args()
    main(args)



Epoch 00001 | Loss: 0.6870
F1-Score: 0.0010 
Epoch 00002 | Loss: 0.6523
Epoch 00003 | Loss: 0.6056
Epoch 00004 | Loss: 0.5915
Epoch 00005 | Loss: 0.5808
Epoch 00006 | Loss: 0.5738
F1-Score: 0.3369 
Epoch 00007 | Loss: 0.5681
Epoch 00008 | Loss: 0.5642
Epoch 00009 | Loss: 0.5611
Epoch 00010 | Loss: 0.5588
Epoch 00011 | Loss: 0.5570
F1-Score: 0.3670 
Epoch 00012 | Loss: 0.5554
Epoch 00013 | Loss: 0.5540
Epoch 00014 | Loss: 0.5527
Epoch 00015 | Loss: 0.5514
Epoch 00016 | Loss: 0.5502
F1-Score: 0.3725 
Epoch 00017 | Loss: 0.5490
Epoch 00018 | Loss: 0.5479
Epoch 00019 | Loss: 0.5468
Epoch 00020 | Loss: 0.5457
F1-Score: 0.3773


In [9]:
class Args:
        mode = "test"
        # "cpu" if args.gpu < 0 else "cuda:" + str(args.gpu)
        # For some reason, gpu = 0 does not work.
        gpu = -1
        epochs = 20
        batch_size = 2

args=Args()
main(args)



F1-Score: 0.3773


0.37725352297478154