In [None]:
# Install the exact torch / torchvision / torchaudio versions recorded in this cell's output.
# `-f` gives pip the PyTorch wheel listing as an additional place to look for packages.
!pip install torch===1.7.1 torchvision===0.8.2 torchaudio===0.7.2 -f https://download.pytorch.org/whl/torch_stable.html

Looking in links: https://download.pytorch.org/whl/torch_stable.html
Collecting torch===1.7.1
[?25l  Downloading https://files.pythonhosted.org/packages/90/4f/acf48b3a18a8f9223c6616647f0a011a5713a985336088d7c76f3a211374/torch-1.7.1-cp36-cp36m-manylinux1_x86_64.whl (776.8MB)
[K     |████████████████████████████████| 776.8MB 24kB/s 
[?25hCollecting torchvision===0.8.2
[?25l  Downloading https://files.pythonhosted.org/packages/19/f1/d1d9b2be9f50e840accfa180ec2fb759dd2504f2b3a12a232398d5fa00ae/torchvision-0.8.2-cp36-cp36m-manylinux1_x86_64.whl (12.8MB)
[K     |████████████████████████████████| 12.8MB 178kB/s 
[?25hCollecting torchaudio===0.7.2
[?25l  Downloading https://files.pythonhosted.org/packages/2a/f9/618434cf4e46dc975871e1516f5499abef6564ab4366f9b2321ee536be14/torchaudio-0.7.2-cp36-cp36m-manylinux1_x86_64.whl (7.6MB)
[K     |████████████████████████████████| 7.6MB 38.1MB/s 
Installing collected packages: torch, torchvision, torchaudio
  Found existing installation: torch 1.7

In [None]:
!pip install dgl-cu101 

Collecting dgl-cu101
[?25l  Downloading https://files.pythonhosted.org/packages/77/08/ea2d56e85eba1c22a14fa0f9b3c4ca8b43bf07de34e454d4e23632b376ea/dgl_cu101-0.5.3-cp36-cp36m-manylinux1_x86_64.whl (25.0MB)
[K     |████████████████████████████████| 25.0MB 139kB/s 
Installing collected packages: dgl-cu101
Successfully installed dgl-cu101-0.5.3


In [None]:
import argparse
from os import path

import numpy as np
import torch
import torch.nn.functional as F
from dgl import batch
from dgl.data.ppi import LegacyPPIDataset
from dgl.nn.pytorch import GraphConv
from dgl.nn.pytorch import GATConv
from sklearn.metrics import f1_score
from torch import nn, optim
from torch.utils.data import DataLoader


# Where the trained weights are written by main() and read back before testing.
# The path is relative, so it resolves against the notebook's working directory.
# The commented-out alternative anchors the file next to the source via `__file__`,
# which is not available when the code runs inside a notebook:
#MODEL_STATE_FILE = path.join(path.dirname(path.abspath(__file__)), "model_state.pth")
MODEL_STATE_FILE = "./model_state.pth"

class GAT(nn.Module):
    """Three stacked ``GATConv`` layers whose attention heads are averaged.

    Layer sizes are ``input_size -> hidden_size -> hidden_size -> output_size``.
    After every layer the per-head outputs are averaged (not concatenated), so
    the feature width seen by the next layer is independent of the head count.

    Args:
        g: Graph the layers operate on. Callers may replace ``self.g`` before
            each forward pass to run the model on a different graph.
        input_size: Width of the input node features.
        hidden_size: Width of the two hidden representations.
        output_size: Width of the final per-node output.
        nonlinearity: Activation handed to every ``GATConv`` layer.
        device_name: Device the graph is moved to at the start of ``forward``.
        num_heads: Attention heads for the three layers, in order. Defaults to
            ``(8, 8, 6)``, the values that were previously hard-coded.

    Raises:
        ValueError: If ``num_heads`` does not contain exactly three entries.
    """

    def __init__(self, g, input_size, hidden_size, output_size, nonlinearity, device_name,
                 num_heads=(8, 8, 6)):
        super().__init__()

        if len(num_heads) != 3:
            raise ValueError(
                "num_heads must provide one head count per layer (3), got {}".format(len(num_heads))
            )

        self.device_name = device_name
        self.g = g

        layer_sizes = [
            (input_size, hidden_size),
            (hidden_size, hidden_size),
            (hidden_size, output_size),
        ]
        # NOTE(review): `nonlinearity` is also applied to the last layer, so the values
        # fed to the loss as logits have already been through the activation. Left
        # unchanged because existing checkpoints were trained this way -- confirm intent.
        self.layers = nn.ModuleList([
            GATConv(in_size, out_size, heads, activation=nonlinearity)
            for (in_size, out_size), heads in zip(layer_sizes, num_heads)
        ])

    def forward(self, inputs):
        """Run ``inputs`` through all layers on the graph currently stored in ``self.g``.

        Args:
            inputs: Node feature tensor for the nodes of ``self.g``.

        Returns:
            The output of the last layer after averaging over its attention heads.
        """
        # The graph may have been swapped in by a caller while still on another device.
        self.g = self.g.to(self.device_name)
        outputs = inputs

        for layer in self.layers:
            outputs = layer(self.g, outputs)
            # Collapse the attention-head axis (dim 1 of the GATConv output) by averaging.
            outputs = torch.mean(outputs, 1)

        return outputs


def main(args):
    """Build the PPI data loaders and the GAT model, optionally train, then test.

    Args:
        args: Settings object. This function reads ``args.batch_size``,
            ``args.gpu`` (a negative value selects the CPU, otherwise the CUDA
            device index) and ``args.mode`` (``"train"`` trains and saves a
            checkpoint first; any other value goes straight to testing).

    Returns:
        The value returned by ``test()``: the mean F1 score over the test loader.

    Note:
        Whatever the mode, the weights are (re)loaded from ``MODEL_STATE_FILE``
        before testing, so that file must exist when ``args.mode`` is not
        ``"train"``.
    """
    # create the dataset
    train_dataset, test_dataset = LegacyPPIDataset(mode="train"), LegacyPPIDataset(mode="test")
    train_dataloader = DataLoader(train_dataset, batch_size=args.batch_size, collate_fn=collate_fn)
    test_dataloader = DataLoader(test_dataset, batch_size=args.batch_size, collate_fn=collate_fn)
    n_features, n_classes = train_dataset.features.shape[1], train_dataset.labels.shape[1]

    # create the model, loss function and optimizer
    device = torch.device("cpu" if args.gpu < 0 else "cuda:" + str(args.gpu))
    model = GAT(g=train_dataset.graph, input_size=n_features,
                hidden_size=256, output_size=n_classes,
                nonlinearity=F.elu, device_name=device).to(device)
    loss_fcn = nn.BCEWithLogitsLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

    # train and test
    if args.mode == "train":
        # NOTE(review): the test split is what train() uses for its periodic scoring
        # and checkpoint selection -- confirm this is intended.
        train(model, loss_fcn, device, optimizer, train_dataloader, test_dataset)
        torch.save(model.state_dict(), MODEL_STATE_FILE)
    # map_location remaps the stored tensors onto the selected device, so a checkpoint
    # written on a GPU can still be loaded on a CPU-only run (and vice versa).
    model.load_state_dict(torch.load(MODEL_STATE_FILE, map_location=device))
    return test(model, loss_fcn, device, test_dataloader)


def train(model, loss_fcn, device, optimizer, train_dataloader, test_dataset, epochs=None):
    """Optimise ``model`` on ``train_dataloader`` and score it periodically.

    Every fifth epoch (starting with the first) the model is scored on each
    item of ``test_dataset``. When the mean F1 beats the best value seen so far
    (initially 0.89) the weights are written to a file whose name embeds the score.

    Args:
        model: Network to train; its ``g`` attribute is replaced for every batch.
        loss_fcn: Callable taking ``(logits, labels)`` and returning a scalar loss.
        device: Device that features, labels and graphs are moved to.
        optimizer: Optimizer stepping the model's parameters.
        train_dataloader: Iterable yielding ``(graph, features, labels)`` batches.
        test_dataset: Iterable yielding ``(graph, features, labels)`` items used
            for the periodic scoring.
        epochs: Number of passes over ``train_dataloader``. When omitted, the
            value is taken from the module-level ``args.epochs``, which is what
            this function always used before the parameter existed.
    """
    if epochs is None:
        # Backward-compatible fallback for callers that do not pass `epochs`.
        epochs = args.epochs

    max_score = 0.89  # a checkpoint is only written once the mean F1 exceeds this
    for epoch in range(epochs):
        model.train()
        losses = []
        for subgraph, features, labels in train_dataloader:
            features = features.to(device)
            labels = labels.to(device)
            # Move the batched graph once and share it, instead of repeating the
            # transfer for the model and again for every layer.
            subgraph = subgraph.to(device)
            model.g = subgraph
            for layer in model.layers:
                layer.g = subgraph
            logits = model(features.float())
            loss = loss_fcn(logits, labels.float())
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            losses.append(loss.item())
        loss_data = np.array(losses).mean()
        print("Epoch {:05d} | Loss: {:.4f}".format(epoch + 1, loss_data))

        if epoch % 5 == 0:
            scores = []
            for subgraph, features, labels in test_dataset:
                features = features.clone().detach().to(device)
                labels = labels.clone().detach().to(device)
                score, _ = evaluate(features.float(), model, subgraph, labels.float(), loss_fcn)
                scores.append(score)
            final_score = np.array(scores).mean()
            if final_score > max_score:
                print("Highest score reached! Saving model")
                torch.save(model.state_dict(), f'./dropout_{final_score:.2f}model_state.pth')
                max_score = final_score
            print("F1-Score: {:.4f} ".format(final_score))


def test(model, loss_fcn, device, test_dataloader):
    """Score ``model`` on every batch of ``test_dataloader`` and report the mean F1.

    Args:
        model: Network handed to ``evaluate`` for each batch.
        loss_fcn: Loss callable forwarded to ``evaluate``.
        device: Device the batch features and labels are moved to.
        test_dataloader: Iterable yielding ``(graph, features, labels)`` batches.

    Returns:
        Mean of the per-batch F1 scores; the same value is printed.
    """
    def _batch_f1(sample):
        # Only the score (first element of evaluate's result) is kept; the loss is dropped.
        graph, feats, targets = sample
        feats = feats.to(device)
        targets = targets.to(device)
        return evaluate(feats, model, graph, targets.float(), loss_fcn)[0]

    per_batch_f1 = [_batch_f1(sample) for sample in test_dataloader]
    mean_f1 = np.array(per_batch_f1).mean()
    print("F1-Score: {:.4f}".format(mean_f1))
    return mean_f1


def evaluate(features, model, subgraph, labels, loss_fcn, threshold=0.5):
    """Compute the micro-averaged F1 score and the loss of ``model`` on one graph.

    The model is switched to eval mode and run without gradient tracking on
    ``subgraph``, which is installed as ``model.g`` (and on each layer) first.

    Args:
        features: Node feature tensor for ``subgraph``; cast to float before use.
        model: Network exposing ``g`` and ``layers`` and returning raw logits.
        subgraph: Graph to evaluate on.
        labels: Ground-truth label tensor (presumably multi-hot 0/1 -- confirm
            against the dataset); cast to float for the loss.
        loss_fcn: Callable taking ``(logits, labels)`` and returning a scalar loss.
        threshold: Probability at or above which a label is predicted positive.

    Returns:
        Tuple ``(f1, loss)`` with the micro-averaged F1 and the loss as a float.
    """
    model.eval()
    with torch.no_grad():
        model.g = subgraph
        for layer in model.layers:
            layer.g = subgraph
        output = model(features.float())
        loss_data = loss_fcn(output, labels.float())
        # `output` holds logits (the loss applies the sigmoid internally), so it has to be
        # mapped to probabilities before thresholding. Comparing raw logits against 0.5,
        # as was done before, corresponds to a probability cut-off of about 0.62.
        probabilities = torch.sigmoid(output)
        predict = np.where(probabilities.cpu().numpy() >= threshold, 1, 0)
        score = f1_score(labels.detach().cpu().numpy(), predict, average="micro")
        return score, loss_data.item()


def collate_fn(sample):
    """Merge a list of ``(graph, features, labels)`` items into a single batch.

    Args:
        sample: Sequence of per-graph triples as produced by the dataset.

    Returns:
        Tuple ``(graph, features, labels)``: the graphs combined with ``batch``,
        and the per-graph feature / label arrays concatenated along the first
        axis and wrapped as torch tensors.
    """
    # Transpose the list of triples into three parallel lists.
    graph_list, feature_list, label_list = (list(column) for column in zip(*sample))
    merged_graph = batch(graph_list)
    stacked_features = torch.from_numpy(np.concatenate(feature_list))
    stacked_labels = torch.from_numpy(np.concatenate(label_list))
    return merged_graph, stacked_features, stacked_labels


class Args:
    """Run settings, standing in for parsed command-line arguments.

    Calling ``Args()`` with no arguments yields the same values that used to be
    hard-coded; individual settings can now be overridden by keyword.

    Args:
        mode: ``'train'`` to train before testing; any other value only tests.
        gpu: CUDA device index; a negative value selects the CPU.
        batch_size: Number of graphs per data-loader batch.
        epochs: Number of training epochs.
    """

    def __init__(self, mode='test', gpu=0, batch_size=2, epochs=1000):
        self.mode = mode
        self.gpu = gpu
        self.batch_size = batch_size
        self.epochs = epochs

# `args` has to remain a module-level name: train() reads `args.epochs` from the
# global scope rather than receiving it as an argument.
args = Args()
# Last expression of the cell, so the mean F1 returned by main() is shown as the cell output.
main(args)




F1-Score: 0.8979


0.8979339581952641