In [1]:
from torch_geometric.data import Data
from torch_geometric.utils import train_test_split_edges
import torch
import networkx as nx
from Bio import SeqIO
import numpy as np
import os.path as osp
import pandas as pd

import torch
import torch.nn.functional as F
from sklearn.metrics import roc_auc_score

from torch_geometric.utils import (negative_sampling, remove_self_loops,
                                   add_self_loops)
from torch_geometric.datasets import Planetoid
import torch_geometric.transforms as T
from torch_geometric.nn import GCNConv, ChebConv, GATConv, ECConv  # noqa
from torch_geometric.utils import train_test_split_edges
from torch_geometric.nn import SAGEConv, SplineConv


In [2]:
# Read the interaction graph from an edge list. nx.read_edgelist splits each
# line on whitespace by default, so despite the .csv extension the file is
# presumably whitespace-delimited — TODO confirm.
G = nx.read_edgelist('../Data/Interactome/huri_apid_merge_ppis_edgelist.csv')
# np.load on an .npz archive returns a lazy, dict-like NpzFile; each array is
# only read from disk when it is indexed by key.
embeddings = np.load('../Data/embeddings/embeddings.npz')

In [None]:
# Restrict the graph to nodes that have an embedding, drop self-loops, and fix
# a node -> row-index mapping that matches the row order of embeddings_matrix.

# Build the key set once: testing membership against embeddings.keys() for
# every node can degrade to a list scan per node.
embedded_ids = set(embeddings.keys())
nodelist = [node for node in G.nodes() if node in embedded_ids]

# nx.Graph(...) turns the read-only subgraph view into an independent, mutable copy.
G = nx.Graph(G.subgraph(nodelist))
# Materialise the self-loop edges first so the graph is not mutated while the
# generator over it is still being consumed.
G.remove_edges_from(list(nx.selfloop_edges(G)))

# Position of each node in nodelist == its row in embeddings_matrix.
nodemapping = {node: idx for idx, node in enumerate(nodelist)}
embeddings_matrix = np.array([embeddings[node] for node in nodelist])

In [None]:
# Constant (all-ones) single-column feature matrix with one row per node.
# It is not referenced by any other cell visible here.
embeddings_matrix2 = np.ones((len(nodelist),1))

In [None]:
# Copy of G whose node labels are replaced by their integer index from nodemapping.
H = nx.relabel_nodes(G, nodemapping)

In [None]:
# Edges of H as an integer array with one (source, target) pair per row.
# nbunch lists every mapped node, so edges incident to any of them are returned.
index_edges = np.array(list(H.edges(nbunch=nodemapping.values()))).astype(int)

In [None]:
# Transpose so the two endpoint lists become the two rows of edge_index.
edge_index = torch.tensor(index_edges.T, dtype=torch.long)
# Node feature matrix: one embedding row per node, in nodelist order.
x = torch.tensor(embeddings_matrix, dtype=torch.float)

In [None]:
# Wrap features and edges in a torch_geometric Data object.
data = Data(x=x, edge_index=edge_index)
# Explicitly clear node-level masks and labels before splitting the edges.
data.train_mask = data.val_mask = data.test_mask = data.y = None
# Hold out 76% of the edges for testing and none for validation. Later cells
# read data.train_pos_edge_index, data.test_pos_edge_index and
# data.test_neg_edge_index from the result.
# NOTE(review): no random seed is set in the visible cells, so the split
# presumably differs between runs — confirm whether reproducibility is needed.
data = train_test_split_edges(data, val_ratio=0.00, test_ratio=0.76)

In [9]:
# Translate the split's integer edge indices back to node names.
#   pos_neg     : array with one (name, name) row per test pair, all positive
#                 test edges first, then all negative test edges.
#   train_edges : unique names of every node that is an endpoint of a training edge.
pos_test = data.test_pos_edge_index
neg_test = data.test_neg_edge_index
train_pos = data.train_pos_edge_index

pos_neg = []
train_edges = []
# Row 0 / row 1 of an edge_index tensor hold the two endpoints of each edge.
# The loop variable is deliberately not called `x`, so the module-level feature
# tensor `x` is not overwritten.
for endpoint_row in range(2):
    # .tolist() yields plain ints; indexing a Python list with 0-dim tensors
    # one element at a time is far slower.
    train_edges.append([nodelist[i] for i in train_pos[endpoint_row].tolist()])
    pos_neg.append([nodelist[i] for i in pos_test[endpoint_row].tolist()]
                   + [nodelist[i] for i in neg_test[endpoint_row].tolist()])

pos_neg = np.array(pos_neg).T
# Only the set of names is kept, so each training endpoint list is needed once.
train_edges = list(set(np.array(train_edges).flatten()))

In [10]:
# Build, for every node, the list of nodes linked to it by an alignment above
# the threshold, then expand the training nodes by those neighbours.
df_mmseq = pd.read_csv('../Data/mmseqs/align.m8', sep='\t', header=None)
# Column 2 is presumably the fractional sequence identity (the variable name
# suggests a 40% cutoff) — TODO confirm against the mmseqs output format.
more_40_seq_id = df_mmseq[df_mmseq[2] > 0.4]

# node -> set containing the node itself and every node it is aligned with,
# in either direction. Sets de-duplicate as they are filled.
node_sim_dict = {}
for query_id, target_id in more_40_seq_id[[0, 1]].values:
    node_sim_dict.setdefault(query_id, {query_id}).add(target_id)
    node_sim_dict.setdefault(target_id, {target_id}).add(query_id)

# Later cells expect list values.
node_sim_dict = {node: list(similar) for node, similar in node_sim_dict.items()}

# Training nodes plus everything similar to them. Accumulating into a set
# avoids rebuilding an ever-growing list on every iteration.
sim_train_node_set = set()
for train_node in train_edges:
    # Nodes without any qualifying alignment contribute only themselves.
    sim_train_node_set.update(node_sim_dict.get(train_node, [train_node]))

sim_train_nodes = list(sim_train_node_set)

In [11]:
# Partition the rows of pos_neg by how many endpoints are in sim_train_nodes:
#   idx_c1 - both endpoints, idx_c2 - exactly one endpoint, idx_c3 - neither.
idx_c1 = []
idx_c2 = []
idx_c3 = []

# Set lookup is O(1); scanning the sim_train_nodes list for both endpoints of
# every test pair is O(len(sim_train_nodes)) each time.
sim_train_lookup = set(sim_train_nodes)

for i, (first_node, second_node) in enumerate(pos_neg):
    first_seen = first_node in sim_train_lookup
    second_seen = second_node in sim_train_lookup
    if first_seen and second_seen:
        idx_c1.append(i)
    elif not first_seen and not second_seen:
        idx_c3.append(i)
    else:
        idx_c2.append(i)

In [12]:
class Net(torch.nn.Module):
    """Two-layer GCN encoder with linear skip paths and a dot-product link scorer.

    The network reads node features and message-passing edges from the
    module-level ``data`` object (``data.x`` and ``data.train_pos_edge_index``);
    only the edges to be scored are passed to ``forward``.
    """

    def __init__(self):
        super().__init__()
        # Layers are created in a fixed order so that parameter initialisation
        # consumes random numbers in the same sequence.
        self.conv1 = GCNConv(1024, 1024)
        self.lin1 = torch.nn.Linear(1024, 1024)
        self.conv2 = GCNConv(1024, 1024)
        self.lin2 = torch.nn.Linear(1024, 1024)

        # Projection head applied to the concatenated outputs of both layers.
        self.lin4 = torch.nn.Linear(2048, 1024)
        self.lin5 = torch.nn.Linear(1024, 512)

    def forward(self, pos_edge_index, neg_edge_index):
        """Return one logit per candidate edge, positives first, then negatives.

        Args:
            pos_edge_index: edge index tensor (two rows) of positive edges to score.
            neg_edge_index: edge index tensor (two rows) of negative edges to score.

        Returns:
            1-D tensor with one entry per column of the concatenated edge
            tensors: the dot product of the two endpoint embeddings.
        """
        features = data.x
        message_edges = data.train_pos_edge_index

        hidden1 = self.conv1(features, message_edges) + self.lin1(features)
        hidden1 = F.dropout(F.elu(hidden1), p=0.5, training=self.training)

        # NOTE(review): this skip path projects the raw input features, not
        # hidden1 — confirm that is intended.
        hidden2 = self.conv2(hidden1, message_edges) + self.lin2(features)
        hidden2 = F.dropout(F.elu(hidden2), p=0.5, training=self.training)

        node_emb = F.elu(self.lin4(torch.cat([hidden1, hidden2], dim=-1)))
        node_emb = F.dropout(node_emb, p=0.5, training=self.training)
        node_emb = F.elu(self.lin5(node_emb))

        total_edge_index = torch.cat([pos_edge_index, neg_edge_index], dim=-1)
        x_j = node_emb.index_select(0, total_edge_index[0])
        x_i = node_emb.index_select(0, total_edge_index[1])
        # Row-wise dot product between the two endpoint embeddings.
        return torch.einsum("ef,ef->e", x_i, x_j)


# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Move the freshly built model, then the graph data, onto that device.
model = Net().to(device)
data = data.to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)


def get_link_labels(pos_edge_index, neg_edge_index):
    """Build the binary target vector for a batch of candidate edges.

    Args:
        pos_edge_index: edge index tensor (two rows) of positive edges.
        neg_edge_index: edge index tensor (two rows) of negative edges.

    Returns:
        1-D float tensor of length ``num_pos + num_neg`` holding 1.0 for the
        positive edges followed by 0.0 for the negative edges. It is created on
        the device of ``pos_edge_index`` rather than on a module-level
        ``device`` global, so the labels always sit next to the edge tensors
        they describe.
    """
    num_pos = pos_edge_index.size(1)
    num_neg = neg_edge_index.size(1)
    link_labels = torch.zeros(num_pos + num_neg, dtype=torch.float,
                              device=pos_edge_index.device)
    # Positives come first, matching the order in which the model scores edges.
    link_labels[:num_pos] = 1.
    return link_labels


def train():
    """Run one full-batch optimisation step on the training edges.

    Samples as many negative edges as there are positive training edges,
    scores both sets with the module-level ``model`` and minimises binary
    cross-entropy on the logits using the module-level ``optimizer``.

    Returns:
        0-dim tensor holding the training loss, detached from the autograd
        graph so the caller does not keep the graph alive by holding on to it.
    """
    model.train()
    optimizer.zero_grad()

    x, pos_edge_index = data.x, data.train_pos_edge_index

    # Hand negative_sampling an edge set that contains every self-loop, so that
    # (i, i) pairs count as existing edges and are not drawn as negatives.
    _edge_index, _ = remove_self_loops(pos_edge_index)
    pos_edge_index_with_self_loops, _ = add_self_loops(_edge_index,
                                                       num_nodes=x.size(0))

    neg_edge_index = negative_sampling(
        edge_index=pos_edge_index_with_self_loops, num_nodes=x.size(0),
        num_neg_samples=pos_edge_index.size(1))

    link_logits = model(pos_edge_index, neg_edge_index)
    link_labels = get_link_labels(pos_edge_index, neg_edge_index)

    loss = F.binary_cross_entropy_with_logits(link_logits, link_labels)
    loss.backward()
    optimizer.step()

    return loss.detach()


def test():
    """Evaluate the model on the held-out test edges, split into three subsets.

    Scores the positive and negative test edges with the module-level
    ``model`` and computes ROC AUC separately on the index subsets
    ``idx_c1``, ``idx_c2`` and ``idx_c3``. Also prints the number of positive
    pairs in each subset.

    Returns:
        List ``[auc_c1, auc_c2, auc_c3]``.
    """
    model.eval()
    perfs = []
    for prefix in ["test"]:
        pos_edge_index, neg_edge_index = [
            index for _, index in data("{}_pos_edge_index".format(prefix),
                                       "{}_neg_edge_index".format(prefix))
        ]
        # Evaluation needs no gradients; without no_grad the forward pass keeps
        # every intermediate activation for autograd, wasting GPU memory.
        with torch.no_grad():
            link_probs = torch.sigmoid(model(pos_edge_index, neg_edge_index))
        link_labels = get_link_labels(pos_edge_index, neg_edge_index)
        link_probs = link_probs.cpu().numpy()
        link_labels = link_labels.cpu().numpy()
        for subset in (idx_c1, idx_c2, idx_c3):
            perfs.append(roc_auc_score(link_labels[subset], link_probs[subset]))
        # Labels are 0/1, so each sum is the count of positive pairs in the subset.
        print(sum(link_labels[idx_c1]),sum(link_labels[idx_c2]),sum(link_labels[idx_c3]))

    return perfs


best_val_perf = test_perf = 0
# Progress line template; filled in once per epoch.
log = 'Epoch: {:03d}, Loss: {:.4f}, Test C1: {:.4f}, Test C2: {:.4f}, Test C3: {:.4f}'
# 500 epochs, numbered from 1; every epoch trains once and is evaluated
# straight away on the three test subsets.
for epoch in range(1, 501):
    train_loss = train()
    test_c1, test_c2, test_c3 = test()
    print(log.format(epoch, train_loss, test_c1, test_c2, test_c3))



78083.0 5275.0 112.0
Epoch: 001, Loss: 0.6783, Test C1: 0.8524, Test C2: 0.7749, Test C3: 0.6151


RuntimeError: CUDA out of memory. Tried to allocate 206.00 MiB (GPU 0; 3.95 GiB total capacity; 1.48 GiB already allocated; 216.69 MiB free; 1.80 GiB reserved in total by PyTorch) (malloc at /opt/conda/conda-bld/pytorch_1587428094786/work/c10/cuda/CUDACachingAllocator.cpp:289)
frame #0: c10::Error::Error(c10::SourceLocation, std::string const&) + 0x4e (0x7fe0b052bb5e in /home/jan-van-eck/anaconda3/envs/GNN/lib/python3.8/site-packages/torch/lib/libc10.so)
frame #1: <unknown function> + 0x1f39d (0x7fe0b02ed39d in /home/jan-van-eck/anaconda3/envs/GNN/lib/python3.8/site-packages/torch/lib/libc10_cuda.so)
frame #2: <unknown function> + 0x2058e (0x7fe0b02ee58e in /home/jan-van-eck/anaconda3/envs/GNN/lib/python3.8/site-packages/torch/lib/libc10_cuda.so)
frame #3: THCStorage_resize + 0x96 (0x7fe06a1a7686 in /home/jan-van-eck/anaconda3/envs/GNN/lib/python3.8/site-packages/torch/lib/libtorch_cuda.so)
frame #4: at::native::(anonymous namespace)::resize_cuda_(at::Tensor&, c10::ArrayRef<long>, c10::optional<c10::MemoryFormat>) + 0x799 (0x7fe06bcdd879 in /home/jan-van-eck/anaconda3/envs/GNN/lib/python3.8/site-packages/torch/lib/libtorch_cuda.so)
frame #5: <unknown function> + 0x2a0e253 (0x7fe06bcde253 in /home/jan-van-eck/anaconda3/envs/GNN/lib/python3.8/site-packages/torch/lib/libtorch_cuda.so)
frame #6: <unknown function> + 0xd84292 (0x7fe06a054292 in /home/jan-van-eck/anaconda3/envs/GNN/lib/python3.8/site-packages/torch/lib/libtorch_cuda.so)
frame #7: <unknown function> + 0xeb735f (0x7fe06a18735f in /home/jan-van-eck/anaconda3/envs/GNN/lib/python3.8/site-packages/torch/lib/libtorch_cuda.so)
frame #8: at::native::bmm_cuda(at::Tensor const&, at::Tensor const&) + 0x9 (0x7fe06b8d79b9 in /home/jan-van-eck/anaconda3/envs/GNN/lib/python3.8/site-packages/torch/lib/libtorch_cuda.so)
frame #9: <unknown function> + 0xdd7ac8 (0x7fe06a0a7ac8 in /home/jan-van-eck/anaconda3/envs/GNN/lib/python3.8/site-packages/torch/lib/libtorch_cuda.so)
frame #10: <unknown function> + 0xe224d0 (0x7fe0910a74d0 in /home/jan-van-eck/anaconda3/envs/GNN/lib/python3.8/site-packages/torch/lib/libtorch_cpu.so)
frame #11: <unknown function> + 0x288634c (0x7fe092b0b34c in /home/jan-van-eck/anaconda3/envs/GNN/lib/python3.8/site-packages/torch/lib/libtorch_cpu.so)
frame #12: <unknown function> + 0xe224d0 (0x7fe0910a74d0 in /home/jan-van-eck/anaconda3/envs/GNN/lib/python3.8/site-packages/torch/lib/libtorch_cpu.so)
frame #13: at::Tensor c10::Dispatcher::callUnboxed<at::Tensor, at::Tensor const&, at::Tensor const&>(c10::OperatorHandle const&, at::Tensor const&, at::Tensor const&) const + 0xb3 (0x7fe0b0b9ada3 in /home/jan-van-eck/anaconda3/envs/GNN/lib/python3.8/site-packages/torch/lib/libtorch_python.so)
frame #14: at::Tensor::bmm(at::Tensor const&) const + 0x4c (0x7fe0928f258c in /home/jan-van-eck/anaconda3/envs/GNN/lib/python3.8/site-packages/torch/lib/libtorch_cpu.so)
frame #15: torch::autograd::generated::BmmBackward::apply(std::vector<at::Tensor, std::allocator<at::Tensor> >&&) + 0x1b7 (0x7fe09288c847 in /home/jan-van-eck/anaconda3/envs/GNN/lib/python3.8/site-packages/torch/lib/libtorch_cpu.so)
frame #16: <unknown function> + 0x2ae8215 (0x7fe092d6d215 in /home/jan-van-eck/anaconda3/envs/GNN/lib/python3.8/site-packages/torch/lib/libtorch_cpu.so)
frame #17: torch::autograd::Engine::evaluate_function(std::shared_ptr<torch::autograd::GraphTask>&, torch::autograd::Node*, torch::autograd::InputBuffer&) + 0x16f3 (0x7fe092d6a513 in /home/jan-van-eck/anaconda3/envs/GNN/lib/python3.8/site-packages/torch/lib/libtorch_cpu.so)
frame #18: torch::autograd::Engine::thread_main(std::shared_ptr<torch::autograd::GraphTask> const&, bool) + 0x3d2 (0x7fe092d6b2f2 in /home/jan-van-eck/anaconda3/envs/GNN/lib/python3.8/site-packages/torch/lib/libtorch_cpu.so)
frame #19: torch::autograd::Engine::thread_init(int) + 0x39 (0x7fe092d63969 in /home/jan-van-eck/anaconda3/envs/GNN/lib/python3.8/site-packages/torch/lib/libtorch_cpu.so)
frame #20: torch::autograd::python::PythonEngine::thread_init(int) + 0x38 (0x7fe0b0e5e9f8 in /home/jan-van-eck/anaconda3/envs/GNN/lib/python3.8/site-packages/torch/lib/libtorch_python.so)
frame #21: <unknown function> + 0xc819d (0x7fe0cca0b19d in /home/jan-van-eck/anaconda3/envs/GNN/lib/python3.8/site-packages/zmq/backend/cython/../../../../.././libstdc++.so.6)
frame #22: <unknown function> + 0x9609 (0x7fe0cfb0d609 in /lib/x86_64-linux-gnu/libpthread.so.0)
frame #23: clone + 0x43 (0x7fe0cfa34103 in /lib/x86_64-linux-gnu/libc.so.6)
