In [1]:
from torch_geometric.data import HeteroData, DataLoader
import torch_geometric.transforms as T
from torch_geometric.nn import HeteroConv , GCNConv , SAGEConv , GATConv
from torch_geometric.utils import negative_sampling
from torch_geometric.loader import LinkNeighborLoader

import torch
from torch import nn 
import torch.nn.functional as F
import torch.optim as optim

from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.preprocessing import LabelEncoder , label_binarize , OneHotEncoder
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

import os 
import pandas as pd
import numpy as np
from tqdm import tqdm
from itertools import product
import random
from collections import Counter
import warnings
warnings.filterwarnings("ignore") 


In [2]:
# Working directory that holds the serialized heterogeneous graph.
path_work = "/media/concha-eloko/Linux/PPT_clean"

# Deserialize the saved graph object from disk.
graph_file = f'{path_work}/graph_file.1107.pt'
graph_data = torch.load(graph_file)

# Display the loaded graph summary.
graph_data

HeteroData(
  [1mA[0m={ x=[4530, 127] },
  [1mB1[0m={ x=[11339, 0] },
  [1mB2[0m={ x=[3608, 1280] },
  [1m(B1, infects, A)[0m={
    edge_index=[2, 9677],
    y=[9677]
  },
  [1m(B2, expressed, B1)[0m={
    edge_index=[2, 13285],
    y=[13285]
  },
  [1m(A, harbors, B1)[0m={
    edge_index=[2, 9677],
    y=[9677]
  }
)

In [3]:
# *****************************************************************************
# Pre-process data: split the supervised relation into train/val/test link sets,
# then wrap every split in a LinkNeighborLoader built with identical settings.
SUPERVISED_EDGE = ("B1", "infects", "A")
REVERSE_EDGE = ("A", "harbors", "B1")

transform = T.RandomLinkSplit(
    num_val=0.1,
    num_test=0.2,
    # disjoint_train_ratio is intentionally not set here.
    neg_sampling_ratio=1.0,
    add_negative_train_samples=True,
    edge_types=SUPERVISED_EDGE,
    rev_edge_types=REVERSE_EDGE,
)

train_data, val_data, test_data = transform(graph_data)


def _link_loader(split):
    """Build the mini-batch loader over the labelled ("B1", "infects", "A") pairs of `split`."""
    supervised_store = split[SUPERVISED_EDGE]
    return LinkNeighborLoader(
        data=split,
        num_neighbors=[-1],
        edge_label_index=(SUPERVISED_EDGE, supervised_store.edge_label_index),
        edge_label=supervised_store.edge_label,
        batch_size=128,
        # The labels are displayed with positives first and negatives last,
        # so shuffling keeps both classes mixed within a batch.
        shuffle=True,
    )


train_loader = _link_loader(train_data)
val_loader = _link_loader(val_data)
test_loader = _link_loader(test_data)

In [4]:
# Re-display the source graph after the split; the repr below still reports
# 9677 edges for ("B1", "infects", "A"), the same count as right after loading.
graph_data

HeteroData(
  [1mA[0m={ x=[4530, 127] },
  [1mB1[0m={ x=[11339, 0] },
  [1mB2[0m={ x=[3608, 1280] },
  [1m(B1, infects, A)[0m={
    edge_index=[2, 9677],
    y=[9677]
  },
  [1m(B2, expressed, B1)[0m={
    edge_index=[2, 13285],
    y=[13285]
  },
  [1m(A, harbors, B1)[0m={
    edge_index=[2, 9677],
    y=[9677]
  }
)

In [6]:
# Show the training split: the supervised relation now also carries
# `edge_label` and `edge_label_index` next to its `edge_index`.
train_data

HeteroData(
  [1mA[0m={ x=[4530, 127] },
  [1mB1[0m={ x=[11339, 0] },
  [1mB2[0m={ x=[3608, 1280] },
  [1m(B1, infects, A)[0m={
    edge_index=[2, 6775],
    y=[6775],
    edge_label=[13550],
    edge_label_index=[2, 13550]
  },
  [1m(B2, expressed, B1)[0m={
    edge_index=[2, 13285],
    y=[13285]
  },
  [1m(A, harbors, B1)[0m={
    edge_index=[2, 6775],
    y=[6775]
  }
)

In [5]:
# Inspect the `edge_index` kept in the training split for the supervised relation.
# NOTE(review): the saved output below shows the whole edge store (edge_index, y,
# edge_label, edge_label_index) rather than only edge_index — it was presumably
# produced by an earlier version of this cell; re-run to refresh it.
train_data[("B1", "infects", "A")]["edge_index"]

{'edge_index': tensor([[5412,  445, 6170,  ..., 6648, 6831, 2580],
        [1950,  344, 1298,  ...,  283, 2744,  498]]), 'y': tensor([1., 1., 1.,  ..., 1., 1., 1.]), 'edge_label': tensor([1., 1., 1.,  ..., 0., 0., 0.]), 'edge_label_index': tensor([[5412,  445, 6170,  ..., 6631, 9589, 7495],
        [1950,  344, 1298,  ...,  817, 4204, 3473]])}

In [9]:
# Pull a single mini-batch from the training loader to inspect what the model will receive.
sampled_data = next(iter(train_loader))

# Display the sampled sub-graph (node/edge counts per type, plus the labelled pairs).
sampled_data

HeteroData(
  [1mA[0m={
    x=[159, 127],
    n_id=[159]
  },
  [1mB1[0m={
    x=[323, 0],
    n_id=[323]
  },
  [1mB2[0m={
    x=[140, 1280],
    n_id=[140]
  },
  [1m(B1, infects, A)[0m={
    edge_index=[2, 323],
    y=[323],
    edge_label=[128],
    edge_label_index=[2, 128],
    e_id=[323],
    input_id=[128]
  },
  [1m(B2, expressed, B1)[0m={
    edge_index=[2, 177],
    y=[177],
    e_id=[177]
  },
  [1m(A, harbors, B1)[0m={
    edge_index=[2, 133],
    y=[133],
    e_id=[133]
  }
)

> That one seems to work:

In [None]:
class EdgeDecoder(torch.nn.Module):
    """Two-layer MLP that scores candidate ("B1", "infects", "A") pairs.

    Each pair is represented by the B1 embedding followed by the A embedding;
    the concatenation goes through Linear -> ReLU -> Linear and yields one raw
    score per pair (no sigmoid is applied here).
    """

    def __init__(self, hidden_channels):
        super().__init__()
        # The first layer consumes the two concatenated embeddings, hence 2 * hidden_channels inputs.
        self.lin1 = torch.nn.Linear(2 * hidden_channels, hidden_channels)
        self.lin2 = torch.nn.Linear(hidden_channels, 1)

    def forward(self, features_A, features_B1, graph_data):
        """Return a 1-D tensor with one score per labelled pair.

        Args:
            features_A: embeddings indexed by the second row of `edge_label_index`.
            features_B1: embeddings indexed by the first row of `edge_label_index`.
            graph_data: object whose ["B1", "infects", "A"] entry exposes
                `edge_label_index`, unpackable into (B1 indices, A indices).
        """
        src_b1, dst_a = graph_data["B1", "infects", "A"].edge_label_index
        pair_features = torch.cat((features_B1[src_b1], features_A[dst_a]), dim=-1)
        hidden = torch.relu(self.lin1(pair_features))
        scores = self.lin2(hidden)
        return scores.view(-1)

In [10]:
# *****************************************************************************
# The model: GAT encoder followed by a feed-forward (MLP) link classifier
class GNN(torch.nn.Module):
    """One message-passing layer applied to a single relation of a heterogeneous graph.

    Args:
        edge_type: (source, relation, destination) key the layer is registered under.
        hidden_channels: output size handed to the convolution.
        conv: convolution class, instantiated as
            conv((-1, -1), hidden_channels, add_self_loops=False). Defaults to GATConv.
            NOTE(review): any other class must accept these arguments — verify before swapping.
    """

    def __init__(self, edge_type , hidden_channels, conv=GATConv):
        super().__init__()
        # (-1, -1): source/destination input sizes, presumably inferred lazily — confirm.
        lazy_in_channels = (-1, -1)
        self.conv = conv(lazy_in_channels, hidden_channels, add_self_loops=False)
        relation_to_layer = {edge_type: self.conv}
        self.hetero_conv = HeteroConv(relation_to_layer)

    def forward(self, x_dict, edge_index_dict):
        """Run the wrapped HeteroConv and return its output dictionary unchanged."""
        return self.hetero_conv(x_dict, edge_index_dict)

# Feed-forward (FNN) classifier over concatenated node-pair embeddings:
class Classifier_linear(torch.nn.Module):
    """MLP link classifier for ("B1", "infects", "A") pairs.

    The A embedding is placed first and the B1 embedding second in the
    concatenated pair representation, which then goes through
    Linear -> ReLU -> Linear to give one raw score per pair.
    """

    def __init__(self, hidden_channels):
        super().__init__()
        # Two embeddings are concatenated, so the first layer takes 2 * hidden_channels inputs.
        self.lin1 = torch.nn.Linear(2 * hidden_channels, hidden_channels)
        self.lin2 = torch.nn.Linear(hidden_channels, 1)

    def forward(self, x_dict_A, x_dict_B1, graph):
        """Score every labelled pair of `graph`.

        Args:
            x_dict_A: mapping whose "A" entry holds the A embeddings.
            x_dict_B1: mapping whose "B1" entry holds the B1 embeddings.
            graph: object whose ("B1", "infects", "A") entry exposes `edge_label_index`
                (row 0 indexes B1, row 1 indexes A).

        Returns:
            1-D tensor with one score per labelled pair.
        """
        label_index = graph[("B1", "infects", "A")].edge_label_index
        emb_a = x_dict_A["A"][label_index[1]]
        emb_b1 = x_dict_B1["B1"][label_index[0]]
        pair_features = torch.cat((emb_a, emb_b1), dim=-1)
        hidden = torch.relu(self.lin1(pair_features))
        return self.lin2(hidden).view(-1)
        
class Model(torch.nn.Module):
    """Two-stage heterogeneous GNN followed by an MLP link classifier.

    Stage 1 propagates along ("B2", "expressed", "B1") to produce B1 embeddings;
    stage 2 propagates those along ("B1", "infects", "A") to produce A embeddings;
    the classifier then scores the labelled ("B1", "infects", "A") pairs.

    Args:
        out_channels: embedding size used by both GNN stages and by the classifier.
        conv: convolution class used by both GNN stages (defaults to GATConv).
    """

    def __init__(self, out_channels , conv=GATConv):
        super().__init__()
        # Forward `conv` to both stages so the requested layer type is actually used.
        self.single_layer_model = GNN(("B2", "expressed", "B1"), out_channels, conv=conv)
        self.second_layer_model = GNN(("B1", "infects", "A"), out_channels, conv=conv)
        self.classifier_linear = Classifier_linear(out_channels)

    def forward(self, graph_data):
        """Return one raw score per labelled ("B1", "infects", "A") pair in `graph_data`."""
        x_dict = graph_data.x_dict
        edge_index_dict = graph_data.edge_index_dict

        # Stage 1: B2 -> B1 message passing.
        b1_nodes = self.single_layer_model(x_dict, edge_index_dict)

        # Stage 2 sees the original A and B2 features, but the B1 features computed by stage 1.
        updated_dict = {
            "A": x_dict["A"],
            "B2": x_dict["B2"],
            "B1": b1_nodes["B1"],
        }
        a_nodes = self.second_layer_model(updated_dict, edge_index_dict)

        return self.classifier_linear(a_nodes, b1_nodes, graph_data)



In [12]:
class GNN(torch.nn.Module):
    """One multi-head message-passing layer applied to a single relation of a heterogeneous graph.

    NOTE(review): this redefines the `GNN` class declared in an earlier cell; the
    only difference is the extra `heads=3, dropout=0.1` convolution arguments.

    Args:
        edge_type: (source, relation, destination) key the layer is registered under.
        hidden_channels: output size handed to the convolution.
        conv: convolution class, instantiated as
            conv((-1, -1), hidden_channels, add_self_loops=False, heads=3, dropout=0.1).
            Defaults to GATConv.
            NOTE(review): any other class must accept these arguments — verify before swapping.
    """

    def __init__(self, edge_type , hidden_channels, conv=GATConv):
        super().__init__()
        # (-1, -1): source/destination input sizes, presumably inferred lazily — confirm.
        lazy_in_channels = (-1, -1)
        self.conv = conv(
            lazy_in_channels,
            hidden_channels,
            add_self_loops=False,
            heads=3,
            dropout=0.1,
        )
        relation_to_layer = {edge_type: self.conv}
        self.hetero_conv = HeteroConv(relation_to_layer)

    def forward(self, x_dict, edge_index_dict):
        """Run the wrapped HeteroConv and return its output dictionary unchanged."""
        return self.hetero_conv(x_dict, edge_index_dict)

# Dot-product classifier:
class Classifier(torch.nn.Module):
    """Parameter-free link scorer: dot product of the A and B1 embeddings of each labelled pair."""

    def forward(self, x_dict_A , x_dict_B1, edge_index):
        """Return one dot-product score per labelled ("B1", "infects", "A") pair.

        Args:
            x_dict_A: mapping whose "A" entry holds the A embeddings.
            x_dict_B1: mapping whose "B1" entry holds the B1 embeddings.
            edge_index: despite its name, an object indexed by edge type whose
                ("B1", "infects", "A") entry exposes `edge_label_index`
                (row 0 indexes B1, row 1 indexes A).
        """
        label_index = edge_index[("B1", "infects", "A")].edge_label_index
        emb_a = x_dict_A["A"][label_index[1]]
        emb_b1 = x_dict_B1["B1"][label_index[0]]
        elementwise = emb_a * emb_b1
        return elementwise.sum(dim=-1)

class Model(torch.nn.Module):
    """Two-stage heterogeneous GNN with a leaky-ReLU in between and a dot-product link scorer.

    NOTE(review): this redefines the `Model` class declared in an earlier cell.

    Args:
        out_channels: size passed to both GNN stages.
        conv: accepted but not used — both stages are built with GNN's own default
            convolution. NOTE(review): confirm whether it should be forwarded.
    """

    def __init__(self, out_channels , conv=SAGEConv):
        super().__init__()
        self.single_layer_model = GNN(("B2", "expressed", "B1"), out_channels)
        self.second_layer_model = GNN(("B1", "infects", "A"), out_channels)
        self.classifier_dot = Classifier()

    def forward(self, graph_data):
        """Return one dot-product score per labelled ("B1", "infects", "A") pair in `graph_data`."""
        # Stage 1: B2 -> B1 message passing.
        b1_nodes = self.single_layer_model(graph_data.x_dict, graph_data.edge_index_dict)

        # Leaky-ReLU on every tensor produced by stage 1.
        b1_nodes_activated = {
            node_type: F.leaky_relu(embedding)
            for node_type, embedding in b1_nodes.items()
        }

        # Stage 2 sees the original A and B2 features, but the activated B1 embeddings.
        updated_dict = {
            "A": graph_data.x_dict["A"],
            "B2": graph_data.x_dict["B2"],
            "B1": b1_nodes_activated["B1"],
        }
        a_nodes = self.second_layer_model(updated_dict, graph_data.edge_index_dict)

        # NOTE(review): the scorer receives the pre-activation B1 embeddings (`b1_nodes`),
        # not `b1_nodes_activated` — confirm this is intended.
        return self.classifier_dot(a_nodes, b1_nodes, graph_data)


In [13]:
# Smoke test: build the model with 20 output channels and run one forward pass
# on the mini-batch sampled earlier.
model = Model(out_channels=20)
val = model(sampled_data)

In [14]:
# Raw scores returned by the forward pass on the sampled mini-batch.
# NOTE(review): many entries are exactly 0 — presumably pairs where an endpoint
# received no messages in the sampled sub-graph and so got an all-zero embedding; confirm.
val

tensor([ 0.0000,  0.0000, -0.0639,  0.0000, -0.3160,  0.0000,  0.0000,  0.1011,
         0.0679, -0.2207,  0.0000, -0.0078,  0.0000,  0.0000,  0.0000,  0.0000,
        -0.2258,  0.0000,  0.0000,  0.0000, -0.1813,  0.0000, -0.0290,  0.0000,
        -0.0346,  0.0000,  0.0000,  0.0000, -0.0377, -0.0572, -0.0387, -0.0560,
         0.0000, -0.1203, -0.1259, -0.1957,  0.0000, -0.1646, -0.0155, -0.3638,
         0.0000, -0.1437, -0.1668,  0.0000,  0.0000, -0.2432, -0.0436,  0.0000,
        -0.3279, -0.1003,  0.0000,  0.0876, -0.1352,  0.0000,  0.0000, -0.0655,
         0.0000, -0.0653, -0.0644,  0.0045,  0.0000,  0.0000,  0.0000, -0.1728,
        -0.1282, -0.0864,  0.0000,  0.0000, -0.0290, -0.0984, -0.3291, -0.3795,
        -0.0479, -0.0167, -0.0570, -0.1127,  0.0208,  0.0000, -0.3252, -0.1368,
         0.0000, -0.0138, -0.0428, -0.0443,  0.0000, -0.4220, -0.0928,  0.0000,
        -0.2625, -0.0070,  0.0178, -0.1261, -0.0166,  0.0000,  0.0000,  0.0000,
         0.0000, -0.0653, -0.0191,  0.00