In [None]:
# Copy the remote `ensemble_1908` directory into the local `ficheros_28032023` folder over SSH
# (-a archive, -v verbose, -z compress, -h human-readable sizes, -e ssh as the transport).
rsync -avzhe ssh \
conchae@garnatxa.srv.cpd:/home/conchae/prediction_depolymerase_tropism/prophage_prediction/depolymerase_decipher/ficheros_28032023/ensemble_1908 \
/media/concha-eloko/Linux/PPT_clean/ficheros_28032023


In [3]:
import logging
import os
import random
import warnings
from collections import Counter
from itertools import product
from multiprocessing.pool import ThreadPool

import numpy as np
import pandas as pd
import torch
import torch.nn.functional as F
import torch.optim as optim
import torch_geometric.transforms as T
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score , matthews_corrcoef
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.preprocessing import LabelEncoder , label_binarize , OneHotEncoder
from torch import nn
from torch_geometric.data import HeteroData, DataLoader
from torch_geometric.loader import LinkNeighborLoader
from torch_geometric.nn import to_hetero , HeteroConv , GATConv , GATv2Conv , GINConv , global_add_pool
from torch_geometric.utils import negative_sampling
from tqdm import tqdm
warnings.filterwarnings("ignore")

# *****************************************************************************
# Load the Dataframes :
# `path_work` is the local working directory; the commented-out line below is presumably the
# matching location on the remote cluster.
#path_work = "/home/conchae/prediction_depolymerase_tropism/prophage_prediction/depolymerase_decipher/ficheros_28032023"
path_work = "/media/concha-eloko/Linux/PPT_clean"
# Load the serialized graph object used by every later cell.
# NOTE(review): torch.load unpickles arbitrary objects; only load files from a trusted source.
graph_data = torch.load(f'{path_work}/Tropi_graph.lvl_0.1909.pt')


In [4]:
graph_data

HeteroData(
  [1mA[0m={ x=[4499, 127] },
  [1mB1[0m={ x=[7573, 0] },
  [1mB2[0m={ x=[3426, 1280] },
  [1m(B1, infects, A)[0m={
    edge_index=[2, 7573],
    y=[7573]
  },
  [1m(B2, expressed, B1)[0m={
    edge_index=[2, 9476],
    y=[9476]
  },
  [1m(A, harbors, B1)[0m={
    edge_index=[2, 7573],
    y=[7573]
  }
)

> Original :

In [None]:
# The model : Classifier
class GNN(torch.nn.Module):
    """One heterogeneous convolution layer registered for a single edge type.

    Args:
        edge_type: Key under which the convolution is registered in ``HeteroConv``.
        conv: Convolution class, instantiated with ``(-1, -1)`` as input size.
        hidden_channels: Second positional argument of ``conv``.
        heads: Forwarded to ``conv`` as ``heads``.
        dropout: Forwarded to ``conv`` as ``dropout``.
    """

    def __init__(self, edge_type , conv, hidden_channels, heads, dropout):
        super().__init__()
        # NOTE(review): GATv2Conv documents this option as `share_weights`; confirm that the
        # installed layer accepts (and actually uses) the `shared_weights` spelling.
        conv_options = dict(add_self_loops=False, heads=heads, dropout=dropout, shared_weights=True)
        self.conv = conv((-1, -1), hidden_channels, **conv_options)
        self.hetero_conv = HeteroConv({edge_type: self.conv})

    def forward(self, x_dict, edge_index_dict):
        """Apply the wrapped ``HeteroConv`` and return its output dictionary."""
        return self.hetero_conv(x_dict, edge_index_dict)

# Classifier, Binary :
class EdgeDecoder(torch.nn.Module):
    """Two-layer MLP that scores ("B1", "infects", "A") candidate edges.

    The input of each edge is the A-node feature vector concatenated with the
    B1-node embedding; the output is one raw logit per labelled edge.
    """

    def __init__(self, hidden_channels, heads, n_kl_types):
        super().__init__()
        in_features = heads * hidden_channels + n_kl_types
        self.lin1 = nn.Linear(in_features, 512)
        self.lin2 = nn.Linear(512, 1)

    def forward(self, x_dict_A , x_dict_B1, graph_data):
        """Return a flat tensor of logits, one per column of ``edge_label_index``."""
        label_index = graph_data[("B1", "infects", "A")].edge_label_index
        # Row 0 indexes B1 nodes, row 1 indexes A nodes.
        b1_part = x_dict_B1["B1"][label_index[0]]
        a_part = x_dict_A["A"][label_index[1]]
        hidden = torch.relu(self.lin1(torch.cat((a_part, b1_part), dim=-1)))
        return self.lin2(hidden).view(-1)

class Model(torch.nn.Module):
    """GNN encoder over ("B2", "expressed", "B1") edges followed by the edge decoder."""

    def __init__(self, conv, hidden_channels, heads, dropout,n_kl_types):
        super().__init__()
        encoder_edge = ("B2", "expressed", "B1")
        self.single_layer_model = GNN(encoder_edge, conv, hidden_channels, heads, dropout)
        self.EdgeDecoder = EdgeDecoder(hidden_channels, heads, n_kl_types)

    def forward(self, graph_data):
        """Return the decoder logits for the labelled edges stored in ``graph_data``."""
        # B1 embeddings come from the GNN; A nodes are passed on with their input features.
        b1_embeddings = self.single_layer_model(graph_data.x_dict, graph_data.edge_index_dict)
        return self.EdgeDecoder(graph_data.x_dict, b1_embeddings, graph_data)


> The alternative architecture

In [None]:
class GNN(torch.nn.Module):
    """Single ``HeteroConv`` layer holding one convolution for ``edge_type``.

    ``conv`` is called as ``conv((-1, -1), hidden_channels, ...)`` with ``heads`` and
    ``dropout_GNN`` forwarded as keyword arguments.
    """

    def __init__(self, edge_type , conv, hidden_channels, heads, dropout_GNN):
        super().__init__()
        # NOTE(review): GATv2Conv documents this option as `share_weights`; confirm that the
        # installed layer accepts (and actually uses) the `shared_weights` spelling.
        layer = conv(
            (-1, -1),
            hidden_channels,
            add_self_loops=False,
            heads=heads,
            dropout=dropout_GNN,
            shared_weights=True,
        )
        self.conv = layer
        self.hetero_conv = HeteroConv({edge_type: layer})

    def forward(self, x_dict, edge_index_dict):
        """Return the ``HeteroConv`` output for the given feature and edge dictionaries."""
        return self.hetero_conv(x_dict, edge_index_dict)

# Classifier, Binary :
class EdgeDecoder(torch.nn.Module):
    """Edge scorer that compresses the B1 embedding before adding the A features.

    The B1 embedding goes through two Linear -> BatchNorm -> leaky_relu -> Dropout
    stages (720 then 150 units), is concatenated with the A-node features, goes
    through one more such stage (100 units) and a final single-output linear layer.
    """

    def __init__(self, hidden_channels, heads, n_kl_types, dropout):
        super().__init__()
        embedding_dim = heads * hidden_channels

        self.lin1 = nn.Linear(embedding_dim, 720)
        self.batch_norm1 = nn.BatchNorm1d(720)
        self.dropout1 = nn.Dropout(dropout)

        self.lin2 = nn.Linear(720, 150)
        self.batch_norm2 = nn.BatchNorm1d(150)
        self.dropout2 = nn.Dropout(dropout)

        # The A-node features join the network at this layer.
        self.lin3 = nn.Linear(150 + n_kl_types, 100)
        self.batch_norm3 = nn.BatchNorm1d(100)
        self.dropout3 = nn.Dropout(dropout)

        self.lin4 = nn.Linear(100, 1)

    def forward(self, x_dict_A, x_dict_B1, graph_data):
        """Return one logit per labelled ("B1", "infects", "A") edge."""
        label_index = graph_data[("B1", "infects", "A")].edge_label_index
        a_part = x_dict_A["A"][label_index[1]]
        hidden = x_dict_B1["B1"][label_index[0]]

        b1_stages = (
            (self.lin1, self.batch_norm1, self.dropout1),
            (self.lin2, self.batch_norm2, self.dropout2),
        )
        for linear, norm, drop in b1_stages:
            hidden = drop(F.leaky_relu(norm(linear(hidden))))
        if torch.isnan(hidden).any():
            print("NaN detected! pos A")

        # Reduced B1 representation first, raw A features second.
        merged = torch.cat((hidden, a_part), dim=-1)
        hidden = self.dropout3(F.leaky_relu(self.batch_norm3(self.lin3(merged))))

        logits = self.lin4(hidden)
        if torch.isnan(logits).any():
            print("NaN detected! pos B")
        return logits.view(-1)

class Model(torch.nn.Module):
    """GNN encoder on ("B2", "expressed", "B1") edges plus an edge decoder.

    ``dropout_GNN`` is passed to the encoder, ``dropout`` to the decoder.
    """

    def __init__(self, conv, hidden_channels, heads, dropout_GNN, dropout, n_kl_types):
        super().__init__()
        encoder_edge = ("B2", "expressed", "B1")
        self.single_layer_model = GNN(encoder_edge, conv, hidden_channels, heads, dropout_GNN)
        self.EdgeDecoder = EdgeDecoder(hidden_channels, heads, n_kl_types, dropout)

    def forward(self, graph_data):
        """Return decoder logits; prints a warning if any of them is NaN."""
        b1_embeddings = self.single_layer_model(graph_data.x_dict, graph_data.edge_index_dict)
        logits = self.EdgeDecoder(graph_data.x_dict, b1_embeddings, graph_data)
        if torch.isnan(logits).any():
            print("NaN detected! pos C")
        return logits

> arch 3 : 4 FNN layers in the EdgeDecoder

In [None]:
# The model : Classifier
class GNN(torch.nn.Module):
    """Wrap one convolution for ``edge_type`` inside a ``HeteroConv``.

    ``conv`` is built with ``(-1, -1)`` input sizes and ``hidden_channels`` outputs;
    ``heads`` and ``dropout`` are forwarded to it unchanged.
    """

    def __init__(self, edge_type , conv, hidden_channels, heads, dropout):
        super().__init__()
        # NOTE(review): GATv2Conv documents this option as `share_weights`; confirm that the
        # installed layer accepts (and actually uses) the `shared_weights` spelling.
        conv_options = {
            "add_self_loops": False,
            "heads": heads,
            "dropout": dropout,
            "shared_weights": True,
        }
        self.conv = conv((-1, -1), hidden_channels, **conv_options)
        self.hetero_conv = HeteroConv({edge_type: self.conv})

    def forward(self, x_dict, edge_index_dict):
        """Return the output dictionary of the wrapped ``HeteroConv``."""
        return self.hetero_conv(x_dict, edge_index_dict)

class EdgeDecoder(torch.nn.Module):
    """Edge scorer with four hidden stages (1000, 720, 512, 124 units) and one output unit.

    Each hidden stage is Linear -> BatchNorm -> ReLU -> Dropout(0.2). The input is the
    A-node feature vector concatenated with the B1-node embedding.
    """

    def __init__(self, hidden_channels, heads, n_kl_types):
        super().__init__()
        drop_p = 0.2
        in_features = heads * hidden_channels + n_kl_types

        self.lin1 = nn.Linear(in_features, 1000)
        self.batch_norm1 = nn.BatchNorm1d(1000)
        self.dropout1 = nn.Dropout(drop_p)

        self.lin2 = nn.Linear(1000, 720)
        self.batch_norm2 = nn.BatchNorm1d(720)
        self.dropout2 = nn.Dropout(drop_p)

        self.lin3 = nn.Linear(720, 512)
        self.batch_norm3 = nn.BatchNorm1d(512)
        self.dropout3 = nn.Dropout(drop_p)

        self.lin4 = nn.Linear(512, 124)
        self.batch_norm4 = nn.BatchNorm1d(124)
        self.dropout4 = nn.Dropout(drop_p)

        self.lin5 = nn.Linear(124, 1)

    def forward(self, x_dict_A, x_dict_B1, graph_data):
        """Return one logit per labelled ("B1", "infects", "A") edge."""
        label_index = graph_data[("B1", "infects", "A")].edge_label_index
        a_part = x_dict_A["A"][label_index[1]]
        b1_part = x_dict_B1["B1"][label_index[0]]
        hidden = torch.cat((a_part, b1_part), dim=-1)

        stages = (
            (self.lin1, self.batch_norm1, self.dropout1),
            (self.lin2, self.batch_norm2, self.dropout2),
            (self.lin3, self.batch_norm3, self.dropout3),
            (self.lin4, self.batch_norm4, self.dropout4),
        )
        # Normalisation comes before the activation in this variant.
        for linear, norm, drop in stages:
            hidden = drop(torch.relu(norm(linear(hidden))))

        return self.lin5(hidden).view(-1)

class Model(torch.nn.Module):
    """Encoder/decoder pair: a GNN on ("B2", "expressed", "B1") edges and an edge scorer."""

    def __init__(self, conv, hidden_channels, heads, dropout,n_kl_types):
        super().__init__()
        self.single_layer_model = GNN(
            ("B2", "expressed", "B1"), conv, hidden_channels, heads, dropout
        )
        self.EdgeDecoder = EdgeDecoder(hidden_channels, heads, n_kl_types)

    def forward(self, graph_data):
        """Encode B1 nodes, then score the labelled edges of ``graph_data``."""
        encoded = self.single_layer_model(graph_data.x_dict, graph_data.edge_index_dict)
        # A-node features are taken directly from the input graph.
        return self.EdgeDecoder(graph_data.x_dict, encoded, graph_data)

> architecture 4 : same 5 linear layers as arch 3, with leaky_relu applied before batch normalization

In [None]:
# The model : Classifier
class GNN(torch.nn.Module):
    """Heterogeneous layer that applies one ``conv`` instance to ``edge_type``.

    The convolution is created as ``conv((-1, -1), hidden_channels, ...)`` and receives
    ``heads`` and ``dropout`` as keyword arguments.
    """

    def __init__(self, edge_type , conv, hidden_channels, heads, dropout):
        super().__init__()
        # NOTE(review): GATv2Conv documents this option as `share_weights`; confirm that the
        # installed layer accepts (and actually uses) the `shared_weights` spelling.
        layer = conv((-1, -1), hidden_channels,
                     add_self_loops=False, heads=heads,
                     dropout=dropout, shared_weights=True)
        self.conv = layer
        self.hetero_conv = HeteroConv({edge_type: layer})

    def forward(self, x_dict, edge_index_dict):
        """Return whatever the wrapped ``HeteroConv`` produces for these inputs."""
        return self.hetero_conv(x_dict, edge_index_dict)

class EdgeDecoder(torch.nn.Module):
    """Edge scorer with four hidden stages (1000, 720, 512, 124 units) and one output unit.

    Each hidden stage is Linear -> leaky_relu -> BatchNorm -> Dropout(0.2), i.e. the
    activation is applied before normalisation. The input is the A-node feature vector
    concatenated with the B1-node embedding.
    """

    def __init__(self, hidden_channels, heads, n_kl_types):
        super().__init__()
        drop_p = 0.2
        in_features = heads * hidden_channels + n_kl_types

        self.lin1 = nn.Linear(in_features, 1000)
        self.batch_norm1 = nn.BatchNorm1d(1000)
        self.dropout1 = nn.Dropout(drop_p)

        self.lin2 = nn.Linear(1000, 720)
        self.batch_norm2 = nn.BatchNorm1d(720)
        self.dropout2 = nn.Dropout(drop_p)

        self.lin3 = nn.Linear(720, 512)
        self.batch_norm3 = nn.BatchNorm1d(512)
        self.dropout3 = nn.Dropout(drop_p)

        self.lin4 = nn.Linear(512, 124)
        self.batch_norm4 = nn.BatchNorm1d(124)
        self.dropout4 = nn.Dropout(drop_p)

        self.lin5 = nn.Linear(124, 1)

    def forward(self, x_dict_A, x_dict_B1, graph_data):
        """Return one logit per labelled ("B1", "infects", "A") edge."""
        label_index = graph_data[("B1", "infects", "A")].edge_label_index
        a_part = x_dict_A["A"][label_index[1]]
        b1_part = x_dict_B1["B1"][label_index[0]]
        hidden = torch.cat((a_part, b1_part), dim=-1)

        stages = (
            (self.lin1, self.batch_norm1, self.dropout1),
            (self.lin2, self.batch_norm2, self.dropout2),
            (self.lin3, self.batch_norm3, self.dropout3),
            (self.lin4, self.batch_norm4, self.dropout4),
        )
        # Activation first, then normalisation, then dropout.
        for linear, norm, drop in stages:
            hidden = drop(norm(F.leaky_relu(linear(hidden))))

        return self.lin5(hidden).view(-1)

class Model(torch.nn.Module):
    """Full link classifier: ``GNN`` encoder followed by ``EdgeDecoder``."""

    def __init__(self, conv, hidden_channels, heads, dropout,n_kl_types):
        super().__init__()
        message_edge = ("B2", "expressed", "B1")
        self.single_layer_model = GNN(message_edge, conv, hidden_channels, heads, dropout)
        self.EdgeDecoder = EdgeDecoder(hidden_channels, heads, n_kl_types)

    def forward(self, graph_data):
        """Return the logits of the labelled edges found in ``graph_data``."""
        node_embeddings = self.single_layer_model(
            graph_data.x_dict, graph_data.edge_index_dict
        )
        # The decoder reads "A" from the raw features and "B1" from the embeddings.
        return self.EdgeDecoder(graph_data.x_dict, node_embeddings, graph_data)

> Architecture : B1 embedding reduced by 3 FNN layers before concatenation with the A features

In [None]:
class GNN(torch.nn.Module):
    """Single-edge-type ``HeteroConv`` layer.

    ``conv`` is instantiated with ``(-1, -1)`` input sizes, ``hidden_channels`` outputs,
    and ``heads`` / ``dropout_GNN`` forwarded as keyword arguments.
    """

    def __init__(self, edge_type , conv, hidden_channels, heads, dropout_GNN):
        super().__init__()
        # NOTE(review): GATv2Conv documents this option as `share_weights`; confirm that the
        # installed layer accepts (and actually uses) the `shared_weights` spelling.
        conv_options = dict(add_self_loops=False, heads=heads,
                            dropout=dropout_GNN, shared_weights=True)
        self.conv = conv((-1, -1), hidden_channels, **conv_options)
        self.hetero_conv = HeteroConv({edge_type: self.conv})

    def forward(self, x_dict, edge_index_dict):
        """Run the heterogeneous convolution and return its output dictionary."""
        return self.hetero_conv(x_dict, edge_index_dict)

# Classifier, Binary :
class EdgeDecoder(torch.nn.Module):
    """Edge scorer that reduces the B1 embedding in three stages before using A features.

    Stages 1-3 (1280, 720, 250 units) see only the B1 embedding; stage 4 (72 units) sees
    the reduced embedding concatenated with the A-node features. Every stage is
    Linear -> BatchNorm -> leaky_relu -> Dropout, followed by a single-output linear layer.
    """

    def __init__(self, hidden_channels, heads, n_kl_types, dropout = 0.4):
        super().__init__()
        embedding_dim = heads * hidden_channels

        self.lin1 = nn.Linear(embedding_dim, 1280)
        self.batch_norm1 = nn.BatchNorm1d(1280)
        self.dropout1 = nn.Dropout(dropout)

        self.lin2 = nn.Linear(1280, 720)
        self.batch_norm2 = nn.BatchNorm1d(720)
        self.dropout2 = nn.Dropout(dropout)

        self.lin3 = nn.Linear(720, 250)
        self.batch_norm3 = nn.BatchNorm1d(250)
        self.dropout3 = nn.Dropout(dropout)

        # The A-node features join the network at this layer.
        self.lin4 = nn.Linear(250 + n_kl_types, 72)
        self.batch_norm4 = nn.BatchNorm1d(72)
        self.dropout4 = nn.Dropout(dropout)

        self.lin5 = nn.Linear(72, 1)

    def _stage(self, tensor, linear, norm, drop):
        # One Linear -> BatchNorm -> leaky_relu -> Dropout stage.
        return drop(F.leaky_relu(norm(linear(tensor))))

    def forward(self, x_dict_A, x_dict_B1, graph_data):
        """Return one logit per labelled ("B1", "infects", "A") edge."""
        label_index = graph_data[("B1", "infects", "A")].edge_label_index
        a_part = x_dict_A["A"][label_index[1]]
        hidden = x_dict_B1["B1"][label_index[0]]

        hidden = self._stage(hidden, self.lin1, self.batch_norm1, self.dropout1)
        hidden = self._stage(hidden, self.lin2, self.batch_norm2, self.dropout2)
        hidden = self._stage(hidden, self.lin3, self.batch_norm3, self.dropout3)

        # Reduced B1 representation first, raw A features second.
        merged = torch.cat((hidden, a_part), dim=-1)
        hidden = self._stage(merged, self.lin4, self.batch_norm4, self.dropout4)

        return self.lin5(hidden).view(-1)

class Model(torch.nn.Module):
    """GNN encoder (using ``dropout_GNN``) followed by the edge decoder (using ``dropout``)."""

    def __init__(self, conv, hidden_channels, heads, dropout_GNN, dropout, n_kl_types):
        super().__init__()
        message_edge = ("B2", "expressed", "B1")
        self.single_layer_model = GNN(message_edge, conv, hidden_channels, heads, dropout_GNN)
        self.EdgeDecoder = EdgeDecoder(hidden_channels, heads, n_kl_types, dropout)

    def forward(self, graph_data):
        """Return the decoder logits for the labelled edges of ``graph_data``."""
        encoded = self.single_layer_model(graph_data.x_dict, graph_data.edge_index_dict)
        # "A" features come from the input graph, "B1" features from the encoder.
        return self.EdgeDecoder(graph_data.x_dict, encoded, graph_data)




> 3 FNN layers :

In [None]:
# *****************************************************************************
#logging.basicConfig(filename = f"{path_work}/train_nn/GATv2Conv.1608.log",format='%(asctime)s | %(levelname)s: %(message)s', level=logging.NOTSET, filemode='w')

# The model : Classifier
class GNN(torch.nn.Module):
    """``HeteroConv`` wrapper around a single convolution bound to ``edge_type``.

    The convolution is created as ``conv((-1, -1), hidden_channels, ...)`` with ``heads``
    and ``dropout`` forwarded as keyword arguments.
    """

    def __init__(self, edge_type , conv, hidden_channels, heads, dropout):
        super().__init__()
        # NOTE(review): GATv2Conv documents this option as `share_weights`; confirm that the
        # installed layer accepts (and actually uses) the `shared_weights` spelling.
        layer = conv(
            (-1, -1), hidden_channels,
            add_self_loops=False, heads=heads, dropout=dropout, shared_weights=True,
        )
        self.conv = layer
        self.hetero_conv = HeteroConv({edge_type: layer})

    def forward(self, x_dict, edge_index_dict):
        """Return the output dictionary of the heterogeneous convolution."""
        return self.hetero_conv(x_dict, edge_index_dict)

# Classifier, Binary :
class EdgeDecoder(torch.nn.Module):
    """Three-layer MLP (720 -> 512 -> 1) that scores ("B1", "infects", "A") edges.

    Args:
        hidden_channels: Size of the B1 embedding per head.
        heads: Multiplier applied to ``hidden_channels`` to get the B1 embedding width.
        n_kl_types: Width of the A-node feature vector.
    """

    def __init__(self, hidden_channels, heads, n_kl_types):
        super().__init__()
        self.lin1 = torch.nn.Linear(heads*hidden_channels + n_kl_types, 720)
        self.lin2 = torch.nn.Linear(720, 512)
        self.lin3 = torch.nn.Linear(512, 1)

    def forward(self, x_dict_A , x_dict_B1, graph_data):
        """Return a flat tensor with one logit per column of ``edge_label_index``."""
        edge_type = ("B1", "infects", "A")
        edge_feat_A = x_dict_A["A"][graph_data[edge_type].edge_label_index[1]]
        edge_feat_B1 = x_dict_B1["B1"][graph_data[edge_type].edge_label_index[0]]
        features_phage = torch.cat((edge_feat_A ,edge_feat_B1), dim=-1)
        # `F` is the torch.nn.functional module, not a Tensor attribute: the former
        # `tensor.F.leaky_relu()` spelling raised AttributeError on the first forward pass.
        x = F.leaky_relu(self.lin1(features_phage))
        x = F.leaky_relu(self.lin2(x))
        x = self.lin3(x)
        return x.view(-1)

class Model(torch.nn.Module):
    """Link classifier made of a ``GNN`` encoder and an ``EdgeDecoder`` head."""

    def __init__(self, conv, hidden_channels, heads, dropout,n_kl_types):
        super().__init__()
        encoder_edge = ("B2", "expressed", "B1")
        self.single_layer_model = GNN(encoder_edge, conv, hidden_channels, heads, dropout)
        self.EdgeDecoder = EdgeDecoder(hidden_channels, heads, n_kl_types)

    def forward(self, graph_data):
        """Encode with the GNN, then decode the labelled edges into logits."""
        b1_embeddings = self.single_layer_model(
            graph_data.x_dict, graph_data.edge_index_dict
        )
        # The unmodified feature dictionary supplies the "A" side of every edge.
        return self.EdgeDecoder(graph_data.x_dict, b1_embeddings, graph_data)



> 3 FNN layers with dropout and batch normalization

In [None]:
class GNN(torch.nn.Module):
    """One convolution for ``edge_type`` wrapped in a ``HeteroConv``.

    ``conv`` is called with ``(-1, -1)`` input sizes and ``hidden_channels`` outputs;
    ``heads`` and ``dropout_GNN`` are passed through as keyword arguments.
    """

    def __init__(self, edge_type , conv, hidden_channels, heads, dropout_GNN):
        super().__init__()
        # NOTE(review): GATv2Conv documents this option as `share_weights`; confirm that the
        # installed layer accepts (and actually uses) the `shared_weights` spelling.
        conv_options = {
            "add_self_loops": False,
            "heads": heads,
            "dropout": dropout_GNN,
            "shared_weights": True,
        }
        self.conv = conv((-1, -1), hidden_channels, **conv_options)
        self.hetero_conv = HeteroConv({edge_type: self.conv})

    def forward(self, x_dict, edge_index_dict):
        """Return the ``HeteroConv`` result for the supplied dictionaries."""
        return self.hetero_conv(x_dict, edge_index_dict)

# Classifier, Binary :
class EdgeDecoder(torch.nn.Module):
    """Edge scorer with two regularised hidden stages (720 and 512 units).

    Each hidden stage is Linear -> BatchNorm -> leaky_relu -> Dropout; the input is the
    A-node feature vector concatenated with the B1-node embedding.
    """

    def __init__(self, hidden_channels, heads, n_kl_types, dropout):
        super().__init__()
        in_features = heads * hidden_channels + n_kl_types

        self.lin1 = nn.Linear(in_features, 720)
        self.batch_norm1 = nn.BatchNorm1d(720)
        self.dropout1 = nn.Dropout(dropout)

        self.lin2 = nn.Linear(720, 512)
        self.batch_norm2 = nn.BatchNorm1d(512)
        self.dropout2 = nn.Dropout(dropout)

        self.lin3 = nn.Linear(512, 1)

    def forward(self, x_dict_A , x_dict_B1, graph_data):
        """Return one logit per labelled ("B1", "infects", "A") edge."""
        label_index = graph_data[("B1", "infects", "A")].edge_label_index
        a_part = x_dict_A["A"][label_index[1]]
        b1_part = x_dict_B1["B1"][label_index[0]]
        hidden = torch.cat((a_part, b1_part), dim=-1)

        stages = (
            (self.lin1, self.batch_norm1, self.dropout1),
            (self.lin2, self.batch_norm2, self.dropout2),
        )
        for linear, norm, drop in stages:
            hidden = drop(F.leaky_relu(norm(linear(hidden))))

        return self.lin3(hidden).view(-1)

class Model(torch.nn.Module):
    """``GNN`` encoder plus ``EdgeDecoder`` head with separate dropout rates.

    ``dropout_GNN`` goes to the encoder and ``dropout`` to the decoder.
    """

    def __init__(self, conv, hidden_channels, heads, dropout_GNN, dropout, n_kl_types):
        super().__init__()
        self.single_layer_model = GNN(
            ("B2", "expressed", "B1"), conv, hidden_channels, heads, dropout_GNN
        )
        self.EdgeDecoder = EdgeDecoder(hidden_channels, heads, n_kl_types, dropout)

    def forward(self, graph_data):
        """Return the logits of the labelled edges in ``graph_data``."""
        encoded = self.single_layer_model(graph_data.x_dict, graph_data.edge_index_dict)
        return self.EdgeDecoder(graph_data.x_dict, encoded, graph_data)

> 2 Layers 

In [None]:
class GNN(torch.nn.Module):
    """Heterogeneous layer with a single convolution registered under ``edge_type``.

    ``conv`` is built as ``conv((-1, -1), hidden_channels, ...)``; ``heads`` and
    ``dropout_GNN`` are forwarded as keyword arguments.
    """

    def __init__(self, edge_type , conv, hidden_channels, heads, dropout_GNN):
        super().__init__()
        # NOTE(review): GATv2Conv documents this option as `share_weights`; confirm that the
        # installed layer accepts (and actually uses) the `shared_weights` spelling.
        layer = conv((-1, -1), hidden_channels, add_self_loops=False,
                     heads=heads, dropout=dropout_GNN, shared_weights=True)
        self.conv = layer
        self.hetero_conv = HeteroConv({edge_type: layer})

    def forward(self, x_dict, edge_index_dict):
        """Apply the heterogeneous convolution and return its output dictionary."""
        return self.hetero_conv(x_dict, edge_index_dict)

# Classifier, Binary :
class EdgeDecoder(torch.nn.Module):
    """Edge scorer with one regularised hidden stage of 512 units.

    The hidden stage is Linear -> BatchNorm -> leaky_relu -> Dropout; the input is the
    A-node feature vector concatenated with the B1-node embedding.
    """

    def __init__(self, hidden_channels, heads, n_kl_types, dropout):
        super().__init__()
        in_features = heads * hidden_channels + n_kl_types
        self.lin1 = nn.Linear(in_features, 512)
        self.batch_norm1 = nn.BatchNorm1d(512)
        self.dropout1 = nn.Dropout(dropout)

        self.lin2 = nn.Linear(512, 1)

    def forward(self, x_dict_A , x_dict_B1, graph_data):
        """Return one logit per labelled ("B1", "infects", "A") edge."""
        label_index = graph_data[("B1", "infects", "A")].edge_label_index
        a_part = x_dict_A["A"][label_index[1]]
        b1_part = x_dict_B1["B1"][label_index[0]]

        pair_features = torch.cat((a_part, b1_part), dim=-1)
        normalised = self.batch_norm1(self.lin1(pair_features))
        hidden = self.dropout1(F.leaky_relu(normalised))

        return self.lin2(hidden).view(-1)

class Model(torch.nn.Module):
    """Two-part link classifier: ``GNN`` encoder, then ``EdgeDecoder``.

    ``dropout_GNN`` is used by the encoder, ``dropout`` by the decoder.
    """

    def __init__(self, conv, hidden_channels, heads, dropout_GNN, dropout, n_kl_types):
        super().__init__()
        encoder_edge = ("B2", "expressed", "B1")
        self.single_layer_model = GNN(encoder_edge, conv, hidden_channels, heads, dropout_GNN)
        self.EdgeDecoder = EdgeDecoder(hidden_channels, heads, n_kl_types, dropout)

    def forward(self, graph_data):
        """Return decoder logits for the labelled edges carried by ``graph_data``."""
        b1_embeddings = self.single_layer_model(
            graph_data.x_dict, graph_data.edge_index_dict
        )
        # "A" rows are read from the raw features, "B1" rows from the embeddings.
        return self.EdgeDecoder(graph_data.x_dict, b1_embeddings, graph_data)

In [None]:
class GIN(torch.nn.Module):
    """Three-layer GIN graph classifier with a sum-pooling readout.

    Each ``GINConv`` wraps a Linear -> BatchNorm1d -> ReLU -> Linear -> ReLU block. The
    node embeddings of all three layers are pooled per graph with ``global_add_pool``,
    concatenated, and classified by two linear layers.

    Args:
        dim_h: Hidden width of every GIN block.
        num_node_features: Input feature width. When ``None`` it is read from the
            notebook-level ``dataset`` object.
        num_classes: Number of output classes. When ``None`` it is read from the
            notebook-level ``dataset`` object.

    NOTE(review): no ``dataset`` is defined in the cells visible here, so pass both sizes
    explicitly unless that global exists in your session.
    """

    def __init__(self, dim_h, num_node_features=None, num_classes=None):
        super().__init__()
        if num_node_features is None:
            num_node_features = dataset.num_node_features
        if num_classes is None:
            num_classes = dataset.num_classes

        self.conv1 = GINConv(self._block(num_node_features, dim_h))
        self.conv2 = GINConv(self._block(dim_h, dim_h))
        self.conv3 = GINConv(self._block(dim_h, dim_h))
        self.lin1 = nn.Linear(dim_h*3, dim_h*3)
        self.lin2 = nn.Linear(dim_h*3, num_classes)

    @staticmethod
    def _block(in_dim, out_dim):
        # MLP applied inside one GINConv; names are qualified through `nn` because the
        # bare Sequential/Linear/BatchNorm1d/ReLU names are not imported in this notebook.
        return nn.Sequential(
            nn.Linear(in_dim, out_dim),
            nn.BatchNorm1d(out_dim), nn.ReLU(),
            nn.Linear(out_dim, out_dim), nn.ReLU())

    def forward(self, x, edge_index, batch):
        """Return ``(logits, log_softmax(logits))`` for every graph in the batch."""
        # Node embeddings
        h1 = self.conv1(x, edge_index)
        h2 = self.conv2(h1, edge_index)
        h3 = self.conv3(h2, edge_index)

        # Graph-level readout
        h1 = global_add_pool(h1, batch)
        h2 = global_add_pool(h2, batch)
        h3 = global_add_pool(h3, batch)

        # Concatenate graph embeddings
        h = torch.cat((h1, h2, h3), dim=1)

        # Classifier
        h = self.lin1(h)
        h = h.relu()
        h = F.dropout(h, p=0.5, training=self.training)
        h = self.lin2(h)

        return h, F.log_softmax(h, dim=1)

# NOTE(review): `GCN` is not defined in any cell visible in this notebook, and the `train`
# functions defined further down take (model, data, optimizer, criterion, edge_type) rather
# than (model, loader). This cell looks like a leftover snippet; confirm before running it.
gcn = GCN(dim_h=32)
gin = GIN(dim_h=32)
gcn = train(gcn, train_loader)
gin = train(gin, train_loader)

***
# The training : 

> Original : 

In [None]:
from torch.optim.lr_scheduler import ReduceLROnPlateau

# Training :
# Use the GPU when CUDA is available, otherwise the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Hyper-parameters read by main(); `n_kl_types` is the length of the first "A" feature row.
parameters_model = dict(
    hidden_channels=1000,
    lr=0.0001,
    conv=GATv2Conv,
    heads=1,
    dropout=0.1,
    criterion=torch.nn.BCEWithLogitsLoss(),
    n_kl_types=len(graph_data["A"].x[0]),
)

def train(model, data, optimizer, criterion, edge_type):
    """Run one optimisation step on ``data`` and return the loss as a Python float.

    The targets are read from ``data[edge_type].edge_label`` after ``data`` has been
    moved to the module-level ``device``.
    """
    model.train()
    batch = data.to(device)
    optimizer.zero_grad()
    logits = model(batch)
    targets = batch[edge_type].edge_label
    step_loss = criterion(logits, targets)
    step_loss.backward()
    optimizer.step()
    return step_loss.item()

@torch.no_grad()
def evaluate(model, data, criterion, edge_type):
    """Score ``model`` on ``data`` without tracking gradients.

    Logits are turned into probabilities with a sigmoid and into class predictions by
    rounding (i.e. a 0.5 threshold).

    Returns:
        Tuple ``(loss, f1, precision, recall, mcc, accuracy, auc)``; ``loss`` is a float
        and the remaining entries are the scikit-learn metric values.
    """
    model.eval()
    data = data.to(device)
    out = model(data)
    edge_labels = data[edge_type].edge_label
    val_loss = criterion(out, edge_labels)
    probs = torch.sigmoid(out)
    # scikit-learn converts its inputs to NumPy arrays, which fails for CUDA tensors,
    # so everything is moved to host memory first.
    y_true = edge_labels.detach().cpu().numpy()
    y_prob = probs.detach().cpu().numpy()
    y_pred = probs.round().detach().cpu().numpy()
    # Calculate the metrics
    f1 = f1_score(y_true, y_pred, average='binary')
    precision = precision_score(y_true, y_pred, average='binary')
    recall = recall_score(y_true, y_pred, average='binary')
    mcc = matthews_corrcoef(y_true, y_pred)
    accuracy = accuracy_score(y_true, y_pred)
    auc = roc_auc_score(y_true, y_prob)
    return val_loss.item(), f1, precision, recall, mcc, accuracy, auc

def main():
    """Train the link classifier described by ``parameters_model`` and report metrics.

    Relies on the notebook-level ``train_data``, ``val_data`` and ``test_data`` splits,
    on ``device``, and on the ``Model``, ``train`` and ``evaluate`` definitions.
    Metrics on ``test_data`` are logged every 25 epochs and drive the learning-rate
    scheduler; ``val_data`` is used once for the final report.
    """
    # Every hyper-parameter comes from `parameters_model`; the bare names used previously
    # (conv, hidden_channels, dropout, lr, heads, criterion) were never defined in this scope.
    conv = parameters_model["conv"]
    hidden_channels = parameters_model["hidden_channels"]
    heads = parameters_model["heads"]
    dropout = parameters_model["dropout"]
    lr = parameters_model["lr"]
    criterion = parameters_model["criterion"]
    weight_decay = parameters_model.get("weight_decay", 0.001)
    edge_type = ("B1", "infects", "A")

    logging.info(f"Let's start the work with {conv}\t{hidden_channels}\t{dropout}\t{lr}\t{heads}")
    model = Model(conv, hidden_channels, heads, dropout,
                  parameters_model["n_kl_types"]).to(device)
    # One gradient-free forward pass initialises the lazily-sized (-1, -1) layers; the data
    # must sit on the same device as the model for this call.
    with torch.no_grad():
        model(train_data.to(device))
    optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
    #optimizer = torch.optim.AdamW(model.parameters(), lr = parameters_model["lr"])
    scheduler = ReduceLROnPlateau(optimizer, 'min')
    for epoch in range(3000):
        train_loss = train(model, train_data, optimizer, criterion, edge_type)
        if epoch % 25 == 0:
            # Get all metrics including recall and MCC from evaluate function
            test_loss, f1, precision, recall, mcc, accuracy, auc = evaluate(model, test_data, criterion, edge_type)
            info_training_concise = f'Epoch: {epoch}, Train Loss: {train_loss}, Test Loss: {test_loss}, MCC: {mcc}, AUC: {auc}'
            info_training = f'Epoch: {epoch}, Train Loss: {train_loss}, Test Loss: {test_loss}, F1 Score: {f1}, Precision: {precision}, Recall: {recall}, MCC: {mcc}, Accuracy: {accuracy}, AUC: {auc}'
            logging.info(info_training_concise)
            print(info_training)
            # The scheduler is stepped only on evaluation epochs, i.e. once every 25 epochs.
            scheduler.step(test_loss)
    # Save the model
    #torch.save(model.state_dict(), f"{path_work}/GATv2Conv.debud_clean.1909.pt")
    # The final eval :
    print("Final evaluation ...")
    val_loss, f1, precision, recall, mcc, accuracy, auc = evaluate(model, val_data, criterion, edge_type)
    print(f'MCC: {mcc}, F1 Score: {f1}, Precision: {precision}, Recall: {recall}, Accuracy: {accuracy}, AUC: {auc}')
    logging.info(f"Final evaluation ...\nF1 Score: {f1}, Precision: {precision}, Recall: {recall}, MCC: {mcc}, Accuracy: {accuracy}, AUC: {auc}")

if __name__ == "__main__":
    main()


In [None]:
# *****************************************************************************
# Pre-process data :
# Split the ("B1", "infects", "A") edges into train / validation / test sets
# (10 % validation, 20 % test) and sample one negative edge per positive edge.
transform = T.RandomLinkSplit(
    num_val=0.1,
    num_test=0.2,
    #disjoint_train_ratio=...,
    neg_sampling_ratio=1.0,
    add_negative_train_samples=True,
    edge_types=("B1", "infects", "A"),
    rev_edge_types=("A", "harbors", "B1"),
)

train_data, val_data, test_data = transform(graph_data)


def _make_link_loader(split_data):
    """Build a LinkNeighborLoader over the labelled ("B1", "infects", "A") edges of one split."""
    target_edge = ("B1", "infects", "A")
    return LinkNeighborLoader(
        data=split_data,
        num_neighbors=[-1],
        edge_label_index=(target_edge, split_data[target_edge].edge_label_index),
        edge_label=split_data[target_edge].edge_label,
        batch_size=128,
        shuffle=True,
    )


# The three loaders share one configuration. The last call previously lacked its closing
# parenthesis, which made the whole cell a SyntaxError.
train_loader = _make_link_loader(train_data)
val_loader = _make_link_loader(val_data)
test_loader = _make_link_loader(test_data)

In [None]:
# *****************************************************************************
# Pre-process data :
# Split the ("B1", "infects", "A") edges into train / validation / test sets
# (10 % validation, 20 % test) with one sampled negative edge per positive edge.
# NOTE(review): this repeats the split from the previous cell; no seed is set in the
# visible cells, so re-running it presumably yields a different random split.
transform = T.RandomLinkSplit(
    num_val=0.1,
    num_test=0.2,
    #disjoint_train_ratio=...,
    neg_sampling_ratio=1.0,
    add_negative_train_samples=True,
    edge_types=("B1", "infects", "A"),
    rev_edge_types=("A", "harbors", "B1"),
)

# The transform returns the three splits in this order.
train_data, val_data, test_data = transform(graph_data)


In [None]:
# *****************************************************************************
# Training :
# Use the GPU when CUDA is available, otherwise the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

def train(model, data, optimizer, criterion, edge_type):
    """Perform a single gradient update on ``data``; return the loss value as a float.

    ``data`` is moved to the module-level ``device`` and the targets are taken from
    ``data[edge_type].edge_label``.
    """
    model.train()
    on_device = data.to(device)
    optimizer.zero_grad()
    predictions = model(on_device)
    targets = on_device[edge_type].edge_label
    batch_loss = criterion(predictions, targets)
    batch_loss.backward()
    optimizer.step()
    return batch_loss.item()

@torch.no_grad()
def evaluate(model, data, criterion, edge_type):
    """Compute the loss and classification metrics of ``model`` on ``data``.

    Probabilities are the sigmoid of the logits; class predictions are those
    probabilities rounded (a 0.5 threshold).

    Returns:
        Tuple ``(loss, f1, precision, recall, mcc, accuracy, auc)``.
    """
    model.eval()
    data = data.to(device)
    out = model(data)
    edge_labels = data[edge_type].edge_label
    val_loss = criterion(out, edge_labels)
    probs = torch.sigmoid(out)
    # scikit-learn needs host-side arrays; CUDA tensors cannot be converted to NumPy
    # implicitly, so move everything to the CPU before computing metrics.
    y_true = edge_labels.detach().cpu().numpy()
    y_prob = probs.detach().cpu().numpy()
    y_pred = probs.round().detach().cpu().numpy()
    # Calculate the metrics
    f1 = f1_score(y_true, y_pred, average='binary')
    precision = precision_score(y_true, y_pred, average='binary')
    recall = recall_score(y_true, y_pred, average='binary')
    mcc = matthews_corrcoef(y_true, y_pred)
    accuracy = accuracy_score(y_true, y_pred)
    auc = roc_auc_score(y_true, y_prob)
    return val_loss.item(), f1, precision, recall, mcc, accuracy, auc

def make_models(NEG):
    """Train and save one ensemble member using ``NEG`` negatives per positive edge.

    A fresh random link split of the notebook-level ``graph_data`` is drawn with
    ``neg_sampling_ratio=NEG``; the model is trained for 5000 steps, its weights are
    written under ``{path_work}/ensemble/``, and the metrics on the validation split are
    printed and logged.

    Args:
        NEG: Value passed to ``RandomLinkSplit`` as ``neg_sampling_ratio``; it is also
            embedded in the checkpoint file name.
    """
    # Pre-process data :
    transform = T.RandomLinkSplit(
        num_val=0.1,
        num_test=0.2,
        #disjoint_train_ratio=...,
        neg_sampling_ratio=NEG,
        add_negative_train_samples=True,
        edge_types=("B1", "infects", "A"),
        rev_edge_types=("A", "harbors", "B1"))
    train_data, val_data, test_data = transform(graph_data)
    # the main :
    hidden_channels = 1000
    lr = 0.0001
    conv = GATv2Conv
    heads = 1
    dropout = 0.3
    logging.info(f"Let's start the work with {conv}\t{hidden_channels}\t{dropout}\t{lr}\t{heads}")
    # NOTE(review): every `Model` class visible in this notebook also requires `n_kl_types`
    # (and some a second dropout rate); confirm which definition this call is meant for.
    model = Model(conv,hidden_channels,heads,dropout).to(device)
    # One gradient-free forward pass initialises the lazily-sized (-1, -1) layers; the data
    # must sit on the same device as the model for this call.
    with torch.no_grad():
        model(train_data.to(device))
    criterion = torch.nn.BCEWithLogitsLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    edge_type = ("B1", "infects", "A")
    for epoch in range(5000):
        train_loss = train(model, train_data, optimizer, criterion, edge_type)
        if epoch % 50 == 0:
            # Get all metrics including recall and MCC from evaluate function
            test_loss, f1, precision, recall, mcc, accuracy, auc = evaluate(model, test_data, criterion, edge_type)
            info_training = f'Epoch: {epoch}, Train Loss: {train_loss}, Test Loss: {test_loss}, F1 Score: {f1}, Precision: {precision}, Recall: {recall}, MCC: {mcc}, Accuracy: {accuracy}, AUC: {auc}'
            #logging.info(info_training)
            print(info_training)
    # Save the model; create the target directory first so that hours of training are not
    # lost to a missing folder (exist_ok keeps concurrent workers from colliding).
    os.makedirs(f"{path_work}/ensemble", exist_ok=True)
    torch.save(model.state_dict(), f"{path_work}/ensemble/GATv2.{NEG}.model.single_batch.2607.pt")
    # The final eval :
    print("Final evaluation ...")
    val_loss, f1, precision, recall, mcc, accuracy, auc = evaluate(model, val_data, criterion, edge_type)
    print(f'F1 Score: {f1}, Precision: {precision}, Recall: {recall}, MCC: {mcc}, Accuracy: {accuracy}, AUC: {auc}')
    logging.info(f"Final evaluation {NEG} ; F1 Score: {f1}, Precision: {precision}, Recall: {recall}, MCC: {mcc}, Accuracy: {accuracy}, AUC: {auc}")

if __name__ == "__main__":
    # Train the members for NEG = 1 .. 49 on a pool of 10 worker threads.
    with ThreadPool(10) as pool:
        pool.map(make_models, list(range(1,50)))



In [None]:
#!/bin/bash
# SLURM batch script that runs the single-batch GATv2Conv ensemble training.
# Scheduler directives must start with "#SBATCH"; the job-name line used "#BATCH",
# which is read as an ordinary comment and left the job unnamed.
#SBATCH --job-name=GATv2Conv_ensemble__
#SBATCH --qos=medium
#SBATCH --ntasks=1
#SBATCH --cpus-per-task=40
#SBATCH --mem=200gb
#SBATCH --time=4-00:00:00
#SBATCH --output=GATv2Conv_ensemble%j.log

# Activate the conda environment that provides torch_geometric.
source /storage/apps/ANACONDA/anaconda3/etc/profile.d/conda.sh
conda activate torch_geometric

python /home/conchae/prediction_depolymerase_tropism/prophage_prediction/depolymerase_decipher/ficheros_28032023/train_nn/script_files/GATv2Conv_Hetero.Linear.single_batch.ensemble.py

In [None]:
# Training configuration.
# Prefer the GPU when CUDA is usable, otherwise stay on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Hyper-parameters read by main(). The key order is kept as-is because the
# whole dict is written to the log through str().
parameters_model = {
    "hidden_channels": 1280,
    "lr": 0.0001,
    "conv": GATv2Conv,
    "heads": 1,
    "dropout": 0.3,
    "criterion": torch.nn.BCEWithLogitsLoss(),
    # Length of the first feature row of the "A" nodes.
    "n_kl_types": len(graph_data["A"].x[0]),
    "weight_decay": 0.00001,
}

def train(model, data, optimizer, criterion, edge_type):
    """Perform one optimisation step on `data` and return the loss value.

    Args:
        model: module called as ``model(data)`` to produce the predictions.
        data: graph object; moved to the module-level ``device`` first.
        optimizer: optimizer whose gradients are reset and which is stepped.
        criterion: callable ``criterion(predictions, labels)`` giving the loss.
        edge_type: key used to read ``data[edge_type].edge_label``.

    Returns:
        The loss of this step, extracted with ``.item()``.
    """
    model.train()
    batch = data.to(device)
    optimizer.zero_grad()

    # Forward pass and loss against the labels stored on the target edge type.
    predictions = model(batch)
    targets = batch[edge_type].edge_label
    step_loss = criterion(predictions, targets)

    # Backward pass and parameter update.
    step_loss.backward()
    optimizer.step()
    return step_loss.item()

@torch.no_grad()
def evaluate(model, data, criterion, edge_type):
    """Score `model` on `data` without tracking gradients.

    Args:
        model: module called as ``model(data)``; its output is passed through
            a sigmoid to obtain probabilities.
        data: graph object; moved to the module-level ``device`` first.
        criterion: callable ``criterion(outputs, labels)`` giving the loss.
        edge_type: key used to read ``data[edge_type].edge_label``.

    Returns:
        Tuple ``(loss, f1, precision, recall, mcc, accuracy, auc)`` where
        ``loss`` comes from ``criterion`` and the rest from scikit-learn.
    """
    model.eval()
    data = data.to(device)
    out = model(data)
    edge_labels = data[edge_type].edge_label
    val_loss = criterion(out, edge_labels)
    probs = torch.sigmoid(out)
    # Hard predictions: probabilities rounded to the nearest class.
    pred_class = probs.round()

    # scikit-learn converts its inputs to NumPy arrays, which is impossible
    # for tensors living on a CUDA device, so copy everything to host memory.
    y_true = edge_labels.detach().cpu().numpy()
    y_pred = pred_class.detach().cpu().numpy()
    y_prob = probs.detach().cpu().numpy()

    # Calculate the metrics
    f1 = f1_score(y_true, y_pred, average='binary')
    precision = precision_score(y_true, y_pred, average='binary')
    recall = recall_score(y_true, y_pred, average='binary')
    mcc = matthews_corrcoef(y_true, y_pred)
    accuracy = accuracy_score(y_true, y_pred)
    # AUC is computed from the probabilities, not the rounded classes.
    auc = roc_auc_score(y_true, y_prob)
    return val_loss.item(), f1, precision, recall, mcc, accuracy, auc

class EarlyStopping:
    """Track the validation loss and flag when it has stopped improving.

    Call the instance once per validation round with the current loss and the
    model. Whenever the loss is at least as good as the best one seen so far,
    the model's ``state_dict`` is saved to ``path`` and the patience counter is
    reset; otherwise the counter grows and ``early_stop`` becomes True once it
    reaches ``patience``. ``early_stop`` is never reset by this class.

    Args:
        patience: number of consecutive non-improving calls tolerated.
        verbose: if True, print a message every time a checkpoint is written.
        path: file the checkpoint is written to. Give each concurrently
            trained model its own path so they do not overwrite each other.
    """

    def __init__(self, patience=5, verbose=False, path='checkpoint.pt'):
        self.patience = patience
        self.verbose = verbose
        self.path = path
        self.counter = 0
        self.best_score = None
        self.early_stop = False
        self.val_loss_min = float('inf')

    def __call__(self, val_loss, model):
        """Register `val_loss` and checkpoint `model` when it is not worse."""
        # Work with a score where higher is better.
        score = -val_loss

        if self.best_score is None:
            # First call: always becomes the reference point.
            self.best_score = score
            self.save_checkpoint(val_loss, model)
        elif score < self.best_score:
            # Strictly worse than the best loss so far.
            self.counter += 1
            print(f'EarlyStopping counter: {self.counter} out of {self.patience}')
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            # Equal or better: new reference, checkpoint and reset patience.
            self.best_score = score
            self.save_checkpoint(val_loss, model)
            self.counter = 0

    def save_checkpoint(self, val_loss, model):
        '''Save the model's state_dict to ``self.path`` and record `val_loss`.'''
        if self.verbose:
            print(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}).  Saving model ...')
        torch.save(model.state_dict(), self.path)  # save checkpoint
        self.val_loss_min = val_loss

def main(n_epochs=5000, eval_every=50):
    """Train the model on `train_data` and report metrics.

    Uses the module-level `parameters_model`, `device`, `train_data`,
    `test_data` and `val_data`. Metrics on `test_data` are logged and printed
    every `eval_every` epochs; after training, metrics on `val_data` are
    printed and logged as the final evaluation.

    Args:
        n_epochs: number of training steps (one call to `train` each).
        eval_every: evaluate on `test_data` whenever ``epoch % eval_every == 0``.
    """
    # NOTE(review): `graph` is not defined in the cells visible here — confirm
    # it exists in the notebook before running, otherwise this raises NameError.
    logging.info(f"Let's start with {str(parameters_model)} and {graph}")
    model = Model(parameters_model["conv"], parameters_model["hidden_channels"],
                  parameters_model["heads"], parameters_model["dropout"],
                  parameters_model["n_kl_types"]).to(device)
    # Warm-up forward pass before the optimizer is built (presumably to
    # materialise lazily-shaped layers — TODO confirm). The data must sit on
    # the same device as the model, and no autograd graph is needed here.
    with torch.no_grad():
        model(train_data.to(device))
    optimizer = torch.optim.AdamW(model.parameters(),
                                  lr=parameters_model["lr"],
                                  weight_decay=parameters_model["weight_decay"])
    # No learning-rate scheduler is stepped: the rate stays constant.
    criterion = parameters_model["criterion"]
    edge_type = ("B1", "infects", "A")
    for epoch in range(n_epochs):
        train_loss = train(model, train_data, optimizer, criterion, edge_type)
        if epoch % eval_every == 0:
            # Periodic monitoring on the test split.
            test_loss, f1, precision, recall, mcc, accuracy, auc = evaluate(model, test_data, criterion, edge_type)
            info_training_concise = f'Epoch: {epoch}, Train Loss: {train_loss}, Test Loss: {test_loss}, MCC: {mcc}, AUC: {auc}'
            info_training = f'Epoch: {epoch}, Train Loss: {train_loss}, Test Loss: {test_loss}, F1 Score: {f1}, Precision: {precision}, Recall: {recall}, MCC: {mcc}, Accuracy: {accuracy}, AUC: {auc}'
            logging.info(info_training_concise)
            print(info_training)
    # The trained weights are not persisted; uncomment to save them:
    #torch.save(model.state_dict(), f"{path_work}/GATv2Conv.debud_clean.1909.pt")
    # The final eval :
    print("Final evaluation ...")
    val_loss, f1, precision, recall, mcc, accuracy, auc = evaluate(model, val_data, criterion, edge_type)
    # Each value is printed next to its own label.
    print(f'MCC: {mcc}, F1 Score: {f1}, Precision: {precision}, Recall: {recall}, Accuracy: {accuracy}, AUC: {auc}')
    logging.info(f"Final evaluation ...\nF1 Score: {f1}, Precision: {precision}, Recall: {recall}, MCC: {mcc}, Accuracy: {accuracy}, AUC: {auc}")


# Entry point: run the training only when this code is executed as the main
# program (not when it is imported as a module).
if __name__ == "__main__":
    main()

