In [None]:
import os

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

from utils import set_seed, load_data
from utils import train_model, eval_model

In [None]:
class DeepTrio(nn.Module):
    """Siamese multi-scale 1-D CNN that scores a pair of embedded sequences.

    The network holds 33 parallel ``Conv1d`` branches (indexed ``n = 2..34``).
    Branches with ``n <= 15`` use the ``*_1`` hyper-parameters, the remaining
    ones use the ``*_2`` hyper-parameters.  Every branch is applied to both
    inputs with shared weights, globally max-pooled over the sequence axis and
    the two pooled vectors are summed.  The per-branch vectors are concatenated
    and passed through two fully connected layers followed by a sigmoid.

    Args:
        em_dim: Size of the per-position embedding (``in_channels`` of every
            convolution).
        kernel_rate_1: Kernel size of branch ``n <= 15`` is
            ``ceil(kernel_rate_1 * n ** 2)``.
        strides_rate_1: Stride of branch ``n <= 15`` is
            ``ceil(strides_rate_1 * (n - 1))``.
        filter_num_1: Number of output channels of every branch ``n <= 15``.
        kernel_rate_2: Same as ``kernel_rate_1`` for branches ``n > 15``.
        strides_rate_2: Same as ``strides_rate_1`` for branches ``n > 15``.
        filter_num_2: Number of output channels of every branch ``n > 15``.
        con_drop: Channel-wise dropout probability applied after each
            convolution.
        fn_drop_1: Dropout probability applied to the concatenated features.
        fn_drop_2: Dropout probability applied after the first linear layer.
        node_num: Width of the hidden fully connected layer.

    Note:
        The convolutions use no padding, so both inputs must be at least as
        long as the largest kernel (162 positions with the default rates).
    """

    def __init__(self, em_dim=13, kernel_rate_1=0.16, strides_rate_1=0.15,
                 filter_num_1=150, kernel_rate_2=0.14, strides_rate_2=0.25,
                 filter_num_2=175, con_drop=0.05, fn_drop_1=0.2, fn_drop_2=0.1,
                 node_num=256):
        super().__init__()

        # One convolution branch per n; only the hyper-parameter group differs
        # between the two halves, so pick the group and build the layer once.
        # Layers are created in ascending n so seeded initialisation and
        # state_dict keys stay stable.
        self.conv_layers = nn.ModuleList()
        for n in range(2, 35):
            if n <= 15:
                kernel_rate, stride_rate, filters = (
                    kernel_rate_1, strides_rate_1, filter_num_1)
            else:
                kernel_rate, stride_rate, filters = (
                    kernel_rate_2, strides_rate_2, filter_num_2)

            self.conv_layers.append(
                nn.Conv1d(
                    in_channels=em_dim,
                    out_channels=filters,
                    kernel_size=int(np.ceil(kernel_rate * n ** 2)),
                    stride=int(np.ceil(stride_rate * (n - 1))),
                    padding=0,
                    bias=False,
                )
            )

        self.conv_dropout = nn.Dropout2d(con_drop)
        self.fc_dropout1 = nn.Dropout(fn_drop_1)
        self.fc_dropout2 = nn.Dropout(fn_drop_2)

        # Each branch contributes exactly its out_channels after global max
        # pooling, so the classifier input width is their sum.
        total_features = sum(conv.out_channels for conv in self.conv_layers)

        self.fc1 = nn.Linear(total_features, node_num)
        self.fc2 = nn.Linear(node_num, 1)

    def _pooled_features(self, conv_layer, x):
        """Run one branch on ``x`` (batch, em_dim, seq_len) -> (batch, out_channels)."""
        out = F.relu(conv_layer(x))
        # Dropout2d expects a 4-D tensor to drop whole channels; add and then
        # remove a dummy trailing axis around the call.
        out = self.conv_dropout(out.unsqueeze(-1)).squeeze(-1)
        # Global max pooling: a single window spanning the whole output length.
        out = F.max_pool1d(out, out.size(-1))
        return out.flatten(1)

    def forward(self, x1, x2):
        """Score a batch of sequence pairs.

        Args:
            x1: Tensor of shape ``(batch, seq_len, em_dim)``.
            x2: Tensor of shape ``(batch, seq_len, em_dim)``.

        Returns:
            Tensor of shape ``(batch,)`` with sigmoid outputs in ``[0, 1]``.
        """
        # Permute for Conv1d: (batch, channels, seq_len)
        x1 = x1.permute(0, 2, 1)
        x2 = x2.permute(0, 2, 1)

        branch_features = []
        for conv_layer in self.conv_layers:
            # Same weights for both inputs; summing makes the score symmetric
            # in (x1, x2).  x1 is processed first so dropout masks are drawn in
            # a fixed order.
            features_1 = self._pooled_features(conv_layer, x1)
            features_2 = self._pooled_features(conv_layer, x2)
            branch_features.append(features_1 + features_2)

        # Concatenate all features
        concatenated = torch.cat(branch_features, dim=1)

        # Fully connected layers
        x = self.fc_dropout1(concatenated)
        x = self.fc1(x)
        x = self.fc_dropout2(x)
        x = F.relu(x)
        x = self.fc2(x)

        # Squeeze only the trailing unit axis: a bare squeeze() would also drop
        # the batch axis when batch == 1 and return a 0-d tensor.
        return torch.sigmoid(x).squeeze(-1)


In [None]:

# ---- Experiment settings ----------------------------------------------------
spe = "yeast"  # dataset sub-directory inside data_dir
data_dir = "ppi-data"
epochs = 10
batch_size = 32
lr = 0.001

# Train / validation / test split files, all resolved under <data_dir>/<spe>/.
train_file, val_file, test_file = (
    os.path.join(data_dir, spe, split_path)
    for split_path in (
        "action/train_action_20.tsv",
        "action/val_action_10.tsv",
        "action/test_action_10.tsv",
    )
)

# Alternative settings for a Google Colab run with the data on Google Drive.
# Uncomment to override the local defaults above.
# from google.colab import drive
#
# drive.mount('/content/drive')
# data_dir = "drive/MyDrive/ppi-data"
# train_file = os.path.join(data_dir, spe, "action/train_action.tsv")
# val_file = os.path.join(data_dir, spe, "action/val_action.tsv")
# test_file = os.path.join(data_dir, spe, "action/test_action.tsv")
# epochs = 50

# Embedding file passed to every load_data call below.
embedding_h5 = os.path.join(data_dir, spe, "seq/pipr.embedding.h5")

# Seed before any loader is created so the run is repeatable.
set_seed(1234)

# Use the GPU when one is visible, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Only the training loader is built with train=True.
train_loader = load_data(train_file, batch_size, embedding_h5, train=True)
val_loader = load_data(val_file, batch_size, embedding_h5, train=False)
test_loader = load_data(test_file, batch_size, embedding_h5, train=False)


In [None]:
# Build the network with its default hyper-parameters, then move it to the
# selected device (Module.to returns the same module instance).
model = DeepTrio()
model = model.to(device)

# Adam over all model parameters with the configured learning rate.
optimizer = optim.Adam(params=model.parameters(), lr=lr)

In [None]:
# Run training through the train_model helper imported from utils, handing it
# the training and validation loaders, the optimizer, the epoch count and the
# device. NOTE(review): loss, logging and checkpointing live in utils and are
# not visible from this notebook.
train_model(model, train_loader, val_loader, optimizer, epochs, device)

# Evaluate on the test loader via utils.eval_model — presumably this reports
# the test metrics; confirm against the helper's implementation.
eval_model(model, test_loader, device)