In [None]:
import os

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

from utils import TransformerLayer, TransformerEncoder
from utils import set_seed, load_data
from utils import train_model, eval_model

In [None]:
class Featuring(nn.Module):
    """Convolution + transformer feature extractor for one embedded sequence.

    Four identical stages are applied in turn.  Each stage runs a
    length-preserving ``Conv1d`` (kernel 3, padding 1), shortens the sequence
    with ``MaxPool1d(kernel_size=3, stride=3)``, feeds the pooled features to
    a ``TransformerEncoder`` and concatenates the encoder output with the
    pooled features along the channel axis.  A final convolution maps the
    resulting ``2 * feature_dim`` channels back to ``feature_dim``.

    Every stage turns a sequence length ``L`` into ``L // 3``, so the output
    length is ``L // 81``; a length-2000 input goes 666 -> 222 -> 74 -> 24.
    Inputs shorter than 81 positions leave fewer than 3 positions for one of
    the pools, which ``MaxPool1d`` rejects.  The sequence axis is not reduced
    here.

    Args:
        input_dim: Channels per sequence position of the input.
        feature_dim: Channels produced by every convolution; also passed as
            ``d_model`` to each ``TransformerLayer`` (presumably it has to be
            divisible by the 3 attention heads -- confirm against
            ``utils.TransformerLayer``).
    """

    def __init__(self, input_dim, feature_dim):
        super().__init__()

        self.feature_dim = feature_dim

        # Sub-modules keep their attribute names and creation order
        # (conv_i, encoder_i, ..., conv5, pool) so that seeded initialisation
        # and previously saved state_dict keys remain compatible.
        self.conv1 = self._make_conv(input_dim, feature_dim)
        self.encoder1 = self._make_encoder(feature_dim)

        # From the second stage on, the input is the concatenation
        # [encoder output, pooled features], i.e. 2 * feature_dim channels.
        self.conv2 = self._make_conv(2 * feature_dim, feature_dim)
        self.encoder2 = self._make_encoder(feature_dim)

        self.conv3 = self._make_conv(2 * feature_dim, feature_dim)
        self.encoder3 = self._make_encoder(feature_dim)

        self.conv4 = self._make_conv(2 * feature_dim, feature_dim)
        self.encoder4 = self._make_encoder(feature_dim)

        self.conv5 = self._make_conv(2 * feature_dim, feature_dim)

        # Parameter-free, so one instance is shared by all four stages.
        self.pool = nn.MaxPool1d(kernel_size=3, stride=3)

    @staticmethod
    def _make_conv(in_channels, out_channels):
        """Return a length-preserving Conv1d (kernel 3, padding 1)."""
        return nn.Conv1d(in_channels, out_channels, kernel_size=3, padding=1)

    @staticmethod
    def _make_encoder(feature_dim):
        """Return a TransformerEncoder (n_layers=2) with d_model=feature_dim."""
        layer = TransformerLayer(n_heads=3, d_model=feature_dim,
                                 ff_units=10, dropout=0.2)
        return TransformerEncoder(layer, n_layers=2)

    def _stage(self, conv, encoder, x):
        """Run one conv -> pool -> encoder -> concat stage.

        Args:
            conv: Convolution belonging to this stage.
            encoder: Encoder belonging to this stage.  It is called on the
                pooled features laid out as (b, L // 3, feature_dim) and its
                output must keep that shape for the concatenation to work.
            x: Tensor of shape (b, channels, L).

        Returns:
            Tensor of shape (b, 2 * feature_dim, L // 3): encoder output
            first, pooled convolution features second.
        """
        pooled = self.pool(conv(x))  # b, feature_dim, L // 3
        encoded = encoder(pooled.permute(0, 2, 1))  # b, L // 3, feature_dim
        return torch.cat([encoded.permute(0, 2, 1), pooled], dim=1)

    def forward(self, x):
        """Extract features from a batch of sequences.

        Args:
            x: Tensor of shape (b, L, input_dim).

        Returns:
            Tensor of shape (b, L // 81, feature_dim).
        """
        x = x.permute(0, 2, 1)  # b, input_dim, L  (Conv1d wants channels first)

        x = self._stage(self.conv1, self.encoder1, x)  # b, 2*feature_dim, L // 3
        x = self._stage(self.conv2, self.encoder2, x)  # b, 2*feature_dim, L // 9
        x = self._stage(self.conv3, self.encoder3, x)  # b, 2*feature_dim, L // 27
        x = self._stage(self.conv4, self.encoder4, x)  # b, 2*feature_dim, L // 81

        x = self.conv5(x)  # b, feature_dim, L // 81
        return x.permute(0, 2, 1)  # b, L // 81, feature_dim


class Classifier(nn.Module):
    """Fully connected head that maps a feature vector to a sigmoid score.

    Layer widths are
    ``input_dim -> 2*input_dim -> input_dim -> input_dim//2 -> input_dim//4 -> 1``.
    Each of the four hidden layers is followed by ``BatchNorm1d``,
    ``LeakyReLU(0.3)`` and ``Dropout(0.5)``; the last layer is followed by a
    sigmoid only.

    Args:
        input_dim: Width of the incoming feature vector.
    """

    def __init__(self, input_dim):
        super().__init__()

        wide = 2 * input_dim
        half = input_dim // 2
        quarter = input_dim // 4

        # Registration order (fc_i, bn_i pairs, then fc5) is significant for
        # seeded initialisation and for state_dict key order.
        self.fc1 = nn.Linear(input_dim, wide)
        self.bn1 = nn.BatchNorm1d(wide)
        self.fc2 = nn.Linear(wide, input_dim)
        self.bn2 = nn.BatchNorm1d(input_dim)
        self.fc3 = nn.Linear(input_dim, half)
        self.bn3 = nn.BatchNorm1d(half)
        self.fc4 = nn.Linear(half, quarter)
        self.bn4 = nn.BatchNorm1d(quarter)
        self.fc5 = nn.Linear(quarter, 1)

        # Stateless modules, reused after every hidden layer.
        self.dropout = nn.Dropout(0.5)
        self.leaky_relu = nn.LeakyReLU(0.3)

    def forward(self, x):
        """Score a batch of feature vectors.

        Args:
            x: Tensor of shape (b, input_dim).

        Returns:
            Tensor of shape (b,) holding the sigmoid of the final layer.
        """
        hidden_stages = (
            (self.fc1, self.bn1),
            (self.fc2, self.bn2),
            (self.fc3, self.bn3),
            (self.fc4, self.bn4),
        )
        for linear, norm in hidden_stages:
            x = self.dropout(self.leaky_relu(norm(linear(x))))

        logits = self.fc5(x).squeeze(-1)  # (b, 1) -> (b,)
        return F.sigmoid(logits)


class InteractionModel(nn.Module):
    """Score a pair of sequences with a shared featurizer.

    Both inputs go through the same ``featuring`` module.  Their feature maps
    are concatenated along the channel axis, passed through a
    ``TransformerEncoder``, averaged over the sequence axis and handed to
    ``classifier``.

    Args:
        featuring: Module exposing a ``feature_dim`` attribute and returning
            tensors of shape (b, L, feature_dim).
        classifier: Module applied to the (b, 2 * feature_dim) pooled tensor.
    """

    def __init__(self, featuring, classifier):
        super().__init__()

        self.featuring = featuring
        self.classifier = classifier

        # The encoder operates on the two feature maps stacked channel-wise.
        pair_dim = 2 * self.featuring.feature_dim
        encoder_layer = TransformerLayer(n_heads=3, d_model=pair_dim,
                                         ff_units=10, dropout=0.5)
        self.encoder = TransformerEncoder(encoder_layer, n_layers=2)

    def forward(self, x1, x2):
        """Return the classifier output for the pair (x1, x2).

        Args:
            x1: First input, forwarded unchanged to ``featuring``.
            x2: Second input, forwarded unchanged to ``featuring``.

        Returns:
            Whatever ``classifier`` returns for the pooled pair features.
        """
        # x1 is featurized before x2, each giving (b, L, feature_dim).
        pair_feats = [self.featuring(seq) for seq in (x1, x2)]
        joined = torch.cat(pair_feats, dim=2)  # b, L, 2*feature_dim
        pooled = self.encoder(joined).mean(dim=1)  # b, 2*feature_dim
        return self.classifier(pooled)

In [None]:

spe = "yeast"  # dataset sub-directory under data_dir (presumably the species)

# Alternative local (non-Colab) setup, kept for reference; note that it points
# at different split files and trains for 10 epochs:
# data_dir = "ppi-data"
# train_file = os.path.join(data_dir, spe, "action/train_action_20.tsv")
# val_file = os.path.join(data_dir, spe, "action/val_action_10.tsv")
# test_file = os.path.join(data_dir, spe, "action/test_action_10.tsv")
# epochs = 10

# Colab-only dependency, imported next to its single use.
from google.colab import drive

# data_dir is derived from the mount point: a relative "drive/MyDrive/..."
# path would only resolve while the working directory is /content.
mount_point = "/content/drive"
drive.mount(mount_point)
data_dir = os.path.join(mount_point, "MyDrive", "ppi-data")
train_file = os.path.join(data_dir, spe, "action/train_action.tsv")
val_file = os.path.join(data_dir, spe, "action/val_action.tsv")
test_file = os.path.join(data_dir, spe, "action/test_action.tsv")
epochs = 50

embedding_h5 = os.path.join(data_dir, spe, "seq/pipr.embedding.h5")

input_dim = 13  # passed to Featuring as the channel count of its first Conv1d
feature_dim = 24  # channel width used inside Featuring
batch_size = 32
lr = 0.0001  # Adam learning rate

# Seed before the loaders (and, in a later cell, the model) are created.
# set_seed comes from utils; which RNGs it seeds is defined there.
set_seed(1234)

device = "cuda" if torch.cuda.is_available() else "cpu"

# Only the training loader is built with train=True; the meaning of that flag
# is defined by utils.load_data.
train_loader = load_data(train_file, batch_size, embedding_h5, train=True)
val_loader = load_data(val_file, batch_size, embedding_h5, train=False)
test_loader = load_data(test_file, batch_size, embedding_h5, train=False)


In [None]:
# Assemble the network: a shared sequence featurizer plus a classifier head
# whose input width (2 * feature_dim) matches the width InteractionModel
# produces after concatenating the two feature maps.
featuring = Featuring(input_dim, feature_dim)
classifier = Classifier(2 * feature_dim)

# nn.Module.to moves the parameters and returns the module itself, so the
# chained call leaves `model` bound to the same object.
model = InteractionModel(featuring, classifier).to(device)

# Created after the move so the optimizer references the on-device parameters.
optimizer = optim.Adam(model.parameters(), lr=lr)

In [None]:
# Fit the model. train_model is imported from utils and receives both the
# training and the validation loader; how it uses them is defined there.
train_model(model, train_loader, val_loader, optimizer, epochs, device)

# Evaluate on the test loader. While this stays the final expression of the
# cell, any non-None value eval_model returns is displayed by the notebook.
eval_model(model, test_loader, device)