In [32]:
import os
from collections import Counter

import numpy as np
import pandas as pd
import torch
from pandas import DataFrame

In [33]:
def feature_vector(feature_name, df):
    """Build a drug-by-drug Jaccard similarity matrix for one feature column.

    Every entry of ``df[feature_name]`` is a ``"|"``-separated string of feature
    identifiers, e.g. ``"P30556|P05412"`` when ``feature_name`` is ``"target"``.
    Each drug is turned into a binary indicator vector over all identifiers seen
    in the column, and the pairwise Jaccard similarity of those vectors is
    returned.

    Args:
        feature_name: Name of the column in ``df`` that holds the feature strings.
        df: DataFrame with one row per drug.

    Returns:
        A float ``numpy.ndarray`` of shape ``(len(df), len(df))`` whose entry
        ``(i, j)`` is ``|F_i ∩ F_j| / |F_i ∪ F_j|``.
    """

    def Jaccard(matrix):
        """Pairwise Jaccard similarity between the rows of a binary matrix."""
        matrix = np.asarray(matrix, dtype=float)

        # |A ∩ B| for every pair of rows.
        intersection = matrix @ matrix.T

        # |A ∪ B| = |A| + |B| - |A ∩ B|, broadcast over all pairs.
        set_sizes = matrix.sum(axis=1)
        union = set_sizes[:, None] + set_sizes[None, :] - intersection

        # A zero union means both rows are empty; report similarity 0 there
        # instead of dividing by zero.
        return np.divide(
            intersection,
            union,
            out=np.zeros_like(intersection),
            where=union > 0,
        )

    # Features for each drug, e.g. [["P30556", "P05412"], ["P28223", "P46098"], ...]
    drug_features = [entry.split("|") for entry in df[feature_name].tolist()]

    # Assign each distinct feature a column, in first-seen order (dict lookup
    # instead of a linear "not in list" scan).
    feature_index = {}
    for features in drug_features:
        for each_feature in features:
            feature_index.setdefault(each_feature, len(feature_index))

    # Fill the indicator matrix directly in NumPy. Writing through
    # ``frame[col].iloc[i] = 1`` is chained assignment, which pandas does not
    # guarantee to propagate to the frame.
    feature_matrix = np.zeros((len(drug_features), len(feature_index)), dtype=float)
    for row, features in enumerate(drug_features):
        for each_feature in features:
            feature_matrix[row, feature_index[each_feature]] = 1.0

    sim_matrix = Jaccard(feature_matrix)
    return sim_matrix


def prepare(df_drug, feature_list, mechanism, action, drugA, drugB):
    """Turn raw drug/interaction tables into model inputs and integer labels.

    Args:
        df_drug: DataFrame with one row per drug; must contain a ``"name"``
            column plus every column named in ``feature_list``.
        feature_list: Column names passed one by one to ``feature_vector``.
        mechanism: Sequence of mechanism strings, one per interaction.
        action: Sequence of action strings, aligned with ``mechanism``.
        drugA: Sequence of first-drug names, aligned with ``mechanism``.
        drugB: Sequence of second-drug names, aligned with ``mechanism``.

    Returns:
        A tuple ``(new_feature, new_label, event_num)`` where ``new_feature``
        stacks, per interaction, the feature arrays of drug A and drug B along
        a new leading axis of size 2; ``new_label`` holds the integer event
        label per interaction; and ``event_num`` is the number of distinct
        events.

    Raises:
        KeyError: If a name in ``drugA``/``drugB`` is missing from
            ``df_drug["name"]``.
    """
    # Transform each interaction event ("<mechanism> <action>") into a string key.
    # Iterating positionally (instead of indexing with ``mechanism[i]``) also
    # works for pandas Series whose index is not 0..n-1.
    d_event = [mech + " " + act for mech, act in zip(mechanism, action)]

    # Label events by descending frequency; ties keep first-seen order because
    # Counter preserves insertion order and the sort is stable.
    d_label = {
        event: rank for rank, (event, _) in enumerate(Counter(d_event).most_common())
    }
    event_num = len(d_label)

    # One similarity matrix per feature, stacked on a trailing axis.
    vector = np.stack(
        [feature_vector(name, df_drug) for name in feature_list], axis=-1
    )

    # Transform the drug ID to its feature array. The name list is built once
    # rather than on every loop iteration.
    drug_names = df_drug["name"].tolist()
    d_feature = dict(zip(drug_names, vector))

    # Use the dictionaries to obtain the feature array and label of each interaction.
    new_feature = []
    new_label = []
    for event, name_a, name_b in zip(d_event, drugA, drugB):
        new_feature.append(np.stack((d_feature[name_a], d_feature[name_b]), axis=0))
        new_label.append(d_label[event])

    new_feature = np.array(new_feature)
    new_label = np.array(new_label)

    return new_feature, new_label, event_num


# Load the per-drug feature table and the extracted interaction events.
df_drug = pd.read_csv("drug_features.csv")
extraction = pd.read_csv("extraction.csv")

# Pull out the interaction columns consumed by prepare().
mechanism, action, drugA, drugB = (
    extraction[column] for column in ("mechanism", "action", "drugA", "drugB")
)

# Feature columns of df_drug that are converted into similarity matrices.
feature_list = ["pathway", "target", "enzyme", "category"]

new_feature, new_label, event_num = prepare(
    df_drug,
    feature_list,
    mechanism,
    action,
    drugA,
    drugB,
)

# Convert the stacked features to a float32 tensor for the model.
new_feature = torch.tensor(new_feature, dtype=torch.float32)

In [34]:
import torch
from torch.utils.data import Dataset, DataLoader

class DrugInteractionDataset(Dataset):
    """Map-style dataset pairing interaction features with integer labels.

    Args:
        features: Array-like or tensor whose first axis indexes samples; stored
            as ``torch.float32``.
        labels: Array-like or tensor of class indices, one per sample; stored
            as ``torch.long``.

    Raises:
        ValueError: If ``features`` and ``labels`` have different lengths.
    """

    def __init__(self, features, labels):
        # ``torch.as_tensor`` accepts NumPy arrays and existing tensors alike.
        # Unlike ``torch.tensor`` it does not warn about (or perform) a
        # redundant copy when the input is already a tensor of the requested
        # dtype; in that case the stored tensor shares memory with the input.
        self.features = torch.as_tensor(features, dtype=torch.float32)
        self.labels = torch.as_tensor(labels, dtype=torch.long)

        if len(self.features) != len(self.labels):
            raise ValueError(
                f"features and labels must have the same length, "
                f"got {len(self.features)} and {len(self.labels)}"
            )

    def __len__(self):
        """Return the number of samples."""
        return len(self.labels)

    def __getitem__(self, index):
        """Return the ``(features, label)`` pair at ``index``."""
        return self.features[index], self.labels[index]


# new_feature (a float32 torch tensor) and new_label (a numpy array) come from the data-preparation cell above
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples for validation; the fixed seed keeps the split reproducible
features_train, features_val, labels_train, labels_val = train_test_split(
    new_feature,
    new_label,
    test_size=0.2,
    random_state=42,
)

# Wrap each split in a dataset object
train_dataset = DrugInteractionDataset(features=features_train, labels=labels_train)
val_dataset = DrugInteractionDataset(features=features_val, labels=labels_val)

# Batch the datasets; only the training data is shuffled
batch_size = 32
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(dataset=val_dataset, batch_size=batch_size, shuffle=False)

  self.features = torch.tensor(features, dtype=torch.float32)


In [35]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class CNNDDI(nn.Module):
    """Convolutional classifier over stacked drug-pair feature maps.

    The network expects input of shape ``(batch, 2, input_height, input_width)``
    (two channels, one per drug). All convolutions use a ``(3, 1)`` kernel with
    ``(1, 0)`` padding, so the spatial size is unchanged up to the flatten step.

    Args:
        input_height: Height of each input feature map (default 572).
        input_width: Width of each input feature map (default 4).
        num_classes: Number of output classes / DDI event types (default 65).
    """

    def __init__(self, input_height=572, input_width=4, num_classes=65):
        super(CNNDDI, self).__init__()
        self.conv1 = nn.Conv2d(2, 64, (3, 1), padding=(1, 0))
        self.conv2 = nn.Conv2d(64, 128, (3, 1), padding=(1, 0))
        self.conv3_1 = nn.Conv2d(128, 128, (3, 1), padding=(1, 0))
        self.conv3_2 = nn.Conv2d(128, 128, (3, 1), padding=(1, 0))
        self.conv4 = nn.Conv2d(128, 256, (3, 1), padding=(1, 0))
        # Flattened size after conv4: 256 channels times the unchanged spatial size.
        self.fc1 = nn.Linear(256 * input_height * input_width, 256)
        self.fc2 = nn.Linear(256, num_classes)

    def forward(self, x):
        """Return unnormalised class scores of shape ``(batch, num_classes)``."""
        x = F.leaky_relu(self.conv1(x), negative_slope=0.2)
        x = F.leaky_relu(self.conv2(x), negative_slope=0.2)

        # Residual block: two convolutions with a skip connection around them.
        identity = x
        x = F.leaky_relu(self.conv3_1(x), negative_slope=0.2)
        x = self.conv3_2(x)
        # Out-of-place add keeps the conv output untouched for autograd.
        x = x + identity
        x = F.leaky_relu(x, negative_slope=0.2)

        x = F.leaky_relu(self.conv4(x), negative_slope=0.2)
        x = torch.flatten(x, 1)
        x = F.leaky_relu(self.fc1(x), negative_slope=0.2)
        x = self.fc2(x)
        return x


In [36]:
def train(model, device, train_loader, optimizer, criterion):
    """Run one optimisation pass over ``train_loader``.

    Puts ``model`` in training mode, performs one optimizer step per batch and
    returns the sum of the per-batch losses divided by ``len(train_loader)``.
    """
    model.train()
    batch_losses = []
    for batch_inputs, batch_targets in train_loader:
        batch_inputs = batch_inputs.to(device)
        batch_targets = batch_targets.to(device)

        optimizer.zero_grad()
        batch_loss = criterion(model(batch_inputs), batch_targets)
        batch_loss.backward()
        optimizer.step()

        batch_losses.append(batch_loss.item())

    return sum(batch_losses) / len(train_loader)

def validate(model, device, val_loader, criterion):
    """Evaluate ``model`` on ``val_loader`` without tracking gradients.

    Returns:
        ``(val_loss, val_accuracy)``: the sum of per-batch losses divided by the
        number of batches, and the fraction of samples in ``val_loader.dataset``
        whose highest-scoring class equals the label.
    """
    model.eval()
    loss_sum = 0
    num_correct = 0
    with torch.no_grad():
        for batch_inputs, batch_targets in val_loader:
            batch_inputs = batch_inputs.to(device)
            batch_targets = batch_targets.to(device)

            scores = model(batch_inputs)
            loss_sum += criterion(scores, batch_targets).item()

            # Index of the largest score along the class axis.
            predicted = scores.data.max(1).indices
            num_correct += (predicted == batch_targets).sum().item()

    num_batches = len(val_loader)
    num_samples = len(val_loader.dataset)
    return loss_sum / num_batches, num_correct / num_samples

def save_checkpoint(model, optimizer, epoch, filename):
    """Save the model and optimizer state to ``filename`` with ``torch.save``.

    The checkpoint is a dict with keys ``'epoch'``, ``'state_dict'`` and
    ``'optimizer'``. When ``filename`` is a path, its parent directory is
    created first so that saving also works on a fresh run where the
    checkpoint directory does not exist yet.

    Args:
        model: Module whose ``state_dict()`` is stored.
        optimizer: Optimizer whose ``state_dict()`` is stored.
        epoch: Epoch number recorded in the checkpoint.
        filename: Destination path (or file-like object accepted by
            ``torch.save``).
    """
    state = {
        'epoch': epoch,
        'state_dict': model.state_dict(),
        'optimizer': optimizer.state_dict()
    }

    # Only paths have a parent directory to create; file-like objects are
    # passed straight through to torch.save.
    if isinstance(filename, (str, os.PathLike)):
        parent_dir = os.path.dirname(os.fspath(filename))
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)

    torch.save(state, filename)
    print(f"Saved checkpoint: {filename}")



In [37]:
class focal_loss(nn.Module):
    """Multi-class focal loss: mean of ``-(1 - p_t) ** gamma * log(p_t)``.

    ``p_t`` is the softmax probability (over dim 1 of ``preds``) assigned to
    the target class of each sample.
    """

    def __init__(self, gamma=2):
        super(focal_loss, self).__init__()
        self.gamma = gamma

    def forward(self, preds, labels):
        """Return the scalar focal loss for logits ``preds`` and class indices ``labels``."""
        target_index = labels.unsqueeze(-1)

        # Log-probability and probability of the target class for each sample.
        log_probs = F.log_softmax(preds, dim=1)
        target_log_prob = log_probs.gather(1, target_index)
        target_prob = log_probs.exp().gather(1, target_index)

        # Down-weight samples that are already classified with high confidence.
        modulating_factor = (1 - target_prob) ** self.gamma
        per_sample = -(modulating_factor * target_log_prob)

        return per_sample.mean()

In [38]:
import torch.optim as optim
import torch.nn as nn
from tqdm import tqdm

# Train on the GPU when one is available, otherwise on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = CNNDDI().to(device)
optimizer = optim.Adam(model.parameters(), lr=0.001)
criterion = focal_loss()

num_epochs = 50
checkpoint_interval = 10
for epoch in tqdm(range(num_epochs)):
    # One training pass followed by one validation pass per epoch.
    train_loss = train(model, device, train_loader, optimizer, criterion)
    val_loss, val_accuracy = validate(model, device, val_loader, criterion)
    print(f'Epoch {epoch+1}, Train Loss: {train_loss:.4f}, Validation Loss: {val_loss:.4f}, Validation Accuracy: {val_accuracy:.4f}')

    # Checkpoint every `checkpoint_interval` epochs and always after the final epoch.
    at_interval = (epoch + 1) % checkpoint_interval == 0
    is_final_epoch = (epoch + 1) == num_epochs
    if at_interval or is_final_epoch:
        save_checkpoint(model, optimizer, epoch + 1, f'checkpoints/cnn_ddi_epoch_{epoch+1}.pt')


  2%|▏         | 1/50 [00:21<17:16, 21.15s/it]

Epoch 1, Train Loss: 0.8764, Validation Loss: 0.3628, Validation Accuracy: 0.7918


  4%|▍         | 2/50 [00:42<16:55, 21.16s/it]

Epoch 2, Train Loss: 0.2777, Validation Loss: 0.3381, Validation Accuracy: 0.8103


  6%|▌         | 3/50 [01:03<16:40, 21.28s/it]

Epoch 3, Train Loss: 0.2173, Validation Loss: 0.2545, Validation Accuracy: 0.8415


  8%|▊         | 4/50 [01:25<16:21, 21.34s/it]

Epoch 4, Train Loss: 0.1825, Validation Loss: 0.2843, Validation Accuracy: 0.8480


 10%|█         | 5/50 [01:46<15:56, 21.26s/it]

Epoch 5, Train Loss: 0.1802, Validation Loss: 0.3486, Validation Accuracy: 0.8111


 12%|█▏        | 6/50 [02:07<15:33, 21.21s/it]

Epoch 6, Train Loss: 0.1394, Validation Loss: 0.2700, Validation Accuracy: 0.8332


 14%|█▍        | 7/50 [02:28<15:09, 21.15s/it]

Epoch 7, Train Loss: 0.1302, Validation Loss: 0.2829, Validation Accuracy: 0.8444


 16%|█▌        | 8/50 [02:49<14:46, 21.12s/it]

Epoch 8, Train Loss: 0.1259, Validation Loss: 0.7220, Validation Accuracy: 0.7724


 18%|█▊        | 9/50 [03:10<14:25, 21.11s/it]

Epoch 9, Train Loss: 0.1296, Validation Loss: 0.2329, Validation Accuracy: 0.8791
Epoch 10, Train Loss: 0.1721, Validation Loss: 0.3235, Validation Accuracy: 0.8584


 20%|██        | 10/50 [03:32<14:15, 21.38s/it]

Saved checkpoint: checkpoints/cnn_ddi_epoch_10.pt


 22%|██▏       | 11/50 [03:53<13:49, 21.28s/it]

Epoch 11, Train Loss: 0.0829, Validation Loss: 0.2793, Validation Accuracy: 0.8642


 24%|██▍       | 12/50 [04:14<13:24, 21.16s/it]

Epoch 12, Train Loss: 0.0824, Validation Loss: 0.3565, Validation Accuracy: 0.8449


 26%|██▌       | 13/50 [04:35<12:59, 21.08s/it]

Epoch 13, Train Loss: 0.1575, Validation Loss: 0.2600, Validation Accuracy: 0.8799
