In [1]:
!pip install rdkit
!pip install networkx
!pip install torch-geometric

Collecting rdkit
  Downloading rdkit-2023.9.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (34.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m34.4/34.4 MB[0m [31m17.5 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: rdkit
Successfully installed rdkit-2023.9.5
Collecting torch-geometric
  Downloading torch_geometric-2.5.3-py3-none-any.whl (1.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m16.3 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: torch-geometric
Successfully installed torch-geometric-2.5.3


In [2]:
import torch
import torch.nn as nn
import gdown
import pandas as pd
import torch
from rdkit import Chem
from torch_geometric.data import Data, Batch
import networkx as nx
import matplotlib.pyplot as plt
from torch_geometric.utils import to_networkx

In [3]:
import torch
from torch_geometric.nn import GCNConv, NNConv, BatchNorm
from torch_geometric.data import Data

class DSIGNN(torch.nn.Module):
    def __init__(self, in_channels, hidden_channels, out_channels, edge_channels):
        super(DSIGNN, self).__init__()
        self.conv1 = NNConv(in_channels, hidden_channels, nn=torch.nn.Linear(edge_channels, in_channels * hidden_channels))
        self.bn1 = BatchNorm(hidden_channels)

        self.conv1b = NNConv(hidden_channels, hidden_channels, nn=torch.nn.Linear(edge_channels, hidden_channels * hidden_channels))
        self.bn1b = BatchNorm(hidden_channels)

        self.conv2 = NNConv(hidden_channels, hidden_channels, nn=torch.nn.Linear(edge_channels, hidden_channels * hidden_channels))
        self.bn2 = BatchNorm(hidden_channels)

        self.fc1 = torch.nn.Linear(hidden_channels, hidden_channels)
        self.fc2 = torch.nn.Linear(hidden_channels, out_channels)
        self.dropout = torch.nn.Dropout(p=0.5)

    def l2_regularization(self):
        l2_reg = 0
        for param in self.parameters():
            l2_reg += torch.sum(param ** 2)
        return l2_reg

    def forward(self, graph):
        v=graph.x
        edge_index = graph.edge_index
        e = graph.edge_attr
        indices = graph.indices

        v1 = v[torch.tensor(indices["node"][0][0])]

        e1 = e[torch.tensor(indices["edge"][0][0])]
        e2 = e[torch.tensor(indices["edge"][0][1])]

        eidx1 = edge_index[:, torch.tensor(indices["edge_index"][0][0])]
        eidx2 = edge_index[:, torch.tensor(indices["edge_index"][0][1])]

        graph1 = self.conv1(v1, eidx1, e1)
        graph1 = self.bn1(graph1)
        graph1 = torch.nn.functional.elu(graph1)

        graph1 = self.conv1b(graph1, eidx1, e1)
        graph1 = self.bn1(graph1)
        graph1 = torch.nn.functional.elu(graph1)

        combined_nodes = torch.cat((graph1, torch.zeros(1, 8)), dim=0)
        v2 = self.conv2(combined_nodes, eidx2, e2)
        v2 = self.bn2(v2)
        v2 = torch.nn.functional.elu(v2)
        v2 = self.dropout(v2[-1])

        v2 = self.fc1(v2)
        v2 = torch.nn.functional.elu(v2)

        v2 = self.fc2(v2)
        v2 = torch.nn.functional.elu(v2)

        return v2

In [4]:
import pickle

# Load the model
with open('/content/dsi_model.pkl', 'rb') as f:
    dsi_model = pickle.load(f)

# Put the model in evaluation mode
dsi_model.eval()

DSIGNN(
  (conv1): NNConv(3, 8, aggr=add, nn=Linear(in_features=2, out_features=24, bias=True))
  (bn1): BatchNorm(8)
  (conv1b): NNConv(8, 8, aggr=add, nn=Linear(in_features=2, out_features=64, bias=True))
  (bn1b): BatchNorm(8)
  (conv2): NNConv(8, 8, aggr=add, nn=Linear(in_features=2, out_features=64, bias=True))
  (bn2): BatchNorm(8)
  (fc1): Linear(in_features=8, out_features=8, bias=True)
  (fc2): Linear(in_features=8, out_features=1, bias=True)
  (dropout): Dropout(p=0.5, inplace=False)
)

In [5]:
class DDIGNN(torch.nn.Module):
    def __init__(self, in_channels, hidden_channels, out_channels, edge_channels):
        super(DDIGNN, self).__init__()
        self.conv1 = NNConv(in_channels, hidden_channels, nn=torch.nn.Linear(edge_channels, in_channels * hidden_channels))
        self.bn1 = BatchNorm(hidden_channels)

        self.conv1b = NNConv(hidden_channels, hidden_channels, nn=torch.nn.Linear(edge_channels, hidden_channels * hidden_channels))
        self.bn1b = BatchNorm(hidden_channels)

        self.conv2 = NNConv(in_channels, hidden_channels, nn=torch.nn.Linear(edge_channels, in_channels * hidden_channels))
        self.bn2 = BatchNorm(hidden_channels)

        self.conv2b = NNConv(hidden_channels, hidden_channels, nn=torch.nn.Linear(edge_channels, hidden_channels * hidden_channels))
        self.bn2b = BatchNorm(hidden_channels)

        self.conv3 = NNConv(hidden_channels, hidden_channels, nn=torch.nn.Linear(edge_channels, hidden_channels * hidden_channels))
        self.bn3 = BatchNorm(hidden_channels)

        self.fc1 = torch.nn.Linear(hidden_channels, hidden_channels)
        self.dropout = torch.nn.Dropout(p=0.5)

        self.fc2 = torch.nn.Linear(hidden_channels, out_channels)

    def l2_regularization(self):
        l2_reg = 0
        for param in self.parameters():
            l2_reg += torch.sum(param ** 2)
        return l2_reg

    def forward(self, graph):
        v=graph.x
        edge_index = graph.edge_index
        e = graph.edge_attr
        indices = graph.indices

        v1 = v[torch.tensor(indices["node"][0][0])]
        v2 = v[torch.tensor(indices["node"][0][1])]

        e1 = e[torch.tensor(indices["edge"][0][0])]
        e2 = e[torch.tensor(indices["edge"][0][1])]
        e3 = e[torch.tensor(indices["edge"][0][2])]

        eidx1 = edge_index[:, torch.tensor(indices["edge_index"][0][0])]
        eidx2 = torch.sub(edge_index[:, torch.tensor(indices["edge_index"][0][1])], v1.shape[0])
        eidx3 = edge_index[:, torch.tensor(indices["edge_index"][0][2])]

        graph1 = self.conv1(v1, eidx1, e1)
        graph1 = self.bn1(graph1)
        graph1 = torch.nn.functional.elu(graph1)

        graph1 = self.conv1b(graph1, eidx1, e1)
        graph1 = self.bn1b(graph1)
        graph1 = torch.nn.functional.elu(graph1)

        graph2 = self.conv2(v2, eidx2, e2)
        graph2 = self.bn2(graph2)
        graph2 = torch.nn.functional.elu(graph2)

        graph2 = self.conv2b(graph2, eidx2, e2)
        graph2 = self.bn2b(graph2)
        graph2 = torch.nn.functional.elu(graph2)

        combined_nodes = torch.cat((graph1, graph2, torch.zeros(1, graph1.shape[1])), dim=0)
        v3 = self.conv3(combined_nodes, eidx3, e3)
        v3 = self.bn3(v3)
        v3 = torch.nn.functional.elu(v3)
        v3 = self.dropout(v3[-1])

        v3 = self.fc1(v3)
        v3 = torch.nn.functional.elu(v3)

        v3 = self.fc2(v3)
        v3 = torch.nn.functional.elu(v3)

        return v3

In [6]:
with open('/content/ddi_model.pkl', 'rb') as f:
    ddi_model = pickle.load(f)

# Put the model in evaluation mode
ddi_model.eval()

DDIGNN(
  (conv1): NNConv(3, 40, aggr=add, nn=Linear(in_features=2, out_features=120, bias=True))
  (bn1): BatchNorm(40)
  (conv1b): NNConv(40, 40, aggr=add, nn=Linear(in_features=2, out_features=1600, bias=True))
  (bn1b): BatchNorm(40)
  (conv2): NNConv(3, 40, aggr=add, nn=Linear(in_features=2, out_features=120, bias=True))
  (bn2): BatchNorm(40)
  (conv2b): NNConv(40, 40, aggr=add, nn=Linear(in_features=2, out_features=1600, bias=True))
  (bn2b): BatchNorm(40)
  (conv3): NNConv(40, 40, aggr=add, nn=Linear(in_features=2, out_features=1600, bias=True))
  (bn3): BatchNorm(40)
  (fc1): Linear(in_features=40, out_features=40, bias=True)
  (dropout): Dropout(p=0.5, inplace=False)
  (fc2): Linear(in_features=40, out_features=2, bias=True)
)

In [7]:
class EffectiveGNN(torch.nn.Module):
    def __init__(self, in_channels, hidden_channels, out_channels, edge_channels):
        super(EffectiveGNN, self).__init__()
        self.conv1 = NNConv(in_channels, hidden_channels, nn=torch.nn.Linear(edge_channels, in_channels * hidden_channels))
        self.bn1 = BatchNorm(hidden_channels)

        self.conv1b = NNConv(hidden_channels, hidden_channels, nn=torch.nn.Linear(edge_channels, hidden_channels * hidden_channels))
        self.bn1b = BatchNorm(hidden_channels)

        self.conv2 = NNConv(hidden_channels, hidden_channels, nn=torch.nn.Linear(edge_channels, hidden_channels * hidden_channels))
        self.bn2 = BatchNorm(hidden_channels)

        self.fc1 = torch.nn.Linear(hidden_channels, hidden_channels)
        self.fc2 = torch.nn.Linear(hidden_channels, out_channels)
        self.dropout = torch.nn.Dropout(p=0.5)

    def l2_regularization(self):
        l2_reg = 0
        for param in self.parameters():
            l2_reg += torch.sum(param ** 2)
        return l2_reg

    def forward(self, graph):
        v=graph.x
        edge_index = graph.edge_index
        e = graph.edge_attr
        indices = graph.indices

        v1 = v[torch.tensor(indices["node"][0][0])]

        e1 = e[torch.tensor(indices["edge"][0][0])]
        e2 = e[torch.tensor(indices["edge"][0][1])]

        eidx1 = edge_index[:, torch.tensor(indices["edge_index"][0][0])]
        eidx2 = edge_index[:, torch.tensor(indices["edge_index"][0][1])]

        graph1 = self.conv1(v1, eidx1, e1)
        graph1 = self.bn1(graph1)
        graph1 = torch.nn.functional.elu(graph1)

        graph1 = self.conv1b(graph1, eidx1, e1)
        graph1 = self.bn1b(graph1)
        graph1 = torch.nn.functional.elu(graph1)

        combined_nodes = torch.cat((graph1, torch.zeros(1, graph1.shape[1])), dim=0)

        graph2 = self.conv2(combined_nodes, eidx2, e2)
        graph2 = self.bn2(graph2)
        graph2 = torch.nn.functional.elu(graph2)

        graph2 = self.dropout(graph2[-1])
        graph2 = self.fc1(graph2)
        graph2 = torch.nn.functional.elu(graph2)

        graph2 = self.fc2(graph2)
        graph2 = torch.nn.functional.elu(graph2)

        return graph2

In [8]:
with open('/content/effectiveness_model_updated.pkl', 'rb') as f:
    effectiveness_model = pickle.load(f)

# Put the model in evaluation mode
effectiveness_model.eval()

EffectiveGNN(
  (conv1): NNConv(3, 8, aggr=add, nn=Linear(in_features=2, out_features=24, bias=True))
  (bn1): BatchNorm(8)
  (conv1b): NNConv(8, 8, aggr=add, nn=Linear(in_features=2, out_features=64, bias=True))
  (bn1b): BatchNorm(8)
  (conv2): NNConv(8, 8, aggr=add, nn=Linear(in_features=2, out_features=64, bias=True))
  (bn2): BatchNorm(8)
  (fc1): Linear(in_features=8, out_features=8, bias=True)
  (fc2): Linear(in_features=8, out_features=1, bias=True)
  (dropout): Dropout(p=0.5, inplace=False)
)

In [9]:
class SynergyGNN(torch.nn.Module):
    def __init__(self, in_channels, hidden_channels, out_channels, edge_channels):
        super(SynergyGNN, self).__init__()
        self.conv1 = NNConv(in_channels, hidden_channels, nn=torch.nn.Linear(edge_channels, in_channels * hidden_channels))
        self.bn1 = BatchNorm(hidden_channels)

        self.conv1b = NNConv(hidden_channels, hidden_channels, nn=torch.nn.Linear(edge_channels, hidden_channels * hidden_channels))
        self.bn1b = BatchNorm(hidden_channels)

        self.conv2 = NNConv(in_channels, hidden_channels, nn=torch.nn.Linear(edge_channels, in_channels * hidden_channels))
        self.bn2 = BatchNorm(hidden_channels)

        self.conv2b = NNConv(hidden_channels, hidden_channels, nn=torch.nn.Linear(edge_channels, hidden_channels * hidden_channels))
        self.bn2b = BatchNorm(hidden_channels)

        self.conv3 = NNConv(hidden_channels, hidden_channels, nn=torch.nn.Linear(edge_channels, hidden_channels * hidden_channels))
        self.bn3 = BatchNorm(hidden_channels)

        self.fc1 = torch.nn.Linear(hidden_channels, hidden_channels)
        self.dropout = torch.nn.Dropout(p=0.5)

        self.fc2 = torch.nn.Linear(hidden_channels, out_channels)

    def l2_regularization(self):
        l2_reg = 0
        for param in self.parameters():
            l2_reg += torch.sum(param ** 2)
        return l2_reg

    def forward(self, graph):
        v=graph.x
        edge_index = graph.edge_index
        e = graph.edge_attr
        indices = graph.indices

        v1 = v[torch.tensor(indices["node"][0][0])]
        v2 = v[torch.tensor(indices["node"][0][1])]

        e1 = e[torch.tensor(indices["edge"][0][0])]
        e2 = e[torch.tensor(indices["edge"][0][1])]
        e3 = e[torch.tensor(indices["edge"][0][2])]

        eidx1 = edge_index[:, torch.tensor(indices["edge_index"][0][0])]
        eidx2 = torch.sub(edge_index[:, torch.tensor(indices["edge_index"][0][1])], v1.shape[0])
        eidx3 = edge_index[:, torch.tensor(indices["edge_index"][0][2])]

        graph1 = self.conv1(v1, eidx1, e1)
        graph1 = self.bn1(graph1)
        graph1 = torch.nn.functional.elu(graph1)

        graph1 = self.conv1b(graph1, eidx1, e1)
        graph1 = self.bn1b(graph1)
        graph1 = torch.nn.functional.elu(graph1)

        graph2 = self.conv2(v2, eidx2, e2)
        graph2 = self.bn2(graph2)
        graph2 = torch.nn.functional.elu(graph2)

        graph2 = self.conv2b(graph2, eidx2, e2)
        graph2 = self.bn2b(graph2)
        graph2 = torch.nn.functional.elu(graph2)

        combined_nodes = torch.cat((graph1, graph2, torch.zeros(1, graph1.shape[1])), dim=0)
        v3 = self.conv3(combined_nodes, eidx3, e3)
        v3 = self.bn3(v3)
        v3 = torch.nn.functional.elu(v3)
        v3 = self.dropout(v3[-1])

        v3 = self.fc1(v3)
        v3 = torch.nn.functional.elu(v3)

        v3 = self.fc2(v3)
        v3 = torch.nn.functional.elu(v3)

        return v3

In [10]:
with open('catalyst_model.pkl', 'rb') as f:
    catalyst_model = pickle.load(f)

# Put the model in evaluation mode
catalyst_model.eval()

SynergyGNN(
  (conv1): NNConv(6, 40, aggr=add, nn=Linear(in_features=2, out_features=240, bias=True))
  (bn1): BatchNorm(40)
  (conv1b): NNConv(40, 40, aggr=add, nn=Linear(in_features=2, out_features=1600, bias=True))
  (bn1b): BatchNorm(40)
  (conv2): NNConv(6, 40, aggr=add, nn=Linear(in_features=2, out_features=240, bias=True))
  (bn2): BatchNorm(40)
  (conv2b): NNConv(40, 40, aggr=add, nn=Linear(in_features=2, out_features=1600, bias=True))
  (bn2b): BatchNorm(40)
  (conv3): NNConv(40, 40, aggr=add, nn=Linear(in_features=2, out_features=1600, bias=True))
  (bn3): BatchNorm(40)
  (fc1): Linear(in_features=40, out_features=40, bias=True)
  (dropout): Dropout(p=0.5, inplace=False)
  (fc2): Linear(in_features=40, out_features=6, bias=True)
)

In [11]:
import pandas as pd

In [12]:
df = pd.read_csv('/content/three_merged.csv')

In [13]:
df

Unnamed: 0.1,Unnamed: 0,Drug_ID_x,Drug,Y_x,Drug_canon,Drug_ID_y,Target_ID,Target,Y_y,Drug1_ID_1,Drug1,Drug2_ID_1,Drug2,Y_1,Y.1_1,Drug1_ID_2,Drug2_ID_2,Y_2,Y.1_2
0,0,hexadecanoic acid,CCCCCCCCCCCCCCCC(=O)O,-6.806909,CCCCCCCCCCCCCCCC(O)=O,985.0,P15090,MCDAFVGTWKLVSSENFDDYMKEVGVGFATRKVAGMAKPNMIISVN...,83000.0,DB03796,CCCCCCCCCCCCCCCC(O)=O,DB06695,CCCCCCOC(=O)\N=C(\N)C1=CC=C(NCC2=NC3=C(C=CC(=C...,77,The serum concentration of the active metaboli...,DB03796,DB06695,77,Astrocytoma
1,1,trimethadione,CN1C(=O)OC(C)(C)C1=O,-0.456800,CN1C(=O)OC(C)(C)C1=O,5576.0,P00918,MSHHWGYGKHNGPEHWHKDFPIAKGERQSPVDIDTHTAKYDPSLKP...,2000000.0,DB00347,CN1C(=O)OC(C)(C)C1=O,DB06414,CC1=CC(=CC(C)=C1OC1=C(Br)C(N)=NC(NC2=CC=C(C=C2...,47,The metabolism of #Drug2 can be decreased when...,DB00347,DB06414,47,Anorexia
2,2,trimethadione,CN1C(=O)OC(C)(C)C1=O,-0.456800,CN1C(=O)OC(C)(C)C1=O,5576.0,P00918,MSHHWGYGKHNGPEHWHKDFPIAKGERQSPVDIDTHTAKYDPSLKP...,2000000.0,DB00347,CN1C(=O)OC(C)(C)C1=O,DB06413,NC(=O)C[S@@](=O)C(C1=CC=CC=C1)C1=CC=CC=C1,47,The metabolism of #Drug2 can be decreased when...,DB00347,DB06413,47,Anorexia
3,3,trimethadione,CN1C(=O)OC(C)(C)C1=O,-0.456800,CN1C(=O)OC(C)(C)C1=O,5576.0,P00918,MSHHWGYGKHNGPEHWHKDFPIAKGERQSPVDIDTHTAKYDPSLKP...,2000000.0,DB00347,CN1C(=O)OC(C)(C)C1=O,DB06595,CO[C@@H]1[C@@H](C[C@H]2O[C@]1(C)N1C3=C(C=CC=C3...,47,The metabolism of #Drug2 can be decreased when...,DB00347,DB06595,47,Anorexia
4,4,trimethadione,CN1C(=O)OC(C)(C)C1=O,-0.456800,CN1C(=O)OC(C)(C)C1=O,5576.0,P00918,MSHHWGYGKHNGPEHWHKDFPIAKGERQSPVDIDTHTAKYDPSLKP...,2000000.0,DB00347,CN1C(=O)OC(C)(C)C1=O,DB01072,COC(=O)N[C@H](C(=O)N[C@@H](CC1=CC=CC=C1)[C@@H]...,47,The metabolism of #Drug2 can be decreased when...,DB00347,DB01072,47,Anorexia
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
150,33,trimethadione,CN1C(=O)OC(C)(C)C1=O,-0.456800,CN1C(=O)OC(C)(C)C1=O,5576.0,P00918,MSHHWGYGKHNGPEHWHKDFPIAKGERQSPVDIDTHTAKYDPSLKP...,2000000.0,DB00615,CO[C@H]1\C=C\O[C@@]2(C)OC3=C(C2=O)C2=C(C(O)=C3...,DB00347,CN1C(=O)OC(C)(C)C1=O,4,The metabolism of #Drug2 can be increased when...,DB00615,DB00347,4,abortion missed
151,34,trimethadione,CN1C(=O)OC(C)(C)C1=O,-0.456800,CN1C(=O)OC(C)(C)C1=O,5576.0,P00918,MSHHWGYGKHNGPEHWHKDFPIAKGERQSPVDIDTHTAKYDPSLKP...,2000000.0,DB00252,O=C1NC(=O)C(N1)(C1=CC=CC=C1)C1=CC=CC=C1,DB00347,CN1C(=O)OC(C)(C)C1=O,4,The metabolism of #Drug2 can be increased when...,DB00252,DB00347,4,abortion missed
152,35,trimethadione,CN1C(=O)OC(C)(C)C1=O,-0.456800,CN1C(=O)OC(C)(C)C1=O,5576.0,P00918,MSHHWGYGKHNGPEHWHKDFPIAKGERQSPVDIDTHTAKYDPSLKP...,2000000.0,DB00312,CCCC(C)C1(CC)C(=O)NC(=O)NC1=O,DB00347,CN1C(=O)OC(C)(C)C1=O,4,The metabolism of #Drug2 can be increased when...,DB00312,DB00347,4,abortion missed
153,36,trimethadione,CN1C(=O)OC(C)(C)C1=O,-0.456800,CN1C(=O)OC(C)(C)C1=O,5576.0,P00918,MSHHWGYGKHNGPEHWHKDFPIAKGERQSPVDIDTHTAKYDPSLKP...,2000000.0,DB01083,CCCCCCCCCCC[C@@H](C[C@@H]1OC(=O)[C@H]1CCCCCC)O...,DB00347,CN1C(=O)OC(C)(C)C1=O,75,The serum concentration of #Drug2 can be decre...,DB01083,DB00347,75,asthenopia


In [14]:
df=df.drop(['Unnamed: 0', 'Drug_ID_y', 'Drug1_ID_1', 'Drug2_ID_1', 'Drug2_ID_2', 'Drug1_ID_2' ], axis=1)

In [15]:
df=df.rename(columns={"Drug_ID_x": "Drug_ID", "Y_x": "Solubility", "Drug_canon": "Drug_standardized", "Y_y":"Binding_Affinity", "Y_1":"Drug_Interaction", "Y.1_1":"Description", "Y.1_2":"Side_Effect" })

In [16]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 155 entries, 0 to 154
Data columns (total 13 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Drug_ID            155 non-null    object 
 1   Drug               155 non-null    object 
 2   Solubility         155 non-null    float64
 3   Drug_standardized  155 non-null    object 
 4   Target_ID          155 non-null    object 
 5   Target             155 non-null    object 
 6   Binding_Affinity   155 non-null    float64
 7   Drug1              155 non-null    object 
 8   Drug2              155 non-null    object 
 9   Drug_Interaction   155 non-null    int64  
 10  Description        155 non-null    object 
 11  Y_2                155 non-null    int64  
 12  Side_Effect        155 non-null    object 
dtypes: float64(2), int64(2), object(9)
memory usage: 15.9+ KB


In [17]:
df.head()

Unnamed: 0,Drug_ID,Drug,Solubility,Drug_standardized,Target_ID,Target,Binding_Affinity,Drug1,Drug2,Drug_Interaction,Description,Y_2,Side_Effect
0,hexadecanoic acid,CCCCCCCCCCCCCCCC(=O)O,-6.806909,CCCCCCCCCCCCCCCC(O)=O,P15090,MCDAFVGTWKLVSSENFDDYMKEVGVGFATRKVAGMAKPNMIISVN...,83000.0,CCCCCCCCCCCCCCCC(O)=O,CCCCCCOC(=O)\N=C(\N)C1=CC=C(NCC2=NC3=C(C=CC(=C...,77,The serum concentration of the active metaboli...,77,Astrocytoma
1,trimethadione,CN1C(=O)OC(C)(C)C1=O,-0.4568,CN1C(=O)OC(C)(C)C1=O,P00918,MSHHWGYGKHNGPEHWHKDFPIAKGERQSPVDIDTHTAKYDPSLKP...,2000000.0,CN1C(=O)OC(C)(C)C1=O,CC1=CC(=CC(C)=C1OC1=C(Br)C(N)=NC(NC2=CC=C(C=C2...,47,The metabolism of #Drug2 can be decreased when...,47,Anorexia
2,trimethadione,CN1C(=O)OC(C)(C)C1=O,-0.4568,CN1C(=O)OC(C)(C)C1=O,P00918,MSHHWGYGKHNGPEHWHKDFPIAKGERQSPVDIDTHTAKYDPSLKP...,2000000.0,CN1C(=O)OC(C)(C)C1=O,NC(=O)C[S@@](=O)C(C1=CC=CC=C1)C1=CC=CC=C1,47,The metabolism of #Drug2 can be decreased when...,47,Anorexia
3,trimethadione,CN1C(=O)OC(C)(C)C1=O,-0.4568,CN1C(=O)OC(C)(C)C1=O,P00918,MSHHWGYGKHNGPEHWHKDFPIAKGERQSPVDIDTHTAKYDPSLKP...,2000000.0,CN1C(=O)OC(C)(C)C1=O,CO[C@@H]1[C@@H](C[C@H]2O[C@]1(C)N1C3=C(C=CC=C3...,47,The metabolism of #Drug2 can be decreased when...,47,Anorexia
4,trimethadione,CN1C(=O)OC(C)(C)C1=O,-0.4568,CN1C(=O)OC(C)(C)C1=O,P00918,MSHHWGYGKHNGPEHWHKDFPIAKGERQSPVDIDTHTAKYDPSLKP...,2000000.0,CN1C(=O)OC(C)(C)C1=O,COC(=O)N[C@H](C(=O)N[C@@H](CC1=CC=CC=C1)[C@@H]...,47,The metabolism of #Drug2 can be decreased when...,47,Anorexia


In [None]:
import pandas as pd
harsh = pd.read_csv('/content/Updated_effectiveness.csv')
harsh.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11931 entries, 0 to 11930
Data columns (total 7 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   Unnamed: 0      11931 non-null  int64  
 1   Drug_ID         11931 non-null  object 
 2   Drug            11930 non-null  object 
 3   Target_ID       11931 non-null  object 
 4   Target          11931 non-null  object 
 5   Y               11931 non-null  float64
 6   Target_encoded  11931 non-null  float64
dtypes: float64(2), int64(1), object(4)
memory usage: 652.6+ KB


In [None]:
harsh

Unnamed: 0.1,Unnamed: 0,Drug_ID,Drug,Target_ID,Target,Y,Target_encoded
0,0,51.0,OC(=O)CCC(=O)C(O)=O,Q9GZT9,MANDSGGPGGPSPSERDRQYCELCGKMENLLRCSRCRSSFYCCKEH...,6.802395,0.000000
1,1,187.0,CC(=O)OCC[N+](C)(C)C,P58154,MRRNIFCLACLWIVQACLSLDRADILYNIRQTSRPDVIPTQRDRPV...,6.126869,0.026667
2,2,264.0,CCCC(O)=O,Q9Z429,MQQFTIRTRLLMLVGAMFIGFITIELMGFSALQRGVASLNTVYLDR...,11.429544,0.053333
3,3,311.0,OC(=O)C(O)(CC(O)=O)CC(O)=O,P15474,MPRSLANAPIMILNGPNLNLLGQRQPEIYGSDTLADVEALCVKAAA...,8.895630,0.080000
4,4,525.0,OC(=O)C(O)CC(O)=O,Q9Z429,MQQFTIRTRLLMLVGAMFIGFITIELMGFSALQRGVASLNTVYLDR...,9.047821,0.053333
...,...,...,...,...,...,...,...
4975,4975,70701916.0,CN(C)c1ccc(cc1)-c1[n]c(c[s]1)C(=O)NO,O67648,MGLEKTVKEKLSFEGVGIHTGEYSKLIIHPEKEGTGIRFFKNGVYI...,10.247077,4.346667
4976,4976,70701953.0,CC(C)(C)c1cc(NC(=O)Nc2[n]c(CCOCc3ccc(F)cc3)c[s...,Q16832,MILIPRMLLVLFLLLPILSSAKAQVNPAICRYPLGMSGGQIPDEDI...,6.049733,5.040000
4977,4977,70702054.0,Nc1[n]c[n]c2c1[n]c[n]2C1OC(CSCCC(N)C(O)=O)[C@@...,Q8TEK3,MGEKLELRLKSPVGAEPAVYPWPLPVYDKHHDAAHEIIETIRWVCE...,4.262680,14.506667
4978,4978,70702057.0,CN(CCNC(=O)Nc1ccc(cc1)C(C)(C)C)CC1OC([C@H](O)[...,Q8TEK3,MGEKLELRLKSPVGAEPAVYPWPLPVYDKHHDAAHEIIETIRWVCE...,5.117994,14.506667


In [None]:
mydict = harsh.set_index('Target_ID')['Target_encoded'].to_dict()
print(mydict['P15090'])

0.24


In [None]:
df['Target_encoded'] = df['Target_ID'].map(mydict)

In [None]:
df

Unnamed: 0,Drug_ID,Drug,Solubility,Drug_standardized,Target_ID,Target,Binding_Affinity,Drug1,Drug2,Drug_Interaction,Description,Y_2,Side_Effect,Target_encoded
0,hexadecanoic acid,CCCCCCCCCCCCCCCC(=O)O,-6.806909,CCCCCCCCCCCCCCCC(O)=O,P15090,MCDAFVGTWKLVSSENFDDYMKEVGVGFATRKVAGMAKPNMIISVN...,83000.0,CCCCCCCCCCCCCCCC(O)=O,CCCCCCOC(=O)\N=C(\N)C1=CC=C(NCC2=NC3=C(C=CC(=C...,77,The serum concentration of the active metaboli...,77,Astrocytoma,0.240000
1,trimethadione,CN1C(=O)OC(C)(C)C1=O,-0.456800,CN1C(=O)OC(C)(C)C1=O,P00918,MSHHWGYGKHNGPEHWHKDFPIAKGERQSPVDIDTHTAKYDPSLKP...,2000000.0,CN1C(=O)OC(C)(C)C1=O,CC1=CC(=CC(C)=C1OC1=C(Br)C(N)=NC(NC2=CC=C(C=C2...,47,The metabolism of #Drug2 can be decreased when...,47,Anorexia,0.426667
2,trimethadione,CN1C(=O)OC(C)(C)C1=O,-0.456800,CN1C(=O)OC(C)(C)C1=O,P00918,MSHHWGYGKHNGPEHWHKDFPIAKGERQSPVDIDTHTAKYDPSLKP...,2000000.0,CN1C(=O)OC(C)(C)C1=O,NC(=O)C[S@@](=O)C(C1=CC=CC=C1)C1=CC=CC=C1,47,The metabolism of #Drug2 can be decreased when...,47,Anorexia,0.426667
3,trimethadione,CN1C(=O)OC(C)(C)C1=O,-0.456800,CN1C(=O)OC(C)(C)C1=O,P00918,MSHHWGYGKHNGPEHWHKDFPIAKGERQSPVDIDTHTAKYDPSLKP...,2000000.0,CN1C(=O)OC(C)(C)C1=O,CO[C@@H]1[C@@H](C[C@H]2O[C@]1(C)N1C3=C(C=CC=C3...,47,The metabolism of #Drug2 can be decreased when...,47,Anorexia,0.426667
4,trimethadione,CN1C(=O)OC(C)(C)C1=O,-0.456800,CN1C(=O)OC(C)(C)C1=O,P00918,MSHHWGYGKHNGPEHWHKDFPIAKGERQSPVDIDTHTAKYDPSLKP...,2000000.0,CN1C(=O)OC(C)(C)C1=O,COC(=O)N[C@H](C(=O)N[C@@H](CC1=CC=CC=C1)[C@@H]...,47,The metabolism of #Drug2 can be decreased when...,47,Anorexia,0.426667
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
150,trimethadione,CN1C(=O)OC(C)(C)C1=O,-0.456800,CN1C(=O)OC(C)(C)C1=O,P00918,MSHHWGYGKHNGPEHWHKDFPIAKGERQSPVDIDTHTAKYDPSLKP...,2000000.0,CO[C@H]1\C=C\O[C@@]2(C)OC3=C(C2=O)C2=C(C(O)=C3...,CN1C(=O)OC(C)(C)C1=O,4,The metabolism of #Drug2 can be increased when...,4,abortion missed,0.426667
151,trimethadione,CN1C(=O)OC(C)(C)C1=O,-0.456800,CN1C(=O)OC(C)(C)C1=O,P00918,MSHHWGYGKHNGPEHWHKDFPIAKGERQSPVDIDTHTAKYDPSLKP...,2000000.0,O=C1NC(=O)C(N1)(C1=CC=CC=C1)C1=CC=CC=C1,CN1C(=O)OC(C)(C)C1=O,4,The metabolism of #Drug2 can be increased when...,4,abortion missed,0.426667
152,trimethadione,CN1C(=O)OC(C)(C)C1=O,-0.456800,CN1C(=O)OC(C)(C)C1=O,P00918,MSHHWGYGKHNGPEHWHKDFPIAKGERQSPVDIDTHTAKYDPSLKP...,2000000.0,CCCC(C)C1(CC)C(=O)NC(=O)NC1=O,CN1C(=O)OC(C)(C)C1=O,4,The metabolism of #Drug2 can be increased when...,4,abortion missed,0.426667
153,trimethadione,CN1C(=O)OC(C)(C)C1=O,-0.456800,CN1C(=O)OC(C)(C)C1=O,P00918,MSHHWGYGKHNGPEHWHKDFPIAKGERQSPVDIDTHTAKYDPSLKP...,2000000.0,CCCCCCCCCCC[C@@H](C[C@@H]1OC(=O)[C@H]1CCCCCC)O...,CN1C(=O)OC(C)(C)C1=O,75,The serum concentration of #Drug2 can be decre...,75,asthenopia,0.426667


In [None]:
df['Target'][1]

'MSHHWGYGKHNGPEHWHKDFPIAKGERQSPVDIDTHTAKYDPSLKPLSVSYDQATSLRILNNGHAFNVEFDDSQDKAVLKGGPLDGTYRLIQFHFHWGSLDGQGSEHTVDKKKYAAELHLVHWNTKYGDFGKAVQQPDGLAVLGIFLKVGSAKPGLQKVVDVLDSIKTKGKSADFTNFDPRGLLPESLDYWTYPGSLTTPPLLECVTWIVLKEPISVSSEQVLKFRKLNFNGEGEPEELMVDNWRPAQPLKNRQIKASFK'

In [None]:
common['Drug'].unique()

array(['CN1C(=O)OC(C)(C)C1=O'], dtype=object)

In [21]:
def molecule_to_graph(molecule):
    num_atoms = molecule.GetNumAtoms()
    x = torch.tensor([atom_feature_vector(atom) for atom in molecule.GetAtoms()], dtype=torch.float)
    edge_index = []
    edge_attr = []
    for bond in molecule.GetBonds():
        edge_index.extend([[bond.GetBeginAtomIdx(), bond.GetEndAtomIdx()], [bond.GetEndAtomIdx(), bond.GetBeginAtomIdx()]])
        edge_attr.extend([bond_feature_vector(bond), bond_feature_vector(bond)])
    edge_index = torch.tensor(edge_index, dtype=torch.long).t().contiguous()
    edge_attr = torch.tensor(edge_attr, dtype=torch.float)
    return Data(x=x, edge_index=edge_index, edge_attr=edge_attr)

def atom_feature_vector(atom):
    return [atom.GetAtomicNum(), atom.GetDegree(), atom.GetHybridization()]

def bond_feature_vector(bond):
    return [bond.GetBondTypeAsDouble(), bond.IsInRing()]

def visualize(graph):
    nx_graph = to_networkx(graph, to_undirected=True)

    fig = plt.figure(figsize=(10, 10))
    pos = nx.spring_layout(nx_graph)

    nx.draw_networkx(nx_graph, pos, with_labels=True, node_size=500, font_size=12, font_weight='bold')

    if 'edge_attr' in next(iter(nx_graph.edges(data=True)))[-1]:
        edge_labels = {(u, v): d['edge_attr'] for u, v, d in nx_graph.edges(data=True)}
        nx.draw_networkx_edge_labels(nx_graph, pos, edge_labels=edge_labels, font_size=10)

    plt.axis('off')
    plt.show()

# DSI Predictions

In [32]:
import numpy as np
graphs1 = []
indices1 = []

for _, row in df.iterrows():
    try:
        idx_dict = {}
        if isinstance(row['Drug'], str):
            molecule = Chem.MolFromSmiles(row['Drug'])
            graph = molecule_to_graph(molecule)

            required_feature_size = graph.x.shape[1]  # The total number of features per node

            # Initialize a vector of zeros with the required_feature_size
            # Since there is no 'Y' column, we use only zeros
            catalyst_score_features = torch.zeros(1, required_feature_size)

            catalyst_score_node_index = graph.x.shape[0]

            combined_x = torch.cat([graph.x, catalyst_score_features], dim=0)
            combined_x_idx = [[], []]
            combined_x_idx[0] = np.arange(0, graph.x.shape[0])  # graph 1 node indices
            combined_x_idx[1] = np.arange(graph.x.shape[0], graph.x.shape[0] + 1)  # index for the additional zeros

            idx_dict["node"] = combined_x_idx

            # Creating additional edges connecting the new node to all existing nodes
            additional_edge_index = torch.cat([
                torch.tensor([[i, catalyst_score_node_index] for i in range(graph.x.shape[0])])], dim=0).t().contiguous()

            # Assuming all additional edges have the same attributes, such as a weight of 1
            additional_edge_attr = torch.ones((additional_edge_index.shape[1], graph.edge_attr.shape[1]), dtype=torch.float)

            combined_edge_index = torch.cat([graph.edge_index, additional_edge_index], dim=1)
            combined_edge_index_idx = [[], []]
            combined_edge_index_idx[0] = np.arange(0, graph.edge_index.shape[1])  # Original edges' indices
            combined_edge_index_idx[1] = np.arange(graph.edge_index.shape[1], graph.edge_index.shape[1] + additional_edge_index.shape[1])  # New edges' indices

            idx_dict["edge_index"] = combined_edge_index_idx

            combined_edge_attr = torch.cat([graph.edge_attr, additional_edge_attr], dim=0)
            combined_edge_attr_idx = [[], []]
            combined_edge_attr_idx[0] = np.arange(0, graph.edge_attr.shape[0])  # Original edge attributes' indices
            combined_edge_attr_idx[1] = np.arange(graph.edge_attr.shape[0], graph.edge_attr.shape[0] + additional_edge_attr.shape[0])  # New edge attributes' indices

            idx_dict["edge"] = combined_edge_attr_idx

            # Create the final graph data object with the combined nodes, edges, and attributes
            combined_graph = Data(x=combined_x, edge_index=combined_edge_index, edge_attr=combined_edge_attr)
            graphs1.append(combined_graph)
            indices1.append(idx_dict)

    except Exception as e:
        print(f"Error processing row: {e}")
        continue



CCCCCCCCCCCCCCCC(=O)O
CN1C(=O)OC(C)(C)C1=O
CN1C(=O)OC(C)(C)C1=O
CN1C(=O)OC(C)(C)C1=O
CN1C(=O)OC(C)(C)C1=O
CN1C(=O)OC(C)(C)C1=O
CN1C(=O)OC(C)(C)C1=O
CN1C(=O)OC(C)(C)C1=O
CN1C(=O)OC(C)(C)C1=O
CN1C(=O)OC(C)(C)C1=O
CN1C(=O)OC(C)(C)C1=O
CN1C(=O)OC(C)(C)C1=O
CN1C(=O)OC(C)(C)C1=O
CN1C(=O)OC(C)(C)C1=O
CN1C(=O)OC(C)(C)C1=O
CN1C(=O)OC(C)(C)C1=O
CN1C(=O)OC(C)(C)C1=O
CN1C(=O)OC(C)(C)C1=O
CN1C(=O)OC(C)(C)C1=O
CN1C(=O)OC(C)(C)C1=O
CN1C(=O)OC(C)(C)C1=O
CN1C(=O)OC(C)(C)C1=O
CN1C(=O)OC(C)(C)C1=O
CN1C(=O)OC(C)(C)C1=O
CN1C(=O)OC(C)(C)C1=O
CN1C(=O)OC(C)(C)C1=O
CN1C(=O)OC(C)(C)C1=O
CN1C(=O)OC(C)(C)C1=O
CN1C(=O)OC(C)(C)C1=O
CN1C(=O)OC(C)(C)C1=O
CN1C(=O)OC(C)(C)C1=O
CN1C(=O)OC(C)(C)C1=O
CN1C(=O)OC(C)(C)C1=O
CN1C(=O)OC(C)(C)C1=O
CN1C(=O)OC(C)(C)C1=O
CN1C(=O)OC(C)(C)C1=O
CN1C(=O)OC(C)(C)C1=O
CN1C(=O)OC(C)(C)C1=O
CN1C(=O)OC(C)(C)C1=O
CN1C(=O)OC(C)(C)C1=O
CN1C(=O)OC(C)(C)C1=O
CN1C(=O)OC(C)(C)C1=O
CN1C(=O)OC(C)(C)C1=O
CN1C(=O)OC(C)(C)C1=O
CN1C(=O)OC(C)(C)C1=O
CN1C(=O)OC(C)(C)C1=O
CN1C(=O)OC(C)(C)C1=O
CN1C(=O)OC(C

In [23]:
print(len(graphs1))
print(len(indices1))

155
155


In [24]:
graphs1

[Data(x=[19, 3], edge_index=[2, 52], edge_attr=[52, 2]),
 Data(x=[11, 3], edge_index=[2, 30], edge_attr=[30, 2]),
 Data(x=[11, 3], edge_index=[2, 30], edge_attr=[30, 2]),
 Data(x=[11, 3], edge_index=[2, 30], edge_attr=[30, 2]),
 Data(x=[11, 3], edge_index=[2, 30], edge_attr=[30, 2]),
 Data(x=[11, 3], edge_index=[2, 30], edge_attr=[30, 2]),
 Data(x=[11, 3], edge_index=[2, 30], edge_attr=[30, 2]),
 Data(x=[11, 3], edge_index=[2, 30], edge_attr=[30, 2]),
 Data(x=[11, 3], edge_index=[2, 30], edge_attr=[30, 2]),
 Data(x=[11, 3], edge_index=[2, 30], edge_attr=[30, 2]),
 Data(x=[11, 3], edge_index=[2, 30], edge_attr=[30, 2]),
 Data(x=[11, 3], edge_index=[2, 30], edge_attr=[30, 2]),
 Data(x=[11, 3], edge_index=[2, 30], edge_attr=[30, 2]),
 Data(x=[11, 3], edge_index=[2, 30], edge_attr=[30, 2]),
 Data(x=[11, 3], edge_index=[2, 30], edge_attr=[30, 2]),
 Data(x=[11, 3], edge_index=[2, 30], edge_attr=[30, 2]),
 Data(x=[11, 3], edge_index=[2, 30], edge_attr=[30, 2]),
 Data(x=[11, 3], edge_index=[2,

In [25]:
indices1

[{'node': [array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
          17]),
   array([18])],
  'edge_index': [array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
          17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33]),
   array([34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
          51])],
  'edge': [array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
          17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33]),
   array([34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
          51])]},
 {'node': [array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]), array([10])],
  'edge_index': [array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
          17, 18, 19]),
   array([20, 21, 22, 23, 24, 25, 26, 27, 28, 29])],
  'edge': [array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
          17, 18, 19]),
   array(

In [26]:
from torch_geometric.loader import DataLoader
data1 = []
for graph, index in zip(graphs1, indices1):
    graph.indices = index
    data1.append(graph)

graph1_loader = DataLoader(data1, batch_size=1, shuffle=False,  drop_last=False)
print(len(data1))

155


In [27]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
dsi_model.to(device)

DSIGNN(
  (conv1): NNConv(3, 8, aggr=add, nn=Linear(in_features=2, out_features=24, bias=True))
  (bn1): BatchNorm(8)
  (conv1b): NNConv(8, 8, aggr=add, nn=Linear(in_features=2, out_features=64, bias=True))
  (bn1b): BatchNorm(8)
  (conv2): NNConv(8, 8, aggr=add, nn=Linear(in_features=2, out_features=64, bias=True))
  (bn2): BatchNorm(8)
  (fc1): Linear(in_features=8, out_features=8, bias=True)
  (fc2): Linear(in_features=8, out_features=1, bias=True)
  (dropout): Dropout(p=0.5, inplace=False)
)

In [30]:
dsi_predictions = []
with torch.no_grad():
    for d in graph1_loader:
        d = d.to(device)
        output = dsi_model(d)
        print(output)
        dsi_predictions.extend(output.cpu().numpy())

tensor([-0.3356])
tensor([-0.3356])
tensor([-0.3356])
tensor([-0.3356])
tensor([-0.3356])
tensor([-0.3356])
tensor([-0.3356])
tensor([-0.3356])
tensor([-0.3356])
tensor([-0.3356])
tensor([-0.3356])
tensor([-0.3356])
tensor([-0.3356])
tensor([-0.3356])
tensor([-0.3356])
tensor([-0.3356])
tensor([-0.3356])
tensor([-0.3356])
tensor([-0.3356])
tensor([-0.3356])
tensor([-0.3356])
tensor([-0.3356])
tensor([-0.3356])
tensor([-0.3356])
tensor([-0.3356])
tensor([-0.3356])
tensor([-0.3356])
tensor([-0.3356])
tensor([-0.3356])
tensor([-0.3356])
tensor([-0.3356])
tensor([-0.3356])
tensor([-0.3356])
tensor([-0.3356])
tensor([-0.3356])
tensor([-0.3356])
tensor([-0.3356])
tensor([-0.3356])
tensor([-0.3356])
tensor([-0.3356])
tensor([-0.3356])
tensor([-0.3356])
tensor([-0.3356])
tensor([-0.3356])
tensor([-0.3356])
tensor([-0.3356])
tensor([-0.3356])
tensor([-0.3356])
tensor([-0.3356])
tensor([-0.3356])
tensor([-0.3356])
tensor([-0.3356])
tensor([-0.3356])
tensor([-0.3356])
tensor([-0.3356])
tensor([-0

In [None]:
len(dsi_predictions)

155

In [None]:
print(set(dsi_predictions))

{-0.3355835}


In [None]:
dsi_predictions

# DDI Predictions

In [None]:
import torch
from rdkit import Chem
from torch_geometric.data import Data
import numpy as np
# Define function placeholders (atom_feature_vector, bond_feature_vector, molecule_to_graph) here
graphs2 = []
indices2 = []
count = 0

for _, row in df.iterrows():
    idx_dict = {}
    try:
        molecule1 = Chem.MolFromSmiles(row['Drug1'])
        molecule2 = Chem.MolFromSmiles(row['Drug2'])

        # Check if molecules are None (indicating parsing failure)
        if molecule1 is None or molecule2 is None:
            print("Parsing error: Skipping row due to invalid SMILES.")
            continue

        graph1 = molecule_to_graph(molecule1)
        graph2 = molecule_to_graph(molecule2)

        ddi_features = torch.zeros((1, 2), dtype=torch.float)

        ddi_features_node_index = graph1.x.shape[0] + graph2.x.shape[0]
        pad_size = graph1.x.shape[1] - ddi_features.shape[1]

        ddi_features_padded = torch.cat([ddi_features,torch.zeros((ddi_features.shape[0],pad_size))],dim=1)

        combined_x = torch.cat([graph1.x, graph2.x, ddi_features_padded])

        combined_x_idx = [[], [], []]
        combined_x_idx[0] = np.arange(0,graph1.x.shape[0]) # graph 1 node indices
        combined_x_idx[1] = np.arange(graph1.x.shape[0],graph1.x.shape[0]+graph2.x.shape[0]) # graph 2 node indices
        combined_x_idx[2] = np.arange(graph1.x.shape[0]+graph2.x.shape[0],graph1.x.shape[0]+graph2.x.shape[0]+1) # y node index


        idx_dict["node"] = combined_x_idx

        # Create edges between each node in graph1 and the "Catalyst Score" node
        graph1_to_y_edge_index = torch.cat([
            torch.tensor([[i, ddi_features_node_index] for i in range(graph1.x.shape[0])])
        ], dim=0).t().contiguous()

        '''
            Create edges between each node in graph2 and the "Catalyst Score" node. Note that a graph.x.shape[0] offset
            is required so that the edge indices are offset
        '''
        graph2_to_y_edge_index = torch.cat([
            torch.tensor([[i + graph1.x.shape[0], ddi_features_node_index] for i in range(graph2.x.shape[0])])
        ], dim=0).t().contiguous()

        # Combine the edge indices
        combined_edge_index = torch.cat([graph1.edge_index, graph2.edge_index + graph1.x.shape[0], graph1_to_y_edge_index, graph2_to_y_edge_index], dim=1)

        combined_edge_index_idx = [[], [], []]
        combined_edge_index_idx[0] = np.arange(0,graph1.edge_index.shape[1]) # graph 1 edge index' indices
        combined_edge_index_idx[1] = np.arange(graph1.edge_index.shape[1],graph1.edge_index.shape[1]+graph2.edge_index.shape[1]) # graph 2 edge index' indices
        combined_edge_index_idx[2] = np.arange(graph1.edge_index.shape[1]+graph2.edge_index.shape[1],
                            graph1.edge_index.shape[1]+graph2.edge_index.shape[1]+graph1_to_y_edge_index.shape[1]+graph2_to_y_edge_index.shape[1]) # y edge index' index

        idx_dict["edge_index"] = combined_edge_index_idx

        # Edge connections from every other node to Y node
        graph1_to_y_edge_attr = torch.ones((graph1_to_y_edge_index.shape[1], graph1.edge_attr.shape[1]), dtype=torch.float)
        graph2_to_y_edge_attr = torch.ones((graph2_to_y_edge_index.shape[1], graph2.edge_attr.shape[1]), dtype=torch.float)

        # Combine the edge attributes
        combined_edge_attr = torch.cat([graph1.edge_attr, graph2.edge_attr, graph1_to_y_edge_attr, graph2_to_y_edge_attr])

        combined_edge_attr_idx = [[], [], []]
        combined_edge_attr_idx[0] = np.arange(0,graph1.edge_attr.shape[0]) # graph 1 edge attr' indices
        combined_edge_attr_idx[1] = np.arange(graph1.edge_attr.shape[0],graph1.edge_attr.shape[0]+graph2.edge_attr.shape[0]) # graph 2 edge attr' indices
        combined_edge_attr_idx[2] = np.arange(graph1.edge_attr.shape[0]+graph2.edge_attr.shape[0],
                            graph1.edge_attr.shape[0]+graph2.edge_attr.shape[0]+graph1_to_y_edge_attr.shape[0]+graph2_to_y_edge_attr.shape[0]) # y edge attr' index

        idx_dict["edge"] = combined_edge_attr_idx

        # Create the combined graph
        combined_graph = Data(x=combined_x, edge_index=combined_edge_index, edge_attr=combined_edge_attr, y=ddi_features)
        graphs2.append(combined_graph)
        indices2.append(idx_dict)

    except Exception as e:
        continue  # Skip this row and proceed to the next one



# Call the function with your data
# process_data(your_data)


In [None]:
print(len(graphs2))
print(len(indices2))

155
155


In [None]:
data2 = []
for graph, index in zip(graphs2, indices2):
    graph.indices = index
    data2.append(graph)

graph2_loader = DataLoader(data2, batch_size=1, shuffle=False,  drop_last=False)
print(len(data2))

155


In [None]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
ddi_model.to(device)

DDIGNN(
  (conv1): NNConv(3, 40, aggr=add, nn=Linear(in_features=2, out_features=120, bias=True))
  (bn1): BatchNorm(40)
  (conv1b): NNConv(40, 40, aggr=add, nn=Linear(in_features=2, out_features=1600, bias=True))
  (bn1b): BatchNorm(40)
  (conv2): NNConv(3, 40, aggr=add, nn=Linear(in_features=2, out_features=120, bias=True))
  (bn2): BatchNorm(40)
  (conv2b): NNConv(40, 40, aggr=add, nn=Linear(in_features=2, out_features=1600, bias=True))
  (bn2b): BatchNorm(40)
  (conv3): NNConv(40, 40, aggr=add, nn=Linear(in_features=2, out_features=1600, bias=True))
  (bn3): BatchNorm(40)
  (fc1): Linear(in_features=40, out_features=40, bias=True)
  (dropout): Dropout(p=0.5, inplace=False)
  (fc2): Linear(in_features=40, out_features=2, bias=True)
)

In [None]:
ddi_predictions = []
with torch.no_grad():
    for d in graph2_loader:
        d = d.to(device)
        output = ddi_model(d)
        ddi_predictions.extend(output.cpu().numpy())

In [None]:
len(ddi_predictions)

310

In [None]:
ddi_predictions

# Effectiveness Predictions

In [None]:
import numpy as np
import torch
from rdkit import Chem
from torch_geometric.data import Data

def target_mapping(target):
  #to do
  pass


graphs3 = []
indices3 = []

def mol_to_graph(target_encoded):

  for _, row in df.iterrows():
      idx_dict = {}
      if isinstance(row['Drug'], str):
          molecule = Chem.MolFromSmiles(row['Drug'])
          graph = molecule_to_graph(molecule)

          required_feature_size = 1  # The total number of features per node

          # Create a feature vector for the "Catalyst Score" node
          # Initialize a vector of zeros with the required_feature_size
          catalyst_score_features = torch.zeros(1, required_feature_size)
          # catalyst_score_features[0, 0] = row['Y'] -- replaced with zero
          catalyst_score_node_index = graph.x.shape[0]

          pad_size = graph.x.shape[1] - catalyst_score_features.shape[1]
          catalyst_score_features_padded = torch.cat([catalyst_score_features, torch.zeros((catalyst_score_features.shape[0], pad_size))], dim=1)

          combined_x = torch.cat([graph.x, catalyst_score_features_padded], dim=0)

          combined_x_idx = [[], []]
          combined_x_idx[0] = np.arange(0, graph.x.shape[0])  # graph 1 node indices
          combined_x_idx[1] = np.arange(graph.x.shape[0], graph.x.shape[0]+1)  # y index

          idx_dict["node"] = combined_x_idx

          additional_edge_index = torch.cat([
              torch.tensor([[i, catalyst_score_node_index] for i in range(graph.x.shape[0])]),
          ], dim=0).t().contiguous()

          additional_edge_attr = torch.ones((additional_edge_index.shape[1], graph.edge_attr.shape[1]), dtype=torch.float)

          combined_edge_index = torch.cat([graph.edge_index, additional_edge_index], dim=1)

          combined_edge_index_idx = [[], []]
          combined_edge_index_idx[0] = np.arange(0, graph.edge_index.shape[1])  # graph 1 edge index' indices
          combined_edge_index_idx[1] = np.arange(graph.edge_index.shape[1],
                                                graph.edge_index.shape[1] + additional_edge_index.shape[1])  # y edge index' index

          idx_dict["edge_index"] = combined_edge_index_idx

          combined_edge_attr = torch.cat([graph.edge_attr, additional_edge_attr], dim=0)
          combined_edge_attr_idx = [[], []]
          combined_edge_attr_idx[0] = np.arange(0, graph.edge_attr.shape[0])  # graph 1 edge attr' indices
          combined_edge_attr_idx[1] = np.arange(graph.edge_attr.shape[0],
                                                graph.edge_attr.shape[0] + additional_edge_attr.shape[0])  # y edge attr' index

          idx_dict["edge"] = combined_edge_attr_idx
          combined_graph = Data(x=combined_x, edge_index=combined_edge_index, edge_attr=combined_edge_attr, y=catalyst_score_features)
          combined_graph.target = torch.Tensor([[target_encoded]])

          graphs3.append(combined_graph)
          indices3.append(idx_dict)


In [None]:
mol_to_graph(0.426667)

In [None]:
print(len(graphs3))
print(len(indices3))

155
155


In [None]:
data3 = []
for graph, index in zip(graphs3, indices3):
    graph.indices = index
    data3.append(graph)

graph3_loader = DataLoader(data3, batch_size=1, shuffle=False,  drop_last=False)
print(len(data3))

155


In [None]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
effectiveness_model.to(device)

EffectiveGNN(
  (conv1): NNConv(3, 8, aggr=add, nn=Linear(in_features=2, out_features=24, bias=True))
  (bn1): BatchNorm(8)
  (conv1b): NNConv(8, 8, aggr=add, nn=Linear(in_features=2, out_features=64, bias=True))
  (bn1b): BatchNorm(8)
  (conv2): NNConv(8, 8, aggr=add, nn=Linear(in_features=2, out_features=64, bias=True))
  (bn2): BatchNorm(8)
  (fc1): Linear(in_features=8, out_features=8, bias=True)
  (fc2): Linear(in_features=8, out_features=1, bias=True)
  (dropout): Dropout(p=0.5, inplace=False)
)

In [None]:
effectiveness_predictions = []
with torch.no_grad():
    for d in graph3_loader:
        d = d.to(device)
        output = effectiveness_model(d)
        effectiveness_predictions.extend(output.cpu().numpy())

In [None]:
effectiveness_predictions

[5.018417,
 3.3117213,
 3.3117213,
 3.3117213,
 3.3117213,
 3.3117213,
 3.3117213,
 3.3117213,
 3.3117213,
 3.3117213,
 3.3117213,
 3.3117213,
 3.3117213,
 3.3117213,
 3.3117213,
 3.3117213,
 3.3117213,
 3.3117213,
 3.3117213,
 3.3117213,
 3.3117213,
 3.3117213,
 3.3117213,
 3.3117213,
 3.3117213,
 3.3117213,
 3.3117213,
 3.3117213,
 3.3117213,
 3.3117213,
 3.3117213,
 3.3117213,
 3.3117213,
 3.3117213,
 3.3117213,
 3.3117213,
 3.3117213,
 3.3117213,
 3.3117213,
 3.3117213,
 3.3117213,
 3.3117213,
 3.3117213,
 3.3117213,
 3.3117213,
 3.3117213,
 3.3117213,
 3.3117213,
 3.3117213,
 3.3117213,
 3.3117213,
 3.3117213,
 3.3117213,
 3.3117213,
 3.3117213,
 3.3117213,
 3.3117213,
 3.3117213,
 3.3117213,
 3.3117213,
 3.3117213,
 3.3117213,
 3.3117213,
 3.3117213,
 3.3117213,
 3.3117213,
 3.3117213,
 3.3117213,
 3.3117213,
 3.3117213,
 3.3117213,
 3.3117213,
 3.3117213,
 3.3117213,
 3.3117213,
 3.3117213,
 3.3117213,
 3.3117213,
 3.3117213,
 3.3117213,
 3.3117213,
 3.3117213,
 3.3117213,
 3.31

# Catalyst Predictions

In [38]:
import numpy as np
import torch
from rdkit import Chem
from torch_geometric.data import Data

graphs4 = []
indices4 = []

for _, row in df.iterrows():
    idx_dict = {}
    # Convert SMILES to molecule objects
    molecule1 = Chem.MolFromSmiles(row['Drug1'])
    molecule2 = Chem.MolFromSmiles(row['Drug2'])

    # Convert molecules to graph representations
    graph1 = molecule_to_graph(molecule1)
    graph2 = molecule_to_graph(molecule2)

    # Create the "Catalyst Score" node
    catalyst_score_features = torch.zeros(1, 6)
    catalyst_score_node_index = graph1.x.shape[0] + graph2.x.shape[0]

    # Pad the node features of graph1 and graph2 to match the size of the "Catalyst Score" node features
    pad_size = catalyst_score_features.shape[1] - graph1.x.shape[1]
    graph1_x_padded = torch.cat([graph1.x, torch.zeros((graph1.x.shape[0], pad_size))], dim=1)
    graph2_x_padded = torch.cat([graph2.x, torch.zeros((graph2.x.shape[0], pad_size))], dim=1)

    # Combine the padded node features of graph1, graph2, and the "Catalyst Score" node
    combined_x = torch.cat([graph1_x_padded, graph2_x_padded, catalyst_score_features])

    combined_x_idx = [[], [], []]
    combined_x_idx[0] = np.arange(0,graph1.x.shape[0]) # graph 1 node indices
    combined_x_idx[1] = np.arange(graph1.x.shape[0],graph1.x.shape[0]+graph2.x.shape[0]) # graph 2 node indices
    combined_x_idx[2] = np.arange(graph1.x.shape[0]+graph2.x.shape[0],graph1.x.shape[0]+graph2.x.shape[0]+1) # y node index

    idx_dict["node"] = combined_x_idx

    # Create edges between each node in graph1 and the "Catalyst Score" node
    graph1_to_y_edge_index = torch.cat([
        torch.tensor([[i, catalyst_score_node_index] for i in range(graph1.x.shape[0])])
    ], dim=0).t().contiguous()

    '''
        Create edges between each node in graph2 and the "Catalyst Score" node. Note that a graph.x.shape[0] offset
        is required so that the edge indices are offset
    '''
    graph2_to_y_edge_index = torch.cat([
        torch.tensor([[i + graph1.x.shape[0], catalyst_score_node_index] for i in range(graph2.x.shape[0])])
    ], dim=0).t().contiguous()

    # Combine the edge indices
    combined_edge_index = torch.cat([graph1.edge_index, graph2.edge_index + graph1.x.shape[0], graph1_to_y_edge_index, graph2_to_y_edge_index], dim=1)

    combined_edge_index_idx = [[], [], []]
    combined_edge_index_idx[0] = np.arange(0,graph1.edge_index.shape[1]) # graph 1 edge index' indices
    combined_edge_index_idx[1] = np.arange(graph1.edge_index.shape[1],graph1.edge_index.shape[1]+graph2.edge_index.shape[1]) # graph 2 edge index' indices
    combined_edge_index_idx[2] = np.arange(graph1.edge_index.shape[1]+graph2.edge_index.shape[1],
                        graph1.edge_index.shape[1]+graph2.edge_index.shape[1]+graph1_to_y_edge_index.shape[1]+graph2_to_y_edge_index.shape[1]) # y edge index' index

    idx_dict["edge_index"] = combined_edge_index_idx

    # Edge connections from every other node to Y node
    graph1_to_y_edge_attr = torch.ones((graph1_to_y_edge_index.shape[1], graph1.edge_attr.shape[1]), dtype=torch.float)
    graph2_to_y_edge_attr = torch.ones((graph2_to_y_edge_index.shape[1], graph2.edge_attr.shape[1]), dtype=torch.float)

    # Combine the edge attributes
    combined_edge_attr = torch.cat([graph1.edge_attr, graph2.edge_attr, graph1_to_y_edge_attr, graph2_to_y_edge_attr])

    combined_edge_attr_idx = [[], [], []]
    combined_edge_attr_idx[0] = np.arange(0,graph1.edge_attr.shape[0]) # graph 1 edge attr' indices
    combined_edge_attr_idx[1] = np.arange(graph1.edge_attr.shape[0],graph1.edge_attr.shape[0]+graph2.edge_attr.shape[0]) # graph 2 edge attr' indices
    combined_edge_attr_idx[2] = np.arange(graph1.edge_attr.shape[0]+graph2.edge_attr.shape[0],
                        graph1.edge_attr.shape[0]+graph2.edge_attr.shape[0]+graph1_to_y_edge_attr.shape[0]+graph2_to_y_edge_attr.shape[0]) # y edge attr' index

    idx_dict["edge"] = combined_edge_attr_idx

    # Create the combined graph
    combined_graph = Data(x=combined_x, edge_index=combined_edge_index, edge_attr=combined_edge_attr, y=catalyst_score_features)
    graphs4.append(combined_graph)
    indices4.append(idx_dict)
print(len(graphs4))



155


In [39]:
data4 = []
for graph, index in zip(graphs4, indices4):
    graph.indices = index
    data4.append(graph)

graph4_loader = DataLoader(data4, batch_size=1, shuffle=False,  drop_last=False)
print(len(data4))

155


In [40]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
catalyst_model.to(device)

SynergyGNN(
  (conv1): NNConv(6, 40, aggr=add, nn=Linear(in_features=2, out_features=240, bias=True))
  (bn1): BatchNorm(40)
  (conv1b): NNConv(40, 40, aggr=add, nn=Linear(in_features=2, out_features=1600, bias=True))
  (bn1b): BatchNorm(40)
  (conv2): NNConv(6, 40, aggr=add, nn=Linear(in_features=2, out_features=240, bias=True))
  (bn2): BatchNorm(40)
  (conv2b): NNConv(40, 40, aggr=add, nn=Linear(in_features=2, out_features=1600, bias=True))
  (bn2b): BatchNorm(40)
  (conv3): NNConv(40, 40, aggr=add, nn=Linear(in_features=2, out_features=1600, bias=True))
  (bn3): BatchNorm(40)
  (fc1): Linear(in_features=40, out_features=40, bias=True)
  (dropout): Dropout(p=0.5, inplace=False)
  (fc2): Linear(in_features=40, out_features=6, bias=True)
)

In [46]:
catalyst_predictions = []
with torch.no_grad():
    for d in graph4_loader:
        d = d.to(device)
        output = catalyst_model(d)
        catalyst_predictions.append(output.cpu().numpy())

tensor([771.6228,   4.2268,  -1.0000,  -1.0000,  13.1785,  -1.0000])
tensor([ 2.4209e+02, -1.3590e-01, -9.9998e-01, -1.0000e+00, -8.5645e-01,
        -1.0000e+00])
tensor([216.9659,   1.6439,  -0.9917,  -1.0000,   4.9337,  -1.0000])
tensor([-0.9992, -0.1190,  0.0243,  0.6210, -0.2396, -0.1415])
tensor([504.4847,   3.3720,  -1.0000,  -1.0000,  10.6343,  -1.0000])
tensor([-0.9998, -0.1714, -0.0592,  0.7960, -0.3524, -0.3466])
tensor([-0.9998, -0.1613, -0.0434,  0.7923, -0.3369, -0.2928])
tensor([105.3173,   1.8445,   0.1199,  -1.0000,   5.2971,   0.4638])
tensor([523.0502,   3.4648,  -1.0000,  -1.0000,  10.8480,  -1.0000])
tensor([-0.9981, -0.0861,  0.0709,  0.5225, -0.1673,  0.0124])
tensor([ 9.7785,  0.4556,  0.6747, -0.7213,  1.2217,  2.2252])
tensor([ 7.5270,  0.4654,  0.7810, -0.5920,  1.2566,  2.8003])
tensor([162.3618,  -0.5955,  -0.9999,  -1.0000,  -0.9927,  -1.0000])
tensor([261.4974,   2.7896,  -0.9772,  -1.0000,   8.5230,  -1.0000])
tensor([68.8772,  1.8454,  1.5403, -0.9997, 

In [47]:
catalyst_predictions

[array([771.6228   ,   4.2267838,  -1.       ,  -1.       ,  13.178467 ,
         -1.       ], dtype=float32),
 array([ 2.4208940e+02, -1.3590103e-01, -9.9997997e-01, -1.0000000e+00,
        -8.5645056e-01, -1.0000000e+00], dtype=float32),
 array([216.96588   ,   1.6438618 ,  -0.99165404,  -1.        ,
          4.933653  ,  -1.        ], dtype=float32),
 array([-0.99924564, -0.11904496,  0.02428607,  0.6209922 , -0.2396239 ,
        -0.14151388], dtype=float32),
 array([504.4847    ,   3.3719711 ,  -0.99999446,  -1.        ,
         10.634279  ,  -1.        ], dtype=float32),
 array([-0.99981105, -0.17143017, -0.05919474,  0.79604495, -0.3523811 ,
        -0.34655213], dtype=float32),
 array([-0.99977034, -0.16125399, -0.04339999,  0.79230845, -0.3369103 ,
        -0.29280084], dtype=float32),
 array([105.31731   ,   1.844468  ,   0.11989611,  -0.99999726,
          5.2971134 ,   0.46380264], dtype=float32),
 array([523.05023  ,   3.4647634,  -0.9999968,  -1.       ,  10.848004 ,
   