In [None]:
!pip install rdkit
!pip install networkx
!pip install torch-geometric

In [None]:
import torch
import torch.nn as nn
import gdown
import pandas as pd
import torch
from rdkit import Chem
from torch_geometric.data import Data, Batch
import networkx as nx
import matplotlib.pyplot as plt
from torch_geometric.utils import to_networkx

# DSI Pickle

In [None]:
import torch
from torch_geometric.nn import GCNConv, NNConv, BatchNorm
from torch_geometric.data import Data

class DSIGNN(torch.nn.Module):
    """Edge-conditioned GNN that scores one molecule through a virtual read-out node.

    Two ``NNConv`` layers run over the molecule's own atoms and bonds. A zero
    "read-out" row is then appended, a third ``NNConv`` layer runs over the
    atom -> read-out edges, and the read-out row is pushed through two linear
    layers.

    Args:
        in_channels: number of features per input node.
        hidden_channels: width of every hidden layer.
        out_channels: number of values predicted per graph.
        edge_channels: number of features per edge.
    """

    def __init__(self, in_channels, hidden_channels, out_channels, edge_channels):
        super(DSIGNN, self).__init__()
        # Each NNConv maps an edge's features to an (in x out) weight matrix.
        self.conv1 = NNConv(in_channels, hidden_channels, nn=torch.nn.Linear(edge_channels, in_channels * hidden_channels))
        self.bn1 = BatchNorm(hidden_channels)

        self.conv1b = NNConv(hidden_channels, hidden_channels, nn=torch.nn.Linear(edge_channels, hidden_channels * hidden_channels))
        # NOTE(review): bn1b is created but forward() normalises with bn1 both times.
        self.bn1b = BatchNorm(hidden_channels)

        self.conv2 = NNConv(hidden_channels, hidden_channels, nn=torch.nn.Linear(edge_channels, hidden_channels * hidden_channels))
        self.bn2 = BatchNorm(hidden_channels)

        self.fc1 = torch.nn.Linear(hidden_channels, hidden_channels)
        self.fc2 = torch.nn.Linear(hidden_channels, out_channels)
        self.dropout = torch.nn.Dropout(p=0.5)

    def l2_regularization(self):
        """Return the sum of squared entries over every parameter of the model."""
        l2_reg = 0
        for param in self.parameters():
            l2_reg += torch.sum(param ** 2)
        return l2_reg

    def forward(self, graph):
        """Predict ``out_channels`` values for the single graph held in ``graph``.

        Args:
            graph: object exposing ``x``, ``edge_index``, ``edge_attr`` and
                ``indices``. ``indices`` maps ``"node"``, ``"edge"`` and
                ``"edge_index"`` to per-graph lists of index groups. Only entry
                ``[0]`` is read, so exactly one graph per batch is supported.
                Group 0 selects the molecule, group 1 the read-out edges.

        Returns:
            1-D tensor of length ``out_channels`` (the read-out row after the
            dense head).
        """
        v = graph.x
        edge_index = graph.edge_index
        e = graph.edge_attr
        indices = graph.indices

        # Molecule nodes only; the read-out row is rebuilt below from zeros.
        v1 = v[torch.tensor(indices["node"][0][0])]

        e1 = e[torch.tensor(indices["edge"][0][0])]
        e2 = e[torch.tensor(indices["edge"][0][1])]

        eidx1 = edge_index[:, torch.tensor(indices["edge_index"][0][0])]
        eidx2 = edge_index[:, torch.tensor(indices["edge_index"][0][1])]

        graph1 = self.conv1(v1, eidx1, e1)
        graph1 = self.bn1(graph1)
        graph1 = torch.nn.functional.elu(graph1)

        graph1 = self.conv1b(graph1, eidx1, e1)
        # NOTE(review): bn1 (not bn1b) is applied again here, unlike the sibling
        # models. Left unchanged because weights pickled from this class were
        # produced with this wiring; switching to bn1b would alter predictions.
        graph1 = self.bn1(graph1)
        graph1 = torch.nn.functional.elu(graph1)

        # Zero read-out row sized from the activations (previously a literal 40)
        # and created on the same device/dtype so the cat also works on CUDA.
        readout_row = graph1.new_zeros((1, graph1.shape[1]))
        combined_nodes = torch.cat((graph1, readout_row), dim=0)
        v2 = self.conv2(combined_nodes, eidx2, e2)
        v2 = self.bn2(v2)
        v2 = torch.nn.functional.elu(v2)
        # The read-out node is the last row.
        v2 = self.dropout(v2[-1])

        v2 = self.fc1(v2)
        v2 = torch.nn.functional.elu(v2)

        v2 = self.fc2(v2)
        v2 = torch.nn.functional.elu(v2)

        return v2

In [None]:
import pickle
from google.colab import drive

# Mount Google Drive so the pickled model below is reachable (Colab only).
drive.mount('/content/drive')

# Load the model
# Unpickling a class instance needs that class to be defined in this kernel first.
# NOTE(security): pickle.load can execute arbitrary code; only open trusted files.
with open('/content/drive/MyDrive/CS 566/Pickle Files/dsi_model.pkl', 'rb') as f:
    dsi_model = pickle.load(f)

# Put the model in evaluation mode
dsi_model.eval()

# DDI Pickle

In [None]:
class DDIGNN(torch.nn.Module):
    """Edge-conditioned GNN that scores a pair of molecules through a shared read-out node.

    Each molecule is encoded by its own pair of ``NNConv`` layers. The two
    encodings plus a zero read-out row are stacked, a final ``NNConv`` runs
    over the atom -> read-out edges, and the read-out row is passed through two
    linear layers.

    Args:
        in_channels: number of features per input node.
        hidden_channels: width of every hidden layer.
        out_channels: number of values predicted per pair.
        edge_channels: number of features per edge.
    """

    def __init__(self, in_channels, hidden_channels, out_channels, edge_channels):
        super(DDIGNN, self).__init__()
        # Encoder for the first molecule.
        self.conv1 = NNConv(in_channels, hidden_channels, nn=torch.nn.Linear(edge_channels, in_channels * hidden_channels))
        self.bn1 = BatchNorm(hidden_channels)

        self.conv1b = NNConv(hidden_channels, hidden_channels, nn=torch.nn.Linear(edge_channels, hidden_channels * hidden_channels))
        self.bn1b = BatchNorm(hidden_channels)

        # Encoder for the second molecule.
        self.conv2 = NNConv(in_channels, hidden_channels, nn=torch.nn.Linear(edge_channels, in_channels * hidden_channels))
        self.bn2 = BatchNorm(hidden_channels)

        self.conv2b = NNConv(hidden_channels, hidden_channels, nn=torch.nn.Linear(edge_channels, hidden_channels * hidden_channels))
        self.bn2b = BatchNorm(hidden_channels)

        # Aggregation onto the read-out node.
        self.conv3 = NNConv(hidden_channels, hidden_channels, nn=torch.nn.Linear(edge_channels, hidden_channels * hidden_channels))
        self.bn3 = BatchNorm(hidden_channels)

        self.fc1 = torch.nn.Linear(hidden_channels, hidden_channels)
        self.dropout = torch.nn.Dropout(p=0.5)

        self.fc2 = torch.nn.Linear(hidden_channels, out_channels)

    def l2_regularization(self):
        """Return the sum of squared entries over every parameter of the model."""
        l2_reg = 0
        for param in self.parameters():
            l2_reg += torch.sum(param ** 2)
        return l2_reg

    def forward(self, graph):
        """Predict ``out_channels`` values for the single molecule pair in ``graph``.

        Args:
            graph: object exposing ``x``, ``edge_index``, ``edge_attr`` and
                ``indices``. ``indices`` maps ``"node"``, ``"edge"`` and
                ``"edge_index"`` to per-graph lists of index groups. Only entry
                ``[0]`` is read, so exactly one pair per batch is supported.
                Groups 0 and 1 select the two molecules, group 2 the read-out
                edges.

        Returns:
            1-D tensor of length ``out_channels``.
        """
        v = graph.x
        edge_index = graph.edge_index
        e = graph.edge_attr
        indices = graph.indices

        v1 = v[torch.tensor(indices["node"][0][0])]
        v2 = v[torch.tensor(indices["node"][0][1])]

        e1 = e[torch.tensor(indices["edge"][0][0])]
        e2 = e[torch.tensor(indices["edge"][0][1])]
        e3 = e[torch.tensor(indices["edge"][0][2])]

        eidx1 = edge_index[:, torch.tensor(indices["edge_index"][0][0])]
        # The second molecule's edges are stored with a node offset equal to the
        # size of the first molecule; remove it so they index into v2.
        eidx2 = torch.sub(edge_index[:, torch.tensor(indices["edge_index"][0][1])], v1.shape[0])
        eidx3 = edge_index[:, torch.tensor(indices["edge_index"][0][2])]

        graph1 = self.conv1(v1, eidx1, e1)
        graph1 = self.bn1(graph1)
        graph1 = torch.nn.functional.elu(graph1)

        graph1 = self.conv1b(graph1, eidx1, e1)
        graph1 = self.bn1b(graph1)
        graph1 = torch.nn.functional.elu(graph1)

        graph2 = self.conv2(v2, eidx2, e2)
        graph2 = self.bn2(graph2)
        graph2 = torch.nn.functional.elu(graph2)

        graph2 = self.conv2b(graph2, eidx2, e2)
        graph2 = self.bn2b(graph2)
        graph2 = torch.nn.functional.elu(graph2)

        # Build the zero read-out row on graph1's device/dtype; a plain
        # torch.zeros lives on the CPU and breaks the cat once the model is on CUDA.
        readout_row = graph1.new_zeros((1, graph1.shape[1]))
        combined_nodes = torch.cat((graph1, graph2, readout_row), dim=0)
        v3 = self.conv3(combined_nodes, eidx3, e3)
        v3 = self.bn3(v3)
        v3 = torch.nn.functional.elu(v3)
        # The read-out node is the last row.
        v3 = self.dropout(v3[-1])

        v3 = self.fc1(v3)
        v3 = torch.nn.functional.elu(v3)

        v3 = self.fc2(v3)
        v3 = torch.nn.functional.elu(v3)

        return v3

In [None]:
# Load the pickled DDI model; its class must already be defined in this kernel.
# NOTE(security): pickle.load can execute arbitrary code; only open trusted files.
with open('/content/drive/MyDrive/CS 566/Pickle Files/ddi_model.pkl', 'rb') as f:
    ddi_model = pickle.load(f)

# Put the model in evaluation mode
ddi_model.eval()

# Effectiveness Pickle

In [None]:
class EffectiveGNN(torch.nn.Module):
    """Edge-conditioned GNN that scores one molecule through a virtual read-out node.

    Two ``NNConv`` layers encode the molecule. A zero read-out row is appended,
    a third ``NNConv`` aggregates over the atom -> read-out edges, and the
    read-out row is passed through two linear layers.

    Args:
        in_channels: number of features per input node.
        hidden_channels: width of every hidden layer.
        out_channels: number of values predicted per graph.
        edge_channels: number of features per edge.
    """

    def __init__(self, in_channels, hidden_channels, out_channels, edge_channels):
        super(EffectiveGNN, self).__init__()
        self.conv1 = NNConv(in_channels, hidden_channels, nn=torch.nn.Linear(edge_channels, in_channels * hidden_channels))
        self.bn1 = BatchNorm(hidden_channels)

        self.conv1b = NNConv(hidden_channels, hidden_channels, nn=torch.nn.Linear(edge_channels, hidden_channels * hidden_channels))
        self.bn1b = BatchNorm(hidden_channels)

        self.conv2 = NNConv(hidden_channels, hidden_channels, nn=torch.nn.Linear(edge_channels, hidden_channels * hidden_channels))
        self.bn2 = BatchNorm(hidden_channels)

        self.fc1 = torch.nn.Linear(hidden_channels, hidden_channels)
        self.fc2 = torch.nn.Linear(hidden_channels, out_channels)
        self.dropout = torch.nn.Dropout(p=0.5)

    def l2_regularization(self):
        """Return the sum of squared entries over every parameter of the model."""
        l2_reg = 0
        for param in self.parameters():
            l2_reg += torch.sum(param ** 2)
        return l2_reg

    def forward(self, graph):
        """Predict ``out_channels`` values for the single graph held in ``graph``.

        Args:
            graph: object exposing ``x``, ``edge_index``, ``edge_attr`` and
                ``indices``. ``indices`` maps ``"node"``, ``"edge"`` and
                ``"edge_index"`` to per-graph lists of index groups. Only entry
                ``[0]`` is read, so exactly one graph per batch is supported.
                Group 0 selects the molecule, group 1 the read-out edges.

        Returns:
            1-D tensor of length ``out_channels``.
        """
        v = graph.x
        edge_index = graph.edge_index
        e = graph.edge_attr
        indices = graph.indices

        v1 = v[torch.tensor(indices["node"][0][0])]

        e1 = e[torch.tensor(indices["edge"][0][0])]
        e2 = e[torch.tensor(indices["edge"][0][1])]

        eidx1 = edge_index[:, torch.tensor(indices["edge_index"][0][0])]
        eidx2 = edge_index[:, torch.tensor(indices["edge_index"][0][1])]

        graph1 = self.conv1(v1, eidx1, e1)
        graph1 = self.bn1(graph1)
        graph1 = torch.nn.functional.elu(graph1)

        graph1 = self.conv1b(graph1, eidx1, e1)
        graph1 = self.bn1b(graph1)
        graph1 = torch.nn.functional.elu(graph1)

        # Build the zero read-out row on graph1's device/dtype; a plain
        # torch.zeros lives on the CPU and breaks the cat once the model is on CUDA.
        readout_row = graph1.new_zeros((1, graph1.shape[1]))
        combined_nodes = torch.cat((graph1, readout_row), dim=0)

        graph2 = self.conv2(combined_nodes, eidx2, e2)
        graph2 = self.bn2(graph2)
        graph2 = torch.nn.functional.elu(graph2)

        # The read-out node is the last row.
        graph2 = self.dropout(graph2[-1])
        graph2 = self.fc1(graph2)
        graph2 = torch.nn.functional.elu(graph2)

        graph2 = self.fc2(graph2)
        graph2 = torch.nn.functional.elu(graph2)

        return graph2

In [None]:
# Re-mount Google Drive (force_remount refreshes an existing mount).
drive.mount('/content/drive', force_remount=True)

# Load the pickled effectiveness model; its class must already be defined in this kernel.
# NOTE(security): pickle.load can execute arbitrary code; only open trusted files.
with open('/content/drive/MyDrive/CS 566/Pickle Files/effectiveness_model.pkl', 'rb') as f:
    effectiveness_model = pickle.load(f)

# Put the model in evaluation mode
effectiveness_model.eval()

# Synergy Pickle

In [None]:
import torch
from torch_geometric.nn import GCNConv, NNConv, BatchNorm
from torch_geometric.data import Data

class SynergyGNN(torch.nn.Module):
    """Edge-conditioned GNN that scores a pair of molecules through a shared read-out node.

    Each molecule is encoded by its own pair of ``NNConv`` layers. The two
    encodings plus a zero read-out row are stacked, a final ``NNConv`` runs
    over the atom -> read-out edges, and the read-out row is mapped to the
    outputs by a single linear layer.

    Args:
        in_channels: number of features per input node.
        hidden_channels: width of every hidden layer.
        out_channels: number of values predicted per pair.
        edge_channels: number of features per edge.
    """

    def __init__(self, in_channels, hidden_channels, out_channels, edge_channels):
        super(SynergyGNN, self).__init__()
        # Encoder for the first molecule.
        self.conv1 = NNConv(in_channels, hidden_channels, nn=torch.nn.Linear(edge_channels, in_channels * hidden_channels))
        self.bn1 = BatchNorm(hidden_channels)

        self.conv1b = NNConv(hidden_channels, hidden_channels, nn=torch.nn.Linear(edge_channels, hidden_channels * hidden_channels))
        self.bn1b = BatchNorm(hidden_channels)

        # Encoder for the second molecule.
        self.conv2 = NNConv(in_channels, hidden_channels, nn=torch.nn.Linear(edge_channels, in_channels * hidden_channels))
        self.bn2 = BatchNorm(hidden_channels)

        self.conv2b = NNConv(hidden_channels, hidden_channels, nn=torch.nn.Linear(edge_channels, hidden_channels * hidden_channels))
        self.bn2b = BatchNorm(hidden_channels)

        # Aggregation onto the read-out node.
        self.conv3 = NNConv(hidden_channels, hidden_channels, nn=torch.nn.Linear(edge_channels, hidden_channels * hidden_channels))
        self.bn3 = BatchNorm(hidden_channels)

        self.dropout = torch.nn.Dropout(p=0.5)

        self.fc2 = torch.nn.Linear(hidden_channels, out_channels)

    def l2_regularization(self):
        """Return the sum of squared entries over every parameter of the model."""
        l2_reg = 0
        for param in self.parameters():
            l2_reg += torch.sum(param ** 2)
        return l2_reg

    def forward(self, graph):
        """Predict ``out_channels`` values for the single molecule pair in ``graph``.

        Args:
            graph: object exposing ``x``, ``edge_index``, ``edge_attr`` and
                ``indices``. ``indices`` maps ``"node"``, ``"edge"`` and
                ``"edge_index"`` to per-graph lists of index groups. Only entry
                ``[0]`` is read, so exactly one pair per batch is supported.
                Groups 0 and 1 select the two molecules, group 2 the read-out
                edges.

        Returns:
            1-D tensor of length ``out_channels``.
        """
        v = graph.x
        edge_index = graph.edge_index
        e = graph.edge_attr
        indices = graph.indices

        v1 = v[torch.tensor(indices["node"][0][0])]
        v2 = v[torch.tensor(indices["node"][0][1])]

        e1 = e[torch.tensor(indices["edge"][0][0])]
        e2 = e[torch.tensor(indices["edge"][0][1])]
        e3 = e[torch.tensor(indices["edge"][0][2])]

        eidx1 = edge_index[:, torch.tensor(indices["edge_index"][0][0])]
        # The second molecule's edges are stored with a node offset equal to the
        # size of the first molecule; remove it so they index into v2.
        eidx2 = torch.sub(edge_index[:, torch.tensor(indices["edge_index"][0][1])], v1.shape[0])
        eidx3 = edge_index[:, torch.tensor(indices["edge_index"][0][2])]

        graph1 = self.conv1(v1, eidx1, e1)
        graph1 = self.bn1(graph1)
        graph1 = torch.nn.functional.elu(graph1)

        graph1 = self.conv1b(graph1, eidx1, e1)
        graph1 = self.bn1b(graph1)
        graph1 = torch.nn.functional.elu(graph1)

        graph2 = self.conv2(v2, eidx2, e2)
        graph2 = self.bn2(graph2)
        graph2 = torch.nn.functional.elu(graph2)

        graph2 = self.conv2b(graph2, eidx2, e2)
        graph2 = self.bn2b(graph2)
        graph2 = torch.nn.functional.elu(graph2)

        # Build the zero read-out row on graph1's device/dtype; a plain
        # torch.zeros lives on the CPU and breaks the cat once the model is on CUDA.
        readout_row = graph1.new_zeros((1, graph1.shape[1]))
        combined_nodes = torch.cat((graph1, graph2, readout_row), dim=0)
        v3 = self.conv3(combined_nodes, eidx3, e3)
        v3 = self.bn3(v3)
        v3 = torch.nn.functional.elu(v3)
        # The read-out node is the last row.
        v3 = self.dropout(v3[-1])

        v3 = self.fc2(v3)
        v3 = torch.nn.functional.elu(v3)

        return v3

In [None]:
# Load the pickled catalyst/synergy model; its class must already be defined in this kernel.
# NOTE(review): presumably an instance of SynergyGNN defined above - confirm.
# NOTE(security): pickle.load can execute arbitrary code; only open trusted files.
with open('/content/drive/MyDrive/CS 566/Pickle Files/catalyst_model.pkl', 'rb') as f:
    catalyst_model = pickle.load(f)

# Put the model in evaluation mode
catalyst_model.eval()

In [None]:
import pandas as pd

In [None]:
# Read the merged dataset that every prediction section below iterates over.
df = pd.read_csv('/content/drive/MyDrive/CS 566/Notebooks/three_merged.csv')

In [None]:
# Remove the saved index column and the duplicate ID columns.
# Re-running this cell on the already-trimmed frame raises KeyError.
df = df.drop(
    columns=[
        'Unnamed: 0',
        'Drug_ID_y',
        'Drug1_ID_1',
        'Drug2_ID_1',
        'Drug2_ID_2',
        'Drug1_ID_2',
    ]
)

In [None]:
# Give the merge-suffixed columns descriptive names.
column_renames = {
    "Drug_ID_x": "Drug_ID",
    "Y_x": "Solubility",
    "Drug_canon": "Drug_standardized",
    "Y_y": "Binding_Affinity",
    "Y_1": "Drug_Interaction",
    "Y.1_1": "Description",
    "Y.1_2": "Side_Effect",
}
df = df.rename(columns=column_renames)

In [None]:
import pandas as pd
# Lookup table of Target_ID -> Target_encoded taken from the effectiveness dataset.
harsh = pd.read_csv('/content/drive/MyDrive/CS 566/Notebooks/Updated_effectiveness.csv')

target_codes = harsh.set_index('Target_ID')['Target_encoded']
mydict = target_codes.to_dict()

# Targets missing from the lookup table become NaN.
df['Target_encoded'] = df['Target_ID'].map(mydict)

In [None]:
def molecule_to_graph(molecule):
    """Convert an RDKit molecule into a PyTorch Geometric ``Data`` graph.

    Nodes are atoms (features from ``atom_feature_vector``). Every bond
    contributes two directed edges, one per direction, both carrying the
    features from ``bond_feature_vector``.

    Args:
        molecule: RDKit molecule, e.g. the result of ``Chem.MolFromSmiles``.

    Returns:
        ``Data`` with ``x`` (num_atoms x atom features), ``edge_index``
        (2 x 2*num_bonds, long) and ``edge_attr`` (2*num_bonds x bond features).

    Raises:
        ValueError: if ``molecule`` is ``None`` (RDKit returns ``None`` for a
            SMILES string it cannot parse).
    """
    if molecule is None:
        raise ValueError("molecule is None; the SMILES string could not be parsed")

    x = torch.tensor([atom_feature_vector(atom) for atom in molecule.GetAtoms()], dtype=torch.float)

    edge_index = []
    edge_attr = []
    for bond in molecule.GetBonds():
        begin, end = bond.GetBeginAtomIdx(), bond.GetEndAtomIdx()
        features = bond_feature_vector(bond)
        edge_index.extend([[begin, end], [end, begin]])
        edge_attr.extend([features, features])

    # reshape(-1, 2) keeps the (2, 0) layout even when the molecule has no bonds.
    edge_index = torch.tensor(edge_index, dtype=torch.long).reshape(-1, 2).t().contiguous()
    edge_attr = torch.tensor(edge_attr, dtype=torch.float)
    if edge_attr.dim() == 1:
        # No bonds: keep a 2-D (0, 2) tensor so callers can still read
        # edge_attr.shape[1]; 2 is the length of bond_feature_vector's result.
        edge_attr = edge_attr.reshape(0, 2)
    return Data(x=x, edge_index=edge_index, edge_attr=edge_attr)

def atom_feature_vector(atom):
    """Return ``[atomic number, degree, hybridization]`` for an RDKit atom."""
    atomic_number = atom.GetAtomicNum()
    degree = atom.GetDegree()
    hybridization = atom.GetHybridization()
    return [atomic_number, degree, hybridization]

def bond_feature_vector(bond):
    """Return ``[bond type as a float, whether the bond is in a ring]`` for an RDKit bond."""
    bond_order = bond.GetBondTypeAsDouble()
    in_ring = bond.IsInRing()
    return [bond_order, in_ring]

def visualize(graph):
    """Draw a PyTorch Geometric graph with networkx using a spring layout.

    Edge labels are drawn only when the converted networkx edges carry an
    ``edge_attr`` entry.

    Args:
        graph: PyTorch Geometric ``Data`` object to draw.
    """
    nx_graph = to_networkx(graph, to_undirected=True)

    plt.figure(figsize=(10, 10))
    pos = nx.spring_layout(nx_graph)

    nx.draw_networkx(nx_graph, pos, with_labels=True, node_size=500, font_size=12, font_weight='bold')

    # Peek at the first edge; a graph without edges has nothing to label
    # (a bare next() would raise StopIteration here).
    first_edge = next(iter(nx_graph.edges(data=True)), None)
    if first_edge is not None and 'edge_attr' in first_edge[-1]:
        edge_labels = {(u, v): d['edge_attr'] for u, v, d in nx_graph.edges(data=True)}
        nx.draw_networkx_edge_labels(nx_graph, pos, edge_labels=edge_labels, font_size=10)

    plt.axis('off')
    plt.show()

# DSI Predictions

In [None]:
import numpy as np
# One graph per usable row: the molecule plus a zero-feature read-out node
# that every atom points to.
graphs1 = []

for _, row in df.iterrows():
    try:
        smiles = row['Drug']

        # Skip rows without a SMILES string. Previously the molecule from the
        # preceding row was silently reused (or a NameError hit the first row).
        # Skipped rows make graphs1 shorter than df.
        if not isinstance(smiles, str):
            print(f"Error processing row: 'Drug' is not a SMILES string: {smiles!r}")
            continue

        molecule = Chem.MolFromSmiles(smiles)
        if molecule is None:
            print(f"Error processing row: could not parse SMILES {smiles!r}")
            continue

        graph = molecule_to_graph(molecule)
        num_nodes = graph.x.shape[0]
        num_bond_edges = graph.edge_index.shape[1]
        num_bond_attrs = graph.edge_attr.shape[0]

        # Read-out node: all-zero features, appended after the atoms.
        readout_features = torch.zeros(1, graph.x.shape[1])
        readout_node_index = num_nodes
        combined_x = torch.cat([graph.x, readout_features], dim=0)

        # One directed edge from every atom to the read-out node.
        additional_edge_index = torch.tensor(
            [[i, readout_node_index] for i in range(num_nodes)], dtype=torch.long
        ).reshape(-1, 2).t().contiguous()

        # The added edges all carry the same attributes (ones).
        additional_edge_attr = torch.ones((additional_edge_index.shape[1], graph.edge_attr.shape[1]), dtype=torch.float)

        combined_edge_index = torch.cat([graph.edge_index, additional_edge_index], dim=1)
        combined_edge_attr = torch.cat([graph.edge_attr, additional_edge_attr], dim=0)

        # Positions of the molecule part (group 0) and the read-out part
        # (group 1) inside the combined tensors.
        idx_dict = {
            "node": [
                np.arange(0, num_nodes),
                np.arange(num_nodes, num_nodes + 1),
            ],
            "edge_index": [
                np.arange(0, num_bond_edges),
                np.arange(num_bond_edges, num_bond_edges + additional_edge_index.shape[1]),
            ],
            "edge": [
                np.arange(0, num_bond_attrs),
                np.arange(num_bond_attrs, num_bond_attrs + additional_edge_attr.shape[0]),
            ],
        }

        # Create the final graph data object with the combined nodes, edges, and attributes
        combined_graph = Data(x=combined_x, edge_index=combined_edge_index, edge_attr=combined_edge_attr, y=readout_features)
        combined_graph.indices = idx_dict
        graphs1.append(combined_graph)

    except Exception as e:
        print(f"Error processing row: {e}")
        continue



In [None]:
from torch_geometric.loader import DataLoader

# batch_size=1 matters: the model classes in this notebook only read entry [0] of graph.indices.
# shuffle=False keeps predictions in the order the graphs were built.
graph1_loader = DataLoader(graphs1, batch_size=1, shuffle=False,  drop_last=False)

In [None]:
# Use the GPU when one is available, otherwise the CPU, and move the model there.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
dsi_model.to(device)

In [None]:
# Run the DSI model over every graph, one graph per batch, without gradients.
dsi_predictions = []
with torch.no_grad():
    for d in graph1_loader:
        d = d.to(device)
        output = dsi_model(d)
        # list() splits the numpy array along its first axis, as extend() would.
        dsi_predictions += list(output.cpu().numpy())

In [None]:
# Assigning a plain list as a column requires len(dsi_predictions) == len(df);
# any row skipped while building graphs1 makes this raise ValueError.
df['Solubility_pred']=dsi_predictions
# Quick look at the distinct predicted values.
print(set(dsi_predictions))

# DDI Predictions

In [None]:
import torch
from rdkit import Chem
from torch_geometric.data import Data
import numpy as np
# Define function placeholders (atom_feature_vector, bond_feature_vector, molecule_to_graph) here
# One graph per usable row: both molecules plus a shared read-out node that
# every atom of either molecule points to.
graphs2 = []
indices2 = []
count = 0  # never updated in this cell; kept in case other cells reference it

for _, row in df.iterrows():
    try:
        molecule1 = Chem.MolFromSmiles(row['Drug1'])
        molecule2 = Chem.MolFromSmiles(row['Drug2'])

        # Check if molecules are None (indicating parsing failure)
        if molecule1 is None or molecule2 is None:
            print("Parsing error: Skipping row due to invalid SMILES.")
            continue

        graph1 = molecule_to_graph(molecule1)
        graph2 = molecule_to_graph(molecule2)

        n1, n2 = graph1.x.shape[0], graph2.x.shape[0]
        num_edges1, num_edges2 = graph1.edge_index.shape[1], graph2.edge_index.shape[1]
        num_attrs1, num_attrs2 = graph1.edge_attr.shape[0], graph2.edge_attr.shape[0]

        # Read-out node: two zero targets, zero-padded to the node feature width.
        ddi_features = torch.zeros((1, 2), dtype=torch.float)
        ddi_features_node_index = n1 + n2
        pad_size = graph1.x.shape[1] - ddi_features.shape[1]
        ddi_features_padded = torch.cat([ddi_features, torch.zeros((ddi_features.shape[0], pad_size))], dim=1)

        combined_x = torch.cat([graph1.x, graph2.x, ddi_features_padded])

        # Edges from every atom of molecule 1 to the read-out node.
        graph1_to_y_edge_index = torch.tensor(
            [[i, ddi_features_node_index] for i in range(n1)], dtype=torch.long
        ).reshape(-1, 2).t().contiguous()

        # Edges from every atom of molecule 2 to the read-out node. Molecule 2's
        # node ids are offset by n1 because its nodes follow molecule 1's in combined_x.
        graph2_to_y_edge_index = torch.tensor(
            [[i + n1, ddi_features_node_index] for i in range(n2)], dtype=torch.long
        ).reshape(-1, 2).t().contiguous()

        # Combine the edge indices (molecule 2's bond edges get the same n1 offset).
        combined_edge_index = torch.cat(
            [graph1.edge_index, graph2.edge_index + n1, graph1_to_y_edge_index, graph2_to_y_edge_index],
            dim=1,
        )

        # Edge connections from every other node to Y node
        graph1_to_y_edge_attr = torch.ones((graph1_to_y_edge_index.shape[1], graph1.edge_attr.shape[1]), dtype=torch.float)
        graph2_to_y_edge_attr = torch.ones((graph2_to_y_edge_index.shape[1], graph2.edge_attr.shape[1]), dtype=torch.float)

        # Combine the edge attributes
        combined_edge_attr = torch.cat([graph1.edge_attr, graph2.edge_attr, graph1_to_y_edge_attr, graph2_to_y_edge_attr])

        num_y_edges = graph1_to_y_edge_index.shape[1] + graph2_to_y_edge_index.shape[1]
        num_y_attrs = graph1_to_y_edge_attr.shape[0] + graph2_to_y_edge_attr.shape[0]

        # Positions of molecule 1 (group 0), molecule 2 (group 1) and the
        # read-out part (group 2) inside the combined tensors.
        idx_dict = {
            "node": [
                np.arange(0, n1),
                np.arange(n1, n1 + n2),
                np.arange(n1 + n2, n1 + n2 + 1),
            ],
            "edge_index": [
                np.arange(0, num_edges1),
                np.arange(num_edges1, num_edges1 + num_edges2),
                np.arange(num_edges1 + num_edges2, num_edges1 + num_edges2 + num_y_edges),
            ],
            "edge": [
                np.arange(0, num_attrs1),
                np.arange(num_attrs1, num_attrs1 + num_attrs2),
                np.arange(num_attrs1 + num_attrs2, num_attrs1 + num_attrs2 + num_y_attrs),
            ],
        }

        # Create the combined graph
        combined_graph = Data(x=combined_x, edge_index=combined_edge_index, edge_attr=combined_edge_attr, y=ddi_features)
        graphs2.append(combined_graph)
        indices2.append(idx_dict)

    except Exception as e:
        # Report the failure instead of dropping the row silently; a skipped
        # row leaves graphs2 shorter than df.
        print(f"Error processing row: {e}")
        continue  # Skip this row and proceed to the next one



# Call the function with your data
# process_data(your_data)


In [None]:
# Attach each graph's index bookkeeping, then serve the graphs one per batch.
paired2 = list(zip(graphs2, indices2))
for graph, index in paired2:
    graph.indices = index
data2 = [paired_graph for paired_graph, _ in paired2]

graph2_loader = DataLoader(data2, batch_size=1, shuffle=False, drop_last=False)

In [None]:
# Use the GPU when one is available, otherwise the CPU, and move the model there.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
ddi_model.to(device)

In [None]:
# Run the DDI model over every graph, one graph per batch, without gradients.
ddi_predictions = []
with torch.no_grad():
    for d in graph2_loader:
        d = d.to(device)
        output = ddi_model(d)
        # list() splits the numpy array along its first axis, as extend() would.
        ddi_predictions += list(output.cpu().numpy())

In [None]:
def chunk_list(lst, chunk_size):
    """Split ``lst`` into consecutive slices of ``chunk_size`` items.

    The last slice is shorter when ``len(lst)`` is not a multiple of
    ``chunk_size``.
    """
    chunks = []
    for start in range(0, len(lst), chunk_size):
        chunks.append(lst[start:start + chunk_size])
    return chunks

# Regroup the flat prediction list into one [Y_1, Y_2] pair per graph.
chunked_list = chunk_list(ddi_predictions, 2)

# One row per graph, one column per predicted target.
ddi_columns = ['Y_1_pred', 'Y_2_pred']
new_df = pd.DataFrame(chunked_list, columns=ddi_columns)

# Column assignment aligns on the index, so new_df rows must line up with df rows.
for column in ddi_columns:
    df[column] = new_df[column]

# Effectiveness Predictions

In [None]:
import numpy as np
import torch
from rdkit import Chem
from torch_geometric.data import Data

def target_mapping(target):
  """Placeholder for mapping a target; not implemented yet, always returns None."""
  # TODO: implement the mapping
  return None

# One graph per usable row: the molecule plus a zero-feature read-out node
# that every atom points to, tagged with the row's encoded target.
graphs3 = []
indices3 = []

for _, row in df.iterrows():
    # Rows without a SMILES string are skipped silently.
    if not isinstance(row['Drug'], str):
        continue

    molecule = Chem.MolFromSmiles(row['Drug'])
    # RDKit returns None for SMILES it cannot parse; skip the row as the DDI
    # loop does instead of crashing the whole loop.
    if molecule is None:
        print("Parsing error: Skipping row due to invalid SMILES.")
        continue

    graph = molecule_to_graph(molecule)
    num_nodes = graph.x.shape[0]
    num_bond_edges = graph.edge_index.shape[1]
    num_bond_attrs = graph.edge_attr.shape[0]

    required_feature_size = 1  # number of target values carried by the read-out node

    # Read-out node: a single zero target, zero-padded to the node feature width.
    catalyst_score_features = torch.zeros(1, required_feature_size)
    catalyst_score_node_index = num_nodes
    pad_size = graph.x.shape[1] - catalyst_score_features.shape[1]
    catalyst_score_features_padded = torch.cat(
        [catalyst_score_features, torch.zeros((catalyst_score_features.shape[0], pad_size))], dim=1
    )

    combined_x = torch.cat([graph.x, catalyst_score_features_padded], dim=0)

    # One directed edge from every atom to the read-out node, all with attributes of ones.
    additional_edge_index = torch.tensor(
        [[i, catalyst_score_node_index] for i in range(num_nodes)], dtype=torch.long
    ).reshape(-1, 2).t().contiguous()
    additional_edge_attr = torch.ones((additional_edge_index.shape[1], graph.edge_attr.shape[1]), dtype=torch.float)

    combined_edge_index = torch.cat([graph.edge_index, additional_edge_index], dim=1)
    combined_edge_attr = torch.cat([graph.edge_attr, additional_edge_attr], dim=0)

    # Positions of the molecule part (group 0) and the read-out part (group 1)
    # inside the combined tensors.
    idx_dict = {
        "node": [
            np.arange(0, num_nodes),
            np.arange(num_nodes, num_nodes + 1),
        ],
        "edge_index": [
            np.arange(0, num_bond_edges),
            np.arange(num_bond_edges, num_bond_edges + additional_edge_index.shape[1]),
        ],
        "edge": [
            np.arange(0, num_bond_attrs),
            np.arange(num_bond_attrs, num_bond_attrs + additional_edge_attr.shape[0]),
        ],
    }

    combined_graph = Data(x=combined_x, edge_index=combined_edge_index, edge_attr=combined_edge_attr, y=catalyst_score_features)
    combined_graph.target = torch.Tensor([[row['Target_encoded']]])

    graphs3.append(combined_graph)
    indices3.append(idx_dict)


In [None]:
# Attach each graph's index bookkeeping, then serve the graphs one per batch.
paired3 = list(zip(graphs3, indices3))
for graph, index in paired3:
    graph.indices = index
data3 = [paired_graph for paired_graph, _ in paired3]

graph3_loader = DataLoader(data3, batch_size=1, shuffle=False, drop_last=False)

In [None]:
# Use the GPU when one is available, otherwise the CPU, and move the model there.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
effectiveness_model.to(device)

In [None]:
# Run the effectiveness model over every graph, one graph per batch, without gradients.
effectiveness_predictions = []
with torch.no_grad():
    for d in graph3_loader:
        d = d.to(device)
        output = effectiveness_model(d)
        # list() splits the numpy array along its first axis, as extend() would.
        effectiveness_predictions += list(output.cpu().numpy())

In [None]:
# Exponentiate the raw outputs before storing them.
# NOTE(review): presumably the model was trained on log-transformed affinities - confirm.
# The array must have exactly len(df) entries or this assignment raises ValueError.
df['Binding_Affinity_pred']=np.exp(effectiveness_predictions)

# Catalyst Predictions

In [None]:
import numpy as np
import torch
from rdkit import Chem
from torch_geometric.data import Data

# One graph per usable row: both molecules plus a shared "Catalyst Score"
# read-out node that every atom of either molecule points to.
graphs4 = []
indices4 = []
for _, row in df.iterrows():
    # Convert SMILES to molecule objects
    molecule1 = Chem.MolFromSmiles(row['Drug1'])
    molecule2 = Chem.MolFromSmiles(row['Drug2'])

    # RDKit returns None for SMILES it cannot parse; skip the row as the DDI
    # loop does instead of crashing the whole loop.
    if molecule1 is None or molecule2 is None:
        print("Parsing error: Skipping row due to invalid SMILES.")
        continue

    # Convert molecules to graph representations
    graph1 = molecule_to_graph(molecule1)
    graph2 = molecule_to_graph(molecule2)

    n1, n2 = graph1.x.shape[0], graph2.x.shape[0]
    num_edges1, num_edges2 = graph1.edge_index.shape[1], graph2.edge_index.shape[1]
    num_attrs1, num_attrs2 = graph1.edge_attr.shape[0], graph2.edge_attr.shape[0]

    # Create the "Catalyst Score" node
    num_features = 6  # width of the read-out node (one slot per predicted score)
    catalyst_score_features = torch.zeros((1, num_features), dtype=torch.float)
    catalyst_score_node_index = n1 + n2

    # Pad the atom features with zero columns up to the read-out node's width.
    pad_size = catalyst_score_features.shape[1] - graph1.x.shape[1]
    graph1_x_padded = torch.cat([graph1.x, torch.zeros((n1, pad_size))], dim=1)
    graph2_x_padded = torch.cat([graph2.x, torch.zeros((n2, pad_size))], dim=1)

    combined_x = torch.cat([graph1_x_padded, graph2_x_padded, catalyst_score_features])

    # Edges from every atom of molecule 1 to the read-out node.
    graph1_to_y_edge_index = torch.tensor(
        [[i, catalyst_score_node_index] for i in range(n1)], dtype=torch.long
    ).reshape(-1, 2).t().contiguous()

    # Edges from every atom of molecule 2 to the read-out node. Molecule 2's
    # node ids are offset by n1 because its nodes follow molecule 1's in combined_x.
    graph2_to_y_edge_index = torch.tensor(
        [[i + n1, catalyst_score_node_index] for i in range(n2)], dtype=torch.long
    ).reshape(-1, 2).t().contiguous()

    # Combine the edge indices (molecule 2's bond edges get the same n1 offset).
    combined_edge_index = torch.cat(
        [graph1.edge_index, graph2.edge_index + n1, graph1_to_y_edge_index, graph2_to_y_edge_index],
        dim=1,
    )

    # Edge connections from every other node to Y node
    graph1_to_y_edge_attr = torch.ones((graph1_to_y_edge_index.shape[1], graph1.edge_attr.shape[1]), dtype=torch.float)
    graph2_to_y_edge_attr = torch.ones((graph2_to_y_edge_index.shape[1], graph2.edge_attr.shape[1]), dtype=torch.float)

    # Combine the edge attributes
    combined_edge_attr = torch.cat([graph1.edge_attr, graph2.edge_attr, graph1_to_y_edge_attr, graph2_to_y_edge_attr])

    num_y_edges = graph1_to_y_edge_index.shape[1] + graph2_to_y_edge_index.shape[1]
    num_y_attrs = graph1_to_y_edge_attr.shape[0] + graph2_to_y_edge_attr.shape[0]

    # Positions of molecule 1 (group 0), molecule 2 (group 1) and the read-out
    # part (group 2) inside the combined tensors.
    idx_dict = {
        "node": [
            np.arange(0, n1),
            np.arange(n1, n1 + n2),
            np.arange(n1 + n2, n1 + n2 + 1),
        ],
        "edge_index": [
            np.arange(0, num_edges1),
            np.arange(num_edges1, num_edges1 + num_edges2),
            np.arange(num_edges1 + num_edges2, num_edges1 + num_edges2 + num_y_edges),
        ],
        "edge": [
            np.arange(0, num_attrs1),
            np.arange(num_attrs1, num_attrs1 + num_attrs2),
            np.arange(num_attrs1 + num_attrs2, num_attrs1 + num_attrs2 + num_y_attrs),
        ],
    }

    # Create the combined graph
    combined_graph = Data(x=combined_x, edge_index=combined_edge_index, edge_attr=combined_edge_attr, y=catalyst_score_features)
    graphs4.append(combined_graph)
    indices4.append(idx_dict)



In [None]:
# Attach each graph's index bookkeeping, then serve the graphs one per batch.
paired4 = list(zip(graphs4, indices4))
for graph, index in paired4:
    graph.indices = index
data4 = [paired_graph for paired_graph, _ in paired4]

graph4_loader = DataLoader(data4, batch_size=1, shuffle=False, drop_last=False)

In [None]:
# Use the GPU when one is available, otherwise the CPU, and move the model there.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
catalyst_model.to(device)

In [None]:
# Run the catalyst model over every graph, one graph per batch, without gradients.
catalyst_predictions = []
with torch.no_grad():
    for d in graph4_loader:
        d = d.to(device)
        output = catalyst_model(d)
        # list() splits the numpy array along its first axis, as extend() would.
        catalyst_predictions += list(output.cpu().numpy())

In [None]:
# Regroup the flat prediction list into six values per graph.
chunked_list1 = chunk_list(catalyst_predictions, 6)

# One row per graph, one column per predicted score.
synergy_columns = [
    'CSS_pred',
    'Synergy_ZIP_pred',
    'Synergy_Bliss_pred',
    'Synergy_Loewe_pred',
    'Synergy_HSA_pred',
    'Synergy_pred',
]
new_df = pd.DataFrame(chunked_list1, columns=synergy_columns)

# Column assignment aligns on the index, so new_df rows must line up with df rows.
for column in synergy_columns:
    df[column] = new_df[column]

In [None]:
# Snap the DDI outputs to the nearest integer class id.
# astype(int) raises if a column still contains NaN.
for column in ('Y_1_pred', 'Y_2_pred'):
    df[column] = df[column].round().astype(int)

# Joins

In [None]:
!pip install PubChemPy

import pubchempy as pcp

# SMILES -> resolved name. Only definitive answers are stored; a lookup that
# raised is not cached, so a later call retries it.
_COMMON_NAME_CACHE = {}


def get_common_name(smiles):
    """Resolve a SMILES string to a common name via PubChem.

    Results are memoised per SMILES string so that a column containing the
    same compound many times triggers a single remote lookup for it.

    Args:
        smiles: SMILES string identifying the compound.

    Returns:
        str: one of
          * the first synonym PubChem lists for the first matching compound;
          * "No common name found" when that compound has no synonyms;
          * "No compound found" when nothing matches, or when `smiles` is not
            a non-blank string (no lookup is attempted in that case);
          * the exception message if the lookup raised.
    """
    # Missing values (None/NaN) or blank strings can never resolve; skip the
    # remote call entirely.
    if not isinstance(smiles, str) or not smiles.strip():
        return "No compound found"

    if smiles in _COMMON_NAME_CACHE:
        return _COMMON_NAME_CACHE[smiles]

    try:
        compounds = pcp.get_compounds(smiles, 'smiles')
        if not compounds:
            common_name = "No compound found"
        else:
            # Kept inside the try: reading attributes of the compound may
            # itself raise.
            synonyms = compounds[0].synonyms
            # The first synonym is used as the common name.
            common_name = synonyms[0] if synonyms else "No common name found"
    except Exception as e:
        # Best effort: report the failure text instead of aborting a bulk apply.
        return str(e)

    _COMMON_NAME_CACHE[smiles] = common_name
    return common_name

In [None]:
# Spot-check the lookup on a single Drug1 entry (the one at label 0).
first_drug1_smiles = df['Drug1'][0]
get_common_name(first_drug1_smiles)

'palmitic acid'

In [None]:
# Resolve a common name for every Drug1 entry (one get_common_name call per row).
drug_1_common_names = df['Drug1'].apply(get_common_name)
df['Drug_1_common_name'] = drug_1_common_names

In [None]:
# Resolve a common name for every Drug2 entry (one get_common_name call per row).
drug_2_common_names = df['Drug2'].apply(get_common_name)
df['Drug_2_common_name'] = drug_2_common_names

In [None]:
# Load the label-mapping tables from absolute paths (presumably a mounted Google
# Drive in Colab — the files must exist at exactly these locations).
# ddi_mapping supplies the Y_1 / Y_2 label definitions used in the following
# cells; eff_mapping is not referenced in the cells visible in this section.
ddi_mapping = pd.read_csv('/content/drive/MyDrive/CS 566/Notebooks/DDI_Rishabh_Mapping_Y1_Y2.csv')
eff_mapping = pd.read_csv('/content/drive/MyDrive/CS 566/Notebooks/target_mapping_effectiveness_dataset.csv')

In [None]:
# Build a Y_1 label -> definition dictionary from the mapping table.
y1_definitions = ddi_mapping.set_index('Y_1')['Y_1_definition']
mydict1 = y1_definitions.to_dict()

# Translate each predicted Y_1 label into its definition text; labels missing
# from the dictionary become NaN.
df['Y_1_definition'] = df['Y_1_pred'].map(mydict1)

In [None]:
# Build a Y_2 label -> definition dictionary from the mapping table.
y2_definitions = ddi_mapping.set_index('Y_2')['Y_2_definition']
mydict2 = y2_definitions.to_dict()

# Translate each predicted Y_2 label into its definition text; labels missing
# from the dictionary become NaN.
df['Y_2_definition'] = df['Y_2_pred'].map(mydict2)

In [None]:
import requests

def get_protein_sequence(protein_id, timeout=30):
    """Fetch the FASTA record of a UniProtKB entry over the UniProt REST API.

    Args:
        protein_id: UniProtKB identifier interpolated into the request URL.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the caller indefinitely.

    Returns:
        str: the response body when the server answers HTTP 200; otherwise a
        fixed "Error: ..." message. The same message is returned for a missing
        or blank identifier (no request is sent) and for network failures, so
        a bulk `.apply` over a column is not aborted by a single bad row.
    """
    error_message = "Error: Unable to retrieve data, please check the protein ID and your connection."

    # None / NaN cannot name an entry; avoid requesting ".../None.fasta".
    is_nan = isinstance(protein_id, float) and protein_id != protein_id
    if protein_id is None or is_nan:
        return error_message

    # Stray surrounding whitespace would otherwise end up inside the URL.
    protein_id = str(protein_id).strip()
    if not protein_id:
        return error_message

    url = f"https://rest.uniprot.org/uniprotkb/{protein_id}.fasta"
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException:
        # Connection errors and timeouts are reported the same way as HTTP errors.
        return error_message

    if response.status_code == 200:
        return response.text
    return error_message

In [None]:
# Fetch the UniProt FASTA text for every Target_ID (one request per row).
target_fasta_records = df['Target_ID'].apply(get_protein_sequence)
df['Target_info'] = target_fasta_records

In [None]:
# Display the column labels currently present on df.
df.columns

In [None]:
# Write df to CSV at an absolute path; index=False leaves the row index out of the file.
df.to_csv('/content/drive/MyDrive/CS 566/output.csv', index=False)


In [None]:
user = "patient"
prompt = f"I am a {user}. These are the lab results - "

# Static explanation of the combination metrics. It is the same for every row,
# so it is built once. Each fragment ends with a space: adjacent fragments were
# previously concatenated without one, producing words such as "compoundsand",
# "thetwo", "fromthe" and "(ZIP).The" in the generated prompt.
CATALYST_EXPLANATION = (
    "CSS measures the drug combination sensitivity and is derived using relative IC50 values of compounds "
    "and the area under their dose-response curves. The other four metrics capture the synergy between the "
    "two drugs. Synergy is a dimensionless measure of deviation of an observed drug combination response from "
    "the expected effect of non-interaction. Synergy is calculated using four different models: Bliss model, Highest Single Agent (HSA), "
    "Loewe additivity model and Zero Interaction Potency (ZIP). "
)

for index, row in df.iterrows():
    drug_1 = row['Drug_1_common_name']
    drug_2 = row['Drug_2_common_name']

    # The DDI definition text carries literal "#Drug1" / "#Drug2" placeholders;
    # substitute the resolved names so the prompt reads as a real sentence.
    # str() keeps this safe when the definition is NaN (unmapped label).
    ddi_definition = (
        str(row['Y_1_definition'])
        .replace('#Drug1', str(drug_1))
        .replace('#Drug2', str(drug_2))
    )

    # Effectiveness
    sample_prompt = prompt + f"The effectiveness of {drug_1} for {row['Target_info']} is {row['Binding_Affinity_pred']}. "

    # DDI
    sample_prompt += (
        f"The Drug-Drug Interaction between {drug_1} and {drug_2} is {ddi_definition}, "
        f"and the side effect of having Drug 2 with Drug 1 is {row['Y_2_definition']}. "
    )

    # DSI
    sample_prompt += f"The solubility of {drug_1} in water is {row['Solubility_pred']}. "

    # Catalyst
    sample_prompt += CATALYST_EXPLANATION
    sample_prompt += (
        f"The synergy between {drug_1} and {drug_2} "
        f"is given by CSS - {row['CSS_pred']}, Synergy - {row['Synergy_pred']}, ZIP - {row['Synergy_ZIP_pred']}, LOEWE - {row['Synergy_Loewe_pred']}, "
        f"HSA - {row['Synergy_HSA_pred']} and BLISS - {row['Synergy_Bliss_pred']}. \n"
    )

    # Stitch them together
    sample_prompt += "In the report, make sure to incorporate the effect on drug effectiveness from all these interactions."

    print(sample_prompt)
    print('\n\n\n')

I am a patient. These are the lab results - The effectiveness of palmitic acid for >sp|P15090|FABP4_HUMAN Fatty acid-binding protein, adipocyte OS=Homo sapiens OX=9606 GN=FABP4 PE=1 SV=3
MCDAFVGTWKLVSSENFDDYMKEVGVGFATRKVAGMAKPNMIISVNGDVITIKSESTFKN
TEISFILGQEFDEVTADDRKVKSTITLDGGVLVHVQKWDGKSTTIKRKREDDKLVVECVM
KGVTSTRVYERA
 is 0.5560332536697388. The Drug-Drug Interaction between palmitic acid and Dabigatran etexilate is The absorption of #Drug2 can be decreased when combined with #Drug1., and the side effect of having Drug 2 with Drug 1 is acute pancreatitis. The solubility of palmitic acid in water is -0.3353900909423828. CSS measures the drug combination sensitivity and is derived using relative IC50 values of compoundsand the area under their dose-response curves. The other four metrics capture the synergy between thetwo drugs. Synergy is a dimensionless measure of deviation of an observed drug combination response fromthe expected effect of non-interaction. Synergy is calculated usin