In [1]:
import numpy as np
import pandas as pd
from rdkit import Chem
from scipy.linalg import sqrtm
from utils import *

In [2]:
# Load the side-effect table: per the displayed frame it holds a `smiles`
# column followed by 0/1 indicator columns, one per side-effect category.
# NOTE(review): the `Unnamed: 0` column in the output mirrors the row index,
# so it looks like an index that was written to the CSV — consider
# `index_col=0` after confirming nothing downstream relies on column positions.
mols = pd.read_csv("data/sider.csv")
mols

Unnamed: 0,smiles,Hepatobiliary disorders,Metabolism and nutrition disorders,Product issues,Eye disorders,Investigations,Musculoskeletal and connective tissue disorders,Gastrointestinal disorders,Social circumstances,Immune system disorders,...,"Congenital, familial and genetic disorders",Infections and infestations,"Respiratory, thoracic and mediastinal disorders",Psychiatric disorders,Renal and urinary disorders,"Pregnancy, puerperium and perinatal conditions",Ear and labyrinth disorders,Cardiac disorders,Nervous system disorders,"Injury, poisoning and procedural complications"
0,C(CNCCNCCNCCN)N,1,1,0,0,1,1,1,0,0,...,0,0,1,1,0,0,1,1,1,0
1,CC(C)(C)C1=CC(=C(C=C1NC(=O)C2=CNC3=CC=CC=C3C2=...,0,1,0,0,1,1,1,0,0,...,0,1,1,0,0,0,1,0,1,0
2,CC[C@]12CC(=C)[C@H]3[C@H]([C@@H]1CC[C@]2(C#C)O...,0,1,0,1,1,0,1,0,1,...,0,0,0,1,0,0,0,0,1,0
3,CCC12CC(=C)C3C(C1CC[C@]2(C#C)O)CCC4=CC(=O)CCC34,1,1,0,1,1,1,1,0,1,...,1,1,1,1,1,1,0,0,1,1
4,C1C(C2=CC=CC=C2N(C3=CC=CC=C31)C(=O)N)O,1,1,0,1,1,1,1,0,1,...,0,1,1,1,0,0,1,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1422,C[C@H]1CN(CC[C@@]1(C)C2=CC(=CC=C2)O)C[C@H](CC3...,0,1,0,0,0,1,1,0,0,...,0,0,0,0,1,0,0,0,0,0
1423,CC[C@@H]1[C@@]2([C@@H]([C@@H](C(=O)[C@@H](C[C@...,1,1,0,1,1,1,1,0,1,...,0,1,1,1,1,0,1,1,1,1
1424,CCOC1=CC=C(C=C1)CC2=C(C=CC(=C2)[C@H]3[C@@H]([C...,1,1,0,0,1,1,1,0,1,...,0,1,0,0,1,0,0,1,1,1
1425,C1CN(CCC1N2C3=CC=CC=C3NC2=O)CCCC(C4=CC=C(C=C4)...,0,1,0,1,1,1,1,0,0,...,0,0,0,1,1,0,0,1,1,1


In [3]:
"""
This is the procedure to get
H -> Node feature vector
E -> Edge feature vector
A -> Adjacency Matrix
for a certain molecule
"""

# Take the first molecule by position so the lookup does not depend on the
# frame's index labels.
smi = mols["smiles"].iloc[0]

# Parse the SMILES string into an RDKit molecule (the graph).
mol = Chem.MolFromSmiles(smi)
if mol is None:
    # MolFromSmiles reports an unparsable SMILES by returning None rather
    # than raising; fail here with a readable message instead of letting
    # RemoveHs choke on None.
    raise ValueError(f"RDKit could not parse SMILES: {smi!r}")
mol = Chem.RemoveHs(mol)  # Remove Hs

num_atoms = mol.GetNumAtoms()

# Raw per-atom and per-bond features, in RDKit's atom/bond iteration order.
node_feat = [atom.GetAtomicNum() for atom in mol.GetAtoms()]
edge_feat = [bond.GetBondType() for bond in mol.GetBonds()]

# Adjacency matrix (self-connected): symmetric, with ones on the diagonal.
A = np.zeros((num_atoms, num_atoms), dtype=int)
for bond in mol.GetBonds():
    i = bond.GetBeginAtomIdx()
    j = bond.GetEndAtomIdx()
    # Bonds are undirected, so mark both (i, j) and (j, i).
    A[i, j] = 1
    A[j, i] = 1
np.fill_diagonal(A, 1)

# Column vectors: one row per atom (H) and one row per bond (E).
H = np.array(node_feat).reshape(-1, 1)
E = np.array(edge_feat).reshape(-1, 1)
H.shape

(13, 1)

In [4]:
# Run a forward pass of the GIN layer on node features H and adjacency A.
# NOTE(review): GIN arrives via `from utils import *`; what `norma` toggles is
# not visible here — presumably adjacency normalisation, confirm in utils.
# NOTE(review): if GIN draws its weights randomly, seed it so this output is
# reproducible — TODO confirm.
gin = GIN(norma = True)
H_out = gin.forward(H, A)
H_out

array([[-21.47680187],
       [ 17.80961495],
       [ 13.53286212],
       [  8.07176291],
       [ -2.2182472 ],
       [-12.89965684],
       [-21.51215873],
       [  4.60775289],
       [  1.77825265],
       [ 10.66109944],
       [  8.85765765],
       [-11.04068196],
       [ -5.34094779]])

In [5]:
# Apply ReLU to the GIN output; in the displayed result the negative entries
# of H_out are replaced by 0 and the positive ones are unchanged.
ReLU.relu(H_out)

array([[ 0.        ],
       [17.80961495],
       [13.53286212],
       [ 8.07176291],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [ 4.60775289],
       [ 1.77825265],
       [10.66109944],
       [ 8.85765765],
       [ 0.        ],
       [ 0.        ]])

In [6]:
# Display the self-connected adjacency matrix of the example molecule.
A

array([[1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
       [1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0],
       [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1]])

In [7]:
# Degree matrix: the column sums of the self-connected adjacency matrix,
# placed on the diagonal of an otherwise-zero square matrix.
node_degrees = A.sum(axis=0)
D = np.diag(node_degrees)
D

array([[3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2]])

In [8]:
# Symmetrically normalised adjacency:  A_hat = D^{-1/2} · A · D^{-1/2}
#
# D is diagonal, so D^{-1/2} is simply the element-wise 1/sqrt of its
# diagonal. That avoids a general matrix square root followed by a dense
# inverse, both of which are unnecessary work for a diagonal input.
degrees = np.diag(D).astype(float)
if np.any(degrees == 0):
    # A zero degree makes D singular; raise the same exception type a dense
    # inverse would, but with a message that names the actual cause.
    raise np.linalg.LinAlgError("degree matrix D has a zero on its diagonal")
D_tilda = np.diag(1.0 / np.sqrt(degrees))
A_hat = D_tilda @ A @ D_tilda
A_hat

array([[0.33333333, 0.33333333, 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.40824829],
       [0.33333333, 0.33333333, 0.33333333, 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        ],
       [0.        , 0.33333333, 0.33333333, 0.33333333, 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.33333333, 0.33333333, 0.33333333,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        , 0.33333333, 0.33333333,
        0.33333333, 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        , 0.        , 0.33333333,
        0.33333333, 0.33333

In [9]:
# Same forward pass as before, now with `renorma = True` as well.
# NOTE(review): this rebinds `gin` and `H_out`, so any cell that reads H_out
# sees whichever GIN cell ran last — consider distinct names once it is clear
# nothing else depends on them.
# NOTE(review): the meaning of `renorma` is defined in utils, not here —
# TODO confirm.
gin = GIN(norma = True, renorma = True)
H_out = gin.forward(H, A)
H_out

array([[ 7.26526326],
       [14.88497448],
       [-6.17553578],
       [ 7.99891747],
       [-1.24696514],
       [-8.2359142 ],
       [ 4.37046077],
       [ 2.75918889],
       [ 3.79001556],
       [-0.22163833],
       [ 2.86473971],
       [-2.8465323 ],
       [11.79347215]])

In [10]:
# Zero-pad the node-feature matrix H to a fixed number of rows.
MAX_ATOMS = 25  # row count of the padded matrix

N = H.shape[0]
pad_size = MAX_ATOMS - N
if pad_size < 0:
    # np.pad rejects negative widths with a generic message; say what is
    # actually wrong instead.
    raise ValueError(f"molecule has {N} atoms, which exceeds MAX_ATOMS={MAX_ATOMS}")

# Padding rows are appended after the real atoms; the feature axis is untouched.
H_pad = np.pad(H, ((0, pad_size), (0, 0)), mode="constant", constant_values=0)
H_pad  # A needs the matching padding (extra rows and columns at the end) to stay aligned with H_pad

array([[6],
       [6],
       [7],
       [6],
       [6],
       [7],
       [6],
       [6],
       [7],
       [6],
       [6],
       [7],
       [7],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0]])

In [18]:
# Resize the node features and adjacency matrix together with the project's
# Resize helper; only the resized adjacency A_ is displayed.
# NOTE(review): presumably `λ` is the target node count and the extra
# rows/columns are zero padding (the displayed corners are 0) — confirm in utils.
# NOTE(review): the execution count jumps from 10 to 18; re-run the notebook
# top to bottom to make sure this cell does not rely on state from removed cells.
resize = Resize(λ = 32)
H_, A_ = resize.forward(H, A)
A_

array([[1, 1, 0, ..., 0, 0, 0],
       [1, 1, 1, ..., 0, 0, 0],
       [0, 1, 1, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]])