# Geometric Deep Learning for detecting solubility
This Jupyter notebook uses geometric deep learning techniques to predict the solubility of drugs.

In [2]:
#Using geometric deep learning to predict solubility
import numpy as np
from pysmiles import read_smiles
# Parse one example SMILES string into a graph object. With
# explicit_hydrogen=True the hydrogens appear as their own nodes
# (see the 'H' entries in the printed node list).
G = read_smiles("CN(C)C(=N)N=C(N)N", explicit_hydrogen=True)
# Each node is printed as a (node index, element symbol) pair.
print(G.nodes(data='element'))
# Each edge is printed as a pair of node indices.
print(G.edges)

[(0, 'C'), (1, 'N'), (2, 'C'), (3, 'C'), (4, 'N'), (5, 'N'), (6, 'C'), (7, 'N'), (8, 'N'), (9, 'H'), (10, 'H'), (11, 'H'), (12, 'H'), (13, 'H'), (14, 'H'), (15, 'H'), (16, 'H'), (17, 'H'), (18, 'H'), (19, 'H')]
[(0, 1), (0, 9), (0, 10), (0, 11), (1, 2), (1, 3), (2, 12), (2, 13), (2, 14), (3, 4), (3, 5), (4, 15), (5, 6), (6, 7), (6, 8), (7, 16), (7, 17), (8, 18), (8, 19)]


In [1]:
import numpy as np
import random
import matplotlib.pyplot as plt
from pysmiles import read_smiles
import pandas as pd
import logging
from tqdm import tqdm
import torch
from torch.nn import Sequential as Seq, Linear, ReLU, CrossEntropyLoss
import torch.nn.functional as F
from torch_geometric.nn import MessagePassing, GCNConv
from torch_geometric.utils import remove_self_loops, add_self_loops, degree
from torch_geometric.data import Data
# Raise the 'pysmiles' logger threshold to CRITICAL so that its warnings
# (and anything less severe) are not emitted while parsing.
logging.getLogger('pysmiles').setLevel(logging.CRITICAL)

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
# Load the dataset table from disk.
df = pd.read_csv('dataset.csv')
# Inputs: the SMILES column as a plain Python list.
X_smiles = df['SMILES'].tolist()
# Targets: the Solubility column as a NumPy array.
Y = df['Solubility'].to_numpy()
# Vocabulary of all element symbols in the dataset (precomputed).
# A symbol's position in this list is its one-hot index, so the order
# must stay fixed for encoded features to remain comparable.
elements = [
    'K', 'Y', 'V', 'Sm', 'Dy', 'In', 'Lu', 'Hg', 'Co', 'Mg',
    'Cu', 'Rh', 'Hf', 'O', 'As', 'Ge', 'Au', 'Mo', 'Br', 'Ce',
    'Zr', 'Ag', 'Ba', 'N', 'Cr', 'Sr', 'Fe', 'Gd', 'I', 'Al',
    'B', 'Se', 'Pr', 'Te', 'Cd', 'Pd', 'Si', 'Zn', 'Pb', 'Sn',
    'Cl', 'Mn', 'Cs', 'Na', 'S', 'Ti', 'Ni', 'Ru', 'Ca', 'Nd',
    'W', 'H', 'Li', 'Sb', 'Bi', 'La', 'Pt', 'Nb', 'P', 'F', 'C',
]


def element_to_onehot(element):
    """One-hot encode a sequence of element symbols.

    Args:
        element: Sized, iterable sequence of element symbols (for example
            a list or 1-D array of strings), one entry per atom.

    Returns:
        A float NumPy array of shape ``(len(element), len(elements))``.
        Row ``i`` is all zeros except for a 1.0 in the column given by the
        position of ``element[i]`` in the module-level ``elements`` list.
        An empty input yields an array of shape ``(0, len(elements))``.

    Raises:
        ValueError: If a symbol is not present in ``elements``.
    """
    # Preallocate the full matrix so the column dimension is kept even
    # when there are no atoms to encode.
    onehot = np.zeros((len(element), len(elements)))
    for row, symbol in enumerate(element):
        # list.index raises ValueError for symbols outside the vocabulary.
        onehot[row, elements.index(symbol)] = 1.0
    return onehot