Link to the video: https://youtu.be/646Tblf1Qx4

In [None]:
%%capture
!pip install mordred
!pip install rdkit

In [None]:
# Importing Libraries
import rdkit
from rdkit import Chem
from rdkit.Chem import Draw, PandasTools, AllChem

import mordred
from mordred import Calculator, descriptors

In [None]:
import pickle
import pandas as pd
import warnings
warnings.filterwarnings("ignore")

In [None]:
!pip install session-info



In [None]:
import session_info
session_info.show()

In [None]:
# Load the two pickled objects the prediction functions below rely on:
# `scaler` is used through scaler.transform(...) and `rf` through rf.predict(...).
# NOTE(review): presumably a fitted feature scaler and a random-forest model,
# judging by the names — confirm against the training notebook.
# WARNING: pickle.load can execute arbitrary code while unpickling; only load
# 'scaler.pkl' and 'model_rf' from a source you trust.
with open('scaler.pkl','rb') as f:
    scaler = pickle.load(f)
with open('model_rf', 'rb') as f:
    rf = pickle.load(f)


# **Predict for a single SMILES string**

In [None]:
def predict_smiles(smiles):
  """Predict the solubility of one molecule given as a SMILES string.

  The SMILES is parsed with RDKit, explicit hydrogens are added and a 3D
  conformer is embedded. Mordred descriptors are then calculated, the eight
  descriptor columns listed below are selected, transformed with the
  module-level ``scaler`` and passed to the module-level ``rf`` model.

  Parameters
  ----------
  smiles : str
      SMILES string of the molecule.

  Returns
  -------
  The first element of ``rf.predict(...)``, i.e. the prediction for this
  molecule.

  Raises
  ------
  ValueError
      If RDKit cannot parse ``smiles``.
  """
  mol = Chem.MolFromSmiles(smiles)
  if mol is None:
    # MolFromSmiles reports a parse failure by returning None; fail here with
    # a readable message instead of an opaque error from AddHs(None).
    raise ValueError(f'Could not parse SMILES string: {smiles!r}')
  mol = Chem.AddHs(mol)
  # The embedding status code is not checked; the molecule is used either way.
  AllChem.EmbedMolecule(mol)

  df_mol = pd.DataFrame(data=[mol], columns=['mol'], dtype='object')
  # NOTE(review): predict_sol_smiles uses ignore_3D=True with the same eight
  # columns — confirm whether 3D descriptors are actually needed here.
  calc = Calculator(descriptors, ignore_3D=False)
  desc = calc.pandas(df_mol['mol'])

  # Only these eight descriptor columns are passed on to the scaler and model.
  feature_columns = ['FilterItLogS', 'Lipinski', 'SIC0', 'RNCG', 'RPCG',
                     'ATS0Z', 'PEOE_VSA6', 'AATS0i']
  desc_8 = desc[feature_columns]
  X_test = scaler.transform(desc_8)
  predict = rf.predict(X_test)
  return predict[0]

In [None]:
predict_smiles('OC1=CC=C(O)C2=C(O)C=CC(O)=C21')

100%|██████████| 1/1 [00:00<00:00,  4.35it/s]


-1.7495018777507203

# **Predict from a CSV file containing a list of SMILES strings**

In [None]:

def predict_csv(csv):
    """Predict the solubility for every SMILES in the first column of a table.

    Parameters
    ----------
    csv : pandas.DataFrame or anything accepted by ``pd.DataFrame(...)``
        Table whose first column (position 0) holds the SMILES strings.

    Returns
    -------
    pandas.DataFrame
        A copy of the input with the predictions stored in the column
        labelled ``1``. The frame is also printed.
    """
    # Explicit copy: pd.DataFrame(existing_frame) is not guaranteed to be
    # independent of its argument, and the caller's table must not gain a column.
    df = pd.DataFrame(csv).copy()

    # One predict_smiles call per row; an empty table yields an empty column.
    smiles_values = list(df.iloc[:, 0]) if len(df) else []
    df[1] = [predict_smiles(smiles) for smiles in smiles_values]

    print(df)
    return df

In [None]:
def predict_file(file):
  """Read a header-less CSV of SMILES strings and predict each row.

  Parameters
  ----------
  file : path or file-like object
      Passed straight to ``pd.read_csv(file, header=None)``; the SMILES
      strings are taken from the first column by ``predict_csv``.

  Returns
  -------
  The value returned by ``predict_csv`` (which also prints the table).
  """
  smiles_table = pd.read_csv(file, header=None)
  # Hand the result back to the caller instead of discarding it.
  return predict_csv(smiles_table)

In [None]:
predict_file('pred.csv')

100%|██████████| 1/1 [00:00<00:00,  6.07it/s]
100%|██████████| 1/1 [00:00<00:00,  5.74it/s]


                       0         1
0  NC1=CC(CO)=C(C#N)C=C1 -1.599943
1    NC1=CC(CO)=C(O)C=C1 -1.082530


# **Predict from a list of SMILES strings**

In [None]:
def predict_sol_smiles(smiles):
  """Predict the solubility for several SMILES strings in one batch.

  Every SMILES is parsed with RDKit, hydrogens are added and a conformer is
  embedded. Mordred descriptors (with ``ignore_3D=True``) are calculated for
  all molecules at once, the eight descriptor columns listed below are
  selected, transformed with the module-level ``scaler`` and passed to the
  module-level ``rf`` model.

  Parameters
  ----------
  smiles : iterable of str, or str
      SMILES strings to predict. A single string is treated as one molecule
      rather than being iterated character by character.

  Returns
  -------
  The output of ``rf.predict(...)``, one prediction per input SMILES in
  input order. The predictions are also printed.

  Raises
  ------
  ValueError
      If RDKit cannot parse one of the SMILES strings.
  """
  if isinstance(smiles, str):
    smiles = [smiles]

  mols = []
  for smile in smiles:
    mol = Chem.MolFromSmiles(smile)
    if mol is None:
      # MolFromSmiles reports a parse failure by returning None; name the
      # offending entry instead of failing obscurely inside AddHs.
      raise ValueError(f'Could not parse SMILES string: {smile!r}')
    mol = Chem.AddHs(mol)
    # The embedding status code is not checked; the molecule is used either way.
    AllChem.EmbedMolecule(mol)
    mols.append(mol)

  df_mol = pd.DataFrame(data=mols, columns=['mol'], dtype='object')
  calc = Calculator(descriptors, ignore_3D=True)
  desc = calc.pandas(df_mol['mol'])

  # Only these eight descriptor columns are passed on to the scaler and model.
  feature_columns = ['FilterItLogS', 'Lipinski', 'SIC0', 'RNCG', 'RPCG',
                     'ATS0Z', 'PEOE_VSA6', 'AATS0i']
  desc_8 = desc[feature_columns]
  X_test = scaler.transform(desc_8)
  predict = rf.predict(X_test)
  print(f'The Predicted Solubilities is {predict}')
  # Return the predictions as well so callers can use them programmatically.
  return predict

In [None]:
smiles = ['OCC1=C(O)C=C(O)C=C1', 'NCC1=C(N)C=C(N)C=C1', 'CCC1=C(C)C=C(C)C=C1']
predict_sol_smiles(smiles)