In [1]:
!pip install rdkit

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting rdkit
  Downloading rdkit-2022.9.5-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (29.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m29.4/29.4 MB[0m [31m26.4 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: rdkit
Successfully installed rdkit-2022.9.5


In [3]:
# Import necessary libraries
import pandas as pd
from rdkit import Chem
from rdkit.Chem import AllChem
import math

# Load the input table; the code below reads its 'SMILES' and
# 'Binding Affinity' columns.
data = pd.read_csv('/content/ic50ca.csv')

# Convert SMILES strings to RDKit molecules. Chem.MolFromSmiles returns None
# (instead of raising) when a string cannot be parsed, so check for that here
# and fail with the offending row positions rather than an opaque error from
# Compute2DCoords below.
mols = [Chem.MolFromSmiles(smiles) for smiles in data['SMILES']]
invalid_rows = [row for row, mol in enumerate(mols) if mol is None]
if invalid_rows:
    raise ValueError(f'Could not parse SMILES at row position(s): {invalid_rows}')

# Compute 2D coordinates for every molecule (the return value is not used).
for mol in mols:
    AllChem.Compute2DCoords(mol)

# Derive a potency (IC50) value for each compound from its binding affinity.
# NOTE(review): the exponent divides the affinity by log10(50); the units and
# scale of 'Binding Affinity' are not visible here -- confirm this conversion
# is the intended one before relying on the absolute IC50 values.
affinity_scale = math.log10(50)  # loop-invariant, computed once
ic50_values = [10**(-binding_affinity/affinity_scale) for binding_affinity in data['Binding Affinity']]

# Output results
results = pd.DataFrame({'Potency (IC50)': ic50_values})
print(results)


   Potency (IC50)
0        0.000080
1        0.000014
2        0.000065
3        0.000097
4        0.000004
5        0.000034
6        0.000041
7        0.000139
8        0.000004
9        0.000083


In [4]:
# The first compound's IC50 is taken as the reference value. Each remaining
# compound gets a selectivity index equal to that reference divided by its
# own IC50, so the resulting list has one entry fewer than ic50_values.
target_ic50 = ic50_values[0]
si_values = [target_ic50 / ic50_values[pos] for pos in range(1, len(ic50_values))]
print('Selectivity index values:', si_values)

Selectivity index values: [5.856240943030965, 1.2209844020355205, 0.821932310799922, 19.52700956302108, 2.3638238199740824, 1.9317516746468957, 0.5734933348213801, 21.931680572947972, 0.9587777028572394]


In [5]:
# Attach the computed IC50 values to the table as a column, one value per row.
data.loc[:, 'Potency (IC50)'] = ic50_values

In [6]:
# si_values has no entry for the first (reference) compound, so a placeholder
# is prepended to line the list up with the table's rows.
# NOTE(review): the placeholder 0.000001 is arbitrary and feeds into any score
# built from this column; the reference's ratio to itself would be 1.0 --
# confirm which value is intended.
data['Selectivity Index'] = [0.000001, *si_values]

In [7]:
# Write the table, including the columns added above, to a CSV file in the
# current working directory; the row index is not written.
data.to_csv(path_or_buf='ic50ca_with_IC50_SI.csv', index=False)

In [8]:
import pandas as pd

# Weights applied to each property when combining them into a single score.
binding_affinity_weight = 0.5
ic50_weight = 0.3
selectivity_index_weight = 0.2

# Reload the table from the same relative path the earlier cell wrote it to
# ('ic50ca_with_IC50_SI.csv'), so this cell does not depend on the working
# directory being /content.
data = pd.read_csv('ic50ca_with_IC50_SI.csv')

# Weighted sum of the three property columns, one score per row.
# NOTE(review): the columns are combined on their raw scales (the recorded
# output shows binding affinities around 7-9 and IC50 values around 1e-5), so
# the IC50 term contributes almost nothing, and it is added with a positive
# sign -- confirm whether normalisation or a sign flip is intended.
data['Weighted Score'] = (
    binding_affinity_weight * data['Binding Affinity']
    + ic50_weight * data['Potency (IC50)']
    + selectivity_index_weight * data['Selectivity Index']
)

print(data)

# Rank the SMILES by weighted score, highest first. `data` is rebound to the
# sorted frame, which is what the following cell saves.
data = data.sort_values(by=['Weighted Score'], ascending=False)
print('Ranked:')
print(data[['SMILES', 'Weighted Score']])



                                              SMILES  Binding Affinity  \
0  ONC(N1CC2OC(C(=O)NCc3ccc(-c4ccccc4)cc3)C(C1)O2)=O          6.962898   
1  C(NC(=O)C1C2OC(CN(C(NO)=O)C2)O1)c1ccc(-c2ccccc...          8.267059   
2  c1(CNC(C2OC3OC2CN(C(NO)=O)C3)=O)ccc(-c2ccccc2)cc1          7.110216   
3  ONC(N1CC2OC(C(NCc3ccc(-c4ccccc4)cc3)=O)C(C1)O2)=O          6.818207   
4  c1cc(CNC(=O)C2C3OC(CN(C(=O)NO)C3)O2)ccc1-c1ccccc1          9.155649   
5  C12CN(C(NO)=O)CC(O1)C(C(=O)NCc1ccc(-c3ccccc3)c...          7.597659   
6    O1C2CN(C(NO)=O)CC1OC2C(=O)NCc1ccc(-c2ccccc2)cc1          7.448720   
7  N(C(=O)N1CC2OC(C(NCc3ccc(-c4ccccc4)cc3)=O)C(O2...          6.552645   
8    c1cc(CNC(=O)C2OC3OC2CN(C(=O)NO)C3)ccc1-c1ccccc1          9.241339   
9  C12OC(C(NCc3ccc(-c4ccccc4)cc3)=O)C(O1)CN(C(NO)...          6.931837   

                                      Target Protein  Potency (IC50)  \
0  GPVWRKHYITYRINNYTPDMNREDVDYAIRKAFQVWSNVTPLKFSK...        0.000080   
1  GPVWRKHYITYRINNYTPDMNREDVDYAIRKAFQVWSN

In [9]:
# Save the ranked table to a CSV file in the current working directory,
# without the row index.
data.to_csv(path_or_buf='Ranked_SMILES.CSV', index=False)