In [1]:
from tdc.single_pred import Tox
from sklearn import svm

# Build the TDC toxicity dataset handle for 'hERG_Karim'; `data` is reused
# by later cells to obtain the train/valid split.
data = Tox(name='hERG_Karim')

# Pull the dataset out of the handle into `df` for inspection.
df = data.get_data()

# Bare trailing expression: the notebook renders `df` as this cell's output.
df

Downloading...
100%|██████████| 885k/885k [00:00<00:00, 1.18MiB/s]
Loading...
Done!


Unnamed: 0,Drug_ID,Drug,Y
0,0,Fc1ccc(-n2cc(NCCN3CCCCC3)nn2)cc1F,1
1,1,COc1cc(N2Cc3ccc(Sc4ccc(F)cc4)nc3C2=O)ccc1OCCN1...,0
2,2,CCOC(=O)[C@H]1CC[C@@H](N2CC(NC(=O)CNc3nn(C(N)=...,0
3,3,N[C@@H](Cn1c(=O)cnc2ccc(F)cc21)C1CCC(NCc2ccc3c...,0
4,4,O=C(NC1COc2cccc(-c3ccnc(CO)c3)c2C1)c1ccc(OCC(F...,0
...,...,...,...
13440,13440,Cc1csc(NC(=O)c2sc3nc4c(c(C(F)(F)F)c3c2N)CCC4)n1,0
13441,13441,Cc1cccc(-c2n[nH]cc2-c2ccc3ncccc3n2)n1,0
13442,13442,Cc1ccccc1-n1c(Cn2cnc3c(N)ncnc32)nc2cccc(C)c2c1=O,0
13443,13443,Cc1ccccc1-n1c(Cn2ncc3c(N)ncnc32)nc2cccc(C)c2c1=O,0


In [2]:
import pandas as pd
from rdkit import Chem
from rdkit.Chem import AllChem
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report
import numpy as np

# Partition the dataset using get_split() with its default arguments, then
# pull the SMILES column ('Drug') and the label column ('Y') out of the
# training and validation partitions.
split = data.get_split()
smiles_train, Y_train = split['train']['Drug'], split['train']['Y']
smiles_valid, Y_valid = split['valid']['Drug'], split['valid']['Y']

#Compute Morgan fingerprints
def compute_morgan_fingerprint(smiles, radius=2, nBits=1024):
    """Turn a SMILES string into a Morgan fingerprint stored in a NumPy array.

    Parameters
    ----------
    smiles : str
        SMILES representation of the molecule to featurize.
    radius : int, default 2
        Radius forwarded to RDKit's Morgan fingerprint routine.
    nBits : int, default 1024
        Size of the bit vector requested from RDKit.

    Returns
    -------
    numpy.ndarray
        ``np.array`` applied to the RDKit bit vector (one entry per bit).

    Raises
    ------
    ValueError
        If RDKit cannot parse ``smiles`` into a molecule.
    """
    mol = Chem.MolFromSmiles(smiles)
    # MolFromSmiles signals a parse failure by returning None rather than
    # raising; fail here with the offending string instead of letting the
    # fingerprint call reject the None argument with an opaque error.
    if mol is None:
        raise ValueError(f"Could not parse SMILES string: {smiles!r}")
    fingerprint = AllChem.GetMorganFingerprintAsBitVect(mol, radius, nBits=nBits)
    return np.array(fingerprint)

# Featurize both partitions: each SMILES string is mapped to its fingerprint
# array, giving one Series of arrays per partition.
train_data = smiles_train.apply(compute_morgan_fingerprint)
valid_data = smiles_valid.apply(compute_morgan_fingerprint)

# Stack the per-molecule arrays into 2-D feature matrices (one row per molecule).
X_train_fingerprints = np.stack(train_data.to_numpy())
X_valid_fingerprints = np.stack(valid_data.to_numpy())

# Fit a linear-kernel support vector classifier on the training fingerprints;
# fit() returns the estimator itself, so construction and training are chained.
svm_model = SVC(kernel='linear', random_state=42).fit(X_train_fingerprints, Y_train)

# Score the fitted model on the validation partition.
y_pred_svm = svm_model.predict(X_valid_fingerprints)
print("SVM Accuracy:", accuracy_score(Y_valid, y_pred_svm))
print("SVM Classification Report:\n", classification_report(Y_valid, y_pred_svm))



SVM Accuracy: 0.7418154761904762
SVM Classification Report:
               precision    recall  f1-score   support

           0       0.76      0.73      0.74       683
           1       0.73      0.76      0.74       661

    accuracy                           0.74      1344
   macro avg       0.74      0.74      0.74      1344
weighted avg       0.74      0.74      0.74      1344

