In [4]:
import pandas as pd
import numpy as np
from rdkit import Chem
from rdkit.Chem import rdFingerprintGenerator
from sklearn.model_selection import GridSearchCV
from lightgbm import LGBMClassifier, LGBMRegressor
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from catboost import CatBoostClassifier, CatBoostRegressor
from sklearn.ensemble import VotingClassifier, VotingRegressor
from sklearn.linear_model import LogisticRegression
import joblib

import warnings; warnings.filterwarnings('ignore')

In [2]:
# Names of the models served by this notebook. Each name is also the filename
# stem of its two saved estimators, and the list order fixes the order of
# `models_reg`, `models_class` and every printed prediction.
kinase_names = [
    "KDR", "FLT1", "p110a", "JAK1", "JAK2",
    "ErbB2", "EGFR", "PIM1", "ROCK1", "ABL1",
    "TK", "CMGC", "AGC", "CAMK", "Atypical",
]

# Load "<name>_reg.pkl" and "<name>_class.pkl" for every name, resolved
# relative to the current working directory.
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code -- only load model files that come from a trusted source.
models_reg = [
    joblib.load(f"{kinase}_reg.pkl")
    for kinase in kinase_names
]
models_class = [
    joblib.load(f"{kinase}_class.pkl")
    for kinase in kinase_names
]

In [5]:
# Read the query molecule. `input` already returns a str; surrounding
# whitespace from a paste is stripped so it cannot leak into the parse or
# into the headings printed below.
s = input("Paste SMILES String Below:\n").strip()
if not s:
    raise ValueError("No SMILES string was entered.")

# RDKit signals an unparsable SMILES by returning None instead of raising.
# Stop here with a readable message rather than letting the fingerprint
# generator fail on a None argument.
mol = Chem.MolFromSmiles(s)
if mol is None:
    raise ValueError(f"Could not parse SMILES string: {s!r}")

# Morgan fingerprint of the molecule as a 2048-bit vector.
# NOTE(review): radius/fpSize presumably have to match the featurisation the
# saved models were trained with -- confirm before changing them.
generator = rdFingerprintGenerator.GetMorganGenerator(radius=3, fpSize=2048)
fingerprint = generator.GetFingerprint(mol)

# Convert the bit vector to a 1-D 0/1 array once, then shape it as a single
# sample (one row, one column per bit) for the estimators' predict().
fp = np.stack(fingerprint)
print(fp)
X = fp.reshape(1, -1)

# Run both model families over the same feature row: first the regressors,
# then the classifiers, each under its own heading and in `kinase_names` order.
for heading, models in (
    ("Predicted pIC50 Values", models_reg),
    ("Predicted Inhibition Class", models_class),
):
    print(f"\n\n{heading} for the Molecule {s}\n\n")
    for name, model in zip(kinase_names, models):
        prediction = model.predict(X)[0]
        print(f"{name}: {prediction}")

Paste SMILES String Below:
 N#CCC(C1CCCC1)n2cc(cn2)-c3c4cc[nH]c4ncn3


[0 1 0 ... 0 0 0]


Predicted pIC50 Values for the Molecule N#CCC(C1CCCC1)n2cc(cn2)-c3c4cc[nH]c4ncn3


KDR: 7.119611758467642
FLT1: 7.063214524229798
p110a: 7.848398245913299
JAK1: 9.35441745927617
JAK2: 9.321930096732194
ErbB2: 5.001733548790409
EGFR: 5.152643131820699
PIM1: 6.871392627092416
ROCK1: 6.898979296008131
ABL1: 7.1820261433138635
TK: 8.758711415171106
CMGC: 7.583905553876884
AGC: 6.889817583605163
CAMK: 10.292700608362406
Atypical: 7.7442526550885775


Predicted Inhibition Class for the Molecule N#CCC(C1CCCC1)n2cc(cn2)-c3c4cc[nH]c4ncn3


KDR: 0
FLT1: 0
p110a: 0
JAK1: 1
JAK2: 1
ErbB2: 0
EGFR: 0
PIM1: 0
ROCK1: 0
ABL1: 0
TK: 1
CMGC: 0
AGC: 1
CAMK: 0
Atypical: 1
