In [1]:
# Imports
import pickle
import pandas as pd
from rdkit import Chem
from rdkit.Chem import AllChem
import defaults

In [2]:
# Read the molecules that still need predictions into a DataFrame
dataset_path = './datasets/untested_molecules.csv'
df = pd.read_csv(dataset_path)

In [3]:
# Compute the descriptor record for every SMILES string, flatten the records
# into one column per descriptor, and append those columns to the frame.
descriptor_records = df['SMILES'].apply(defaults.compute_descriptors)
descriptors_df = pd.json_normalize(descriptor_records)
df = pd.concat([df, descriptors_df], axis=1)
# No column named 'Descriptors' is kept in the final frame.
df = df.drop(columns=['Descriptors'], errors='ignore')

In [4]:
def smiles_to_fingerprint(smiles, radius=2, n_bits=1024):
    """Convert a SMILES string into a Morgan fingerprint bit list.

    Parameters
    ----------
    smiles : str
        SMILES representation of the molecule.
    radius : int, optional
        Morgan fingerprint radius. Defaults to 2, the value that was
        previously hard-coded.
    n_bits : int, optional
        Length of the fingerprint bit vector. Defaults to 1024, the value
        that was previously hard-coded.

    Returns
    -------
    list
        The fingerprint bits as a list of length ``n_bits``. A list of
        ``n_bits`` zeros is returned when ``smiles`` is not a string or
        cannot be parsed by RDKit.
    """
    zero_fingerprint = [0] * n_bits

    # Empty CSV cells arrive as float NaN (or None). RDKit does not accept
    # non-string input, so treat such values like any other unparsable SMILES
    # instead of letting the whole .apply() call fail.
    if not isinstance(smiles, str):
        return zero_fingerprint

    mol = Chem.MolFromSmiles(smiles)
    # MolFromSmiles signals a parse failure by returning None.
    if mol is None:
        return zero_fingerprint

    fp = AllChem.GetMorganFingerprintAsBitVect(mol, radius, nBits=n_bits)
    return list(fp)

# Turn every SMILES string into its fingerprint bit list, then lay the bits
# out as one column per position (Fingerprint_0 ... Fingerprint_1023).
fingerprint_columns = [f'Fingerprint_{i}' for i in range(1024)]
fingerprint_rows = df['SMILES'].apply(smiles_to_fingerprint).tolist()
fingerprints_df = pd.DataFrame(fingerprint_rows, columns=fingerprint_columns)

# Append the bit columns to the existing frame; no column named 'Fingerprint'
# is kept in the result.
df = pd.concat([df, fingerprints_df], axis=1)
df = df.drop(columns=['Fingerprint'], errors='ignore')


In [5]:
def apply_model(inhibitor, threshold=0.5, data=None):
    """Predict the binary label for one target and store it as a column.

    Loads the pickled model from ``./models/model_{inhibitor}.pkl``, feeds it
    the feature columns returned by ``defaults.get_descriptors(inhibitor)``
    and writes the thresholded predictions into a column named ``inhibitor``.

    Parameters
    ----------
    inhibitor : str
        Name of the target; used both in the model file name and as the
        name of the column that receives the predictions.
    threshold : float, optional
        Model outputs greater than or equal to this value become 1, all
        others 0. Defaults to 0.5, the value that was previously hard-coded.
    data : pandas.DataFrame, optional
        Frame to read features from and write predictions to. When omitted,
        the notebook-level ``df`` is used, as before.

    Returns
    -------
    None
        The prediction column is added to (or overwritten in) the frame
        in place.
    """
    target_df = df if data is None else data

    # NOTE(security): pickle.load can execute arbitrary code embedded in the
    # file. Only load model files that come from a trusted source.
    with open(f"./models/model_{inhibitor}.pkl", "rb") as f:
        model = pickle.load(f)

    x_data = target_df[defaults.get_descriptors(inhibitor)]
    target_df[inhibitor] = (model.predict(x_data) >= threshold).astype(int)

In [6]:
# Add a 0/1 prediction column for every target listed in defaults.y_data
for target_name in defaults.y_data:
    apply_model(target_name)

In [7]:
# Export the SMILES column together with one prediction column per target.
# The columns are selected by name: by this point the frame also holds every
# descriptor and fingerprint column, so a positional slice only contains the
# predictions if the input CSV happened to have the target columns right
# after SMILES.
result_columns = ['SMILES', *defaults.y_data]
result_df = df[result_columns]
result_df.to_csv("./datasets/applied_molecules.csv")