In [5]:
import matplotlib.pyplot as plt
import pandas as pd
import tmap as tm
from faerun import Faerun
from matplotlib.colors import LinearSegmentedColormap
from mhfp.encoder import MHFPEncoder
from rdkit import Chem, DataStructs
from rdkit.Chem import AllChem
from sklearn import preprocessing

In [6]:



# --- Load the training sheet and compute Morgan bit-vector fingerprints ---
filename = "ERB.xlsx"
ERB = pd.read_excel(filename, sheet_name="X_train", engine="openpyxl")

# Length of every fingerprint bit vector.
bits = 1024

# NOTE(review): the variables below are called "ECFP4", which conventionally
# corresponds to Morgan radius 2, but the radius actually used is 3. The value
# is left unchanged so the resulting map does not change -- confirm which of
# the two (name or radius) is the intended one.
morgan_radius = 3

mols = [Chem.MolFromSmiles(smiles) for smiles in ERB["SMILES"]]

# RDKit yields None for a SMILES it cannot parse; passing that on to
# Chem.RemoveHs fails with an opaque argument error. Stop here with a message
# that names the offending input instead.
invalid_smiles = [
    smiles for smiles, mol in zip(ERB["SMILES"], mols) if mol is None
]
if invalid_smiles:
    raise ValueError(
        f"RDKit could not parse {len(invalid_smiles)} SMILES from {filename!r}; "
        f"first few: {invalid_smiles[:5]}"
    )
ERB["mols"] = mols

# Hydrogen-stripped copies are what the fingerprints are computed from.
mol_noH = [Chem.RemoveHs(mol) for mol in mols]
ERB["mol_noH"] = mol_noH

ECFP4_fps = [
    AllChem.GetMorganFingerprintAsBitVect(mol, morgan_radius, nBits=bits)
    for mol in mol_noH
]

# Convert every bit vector to a tm.VectorUchar so tmap can MinHash it.
ecfp4_lists = [tm.VectorUchar(list(fp)) for fp in ECFP4_fps]

# Number of permutations used by the MinHash algorithm. The MinHash encoder and
# the LSH forest are both sized with this value so they stay in agreement;
# it is independent of the fingerprint bit length.
perm = 1024

# tmap MinHash encoder for the binary fingerprints.
enc = tm.Minhash(perm)

# LSH forest that will index the MinHash signatures.
lf_ecfp4 = tm.LSHForest(perm)

# MinHash the binary fingerprints (a list of tm.VectorUchar built from the
# Morgan bit vectors) and add them to the forest in one batch, then build
# the index.
lf_ecfp4.batch_add(enc.batch_from_binary_array(ecfp4_lists))
lf_ecfp4.index()

# Lay the forest out: x / y are used downstream as the scatter coordinates,
# s / t as the "from" / "to" endpoints of the tree edges. The fifth return
# value is not used.
x, y, s, t, _ = tm.layout_from_lsh_forest(lf_ecfp4)

# Point label in the form "<SMILES>__<NURA_ID>". NURA_ID is cast to str so the
# concatenation also works when pandas read the column as a numeric dtype.
ERB["SmilesID"] = ERB["SMILES"] + "__" + ERB["NURA_ID"].astype(str)



# Prepare the custom categorical color map.
# NOTE: despite its name, `tab10` holds the colors of matplotlib's "tab20"
# palette; the name is kept so any other cell referring to it keeps working.
tab10 = plt.get_cmap("tab20").colors

# Palette positions, in the order they appear in the color map.
# NOTE(review): positions 6 and 12 are each listed twice, so two pairs of
# slots share a color -- confirm this is intended.
palette_indices = [0, 4, 12, 2, 3, 6, 1, 6, 7, 8, 9, 10, 11, 12, 13]
colors_gray = [tab10[index] for index in palette_indices]

# N equals the number of listed colors, i.e. one color map level per entry.
custom_cm_gray = LinearSegmentedColormap.from_list(
    "custom_cm_gray", colors_gray, N=len(colors_gray)
)

# Encode the two categorical columns as integer class codes for colouring.
# Each column gets its own encoder: refitting a single encoder would discard
# the label1 mapping, making it impossible to decode those codes afterwards.
label1_encoder = preprocessing.LabelEncoder()
ERB_TRANSFORM = label1_encoder.fit_transform(ERB['label1'])

# `le` ends up fitted on the Prediction column, exactly as before.
le = preprocessing.LabelEncoder()
ERB_TRANSFORM_prediction = le.fit_transform(ERB['Prediction'])


    
faerun = Faerun(clear_color="#ffffff", view="front", coords=False)

# Legend entries are (encoded class value, display text) pairs, one list per
# colour series passed to add_scatter below.
label1_legend = [
    (2, "Train Active"),
    (3, "Train Inactive"),
    (0, "Test Active"),
    (1, "Test Inactive"),
]
prediction_legend = [
    (4, "Train Active"),
    (0, "FN"),
    (1, "FP"),
    (2, "TN"),
    (3, "TP"),
    (5, "Train Inactive"),
]

# The codes in the legends are hard-coded, whereas the codes in the encoded
# columns are derived from whichever classes are present in the data. If a
# class is missing (or an extra one appears) the codes shift and the legend
# would silently describe the wrong classes, so fail loudly instead.
for series_name, codes, legend in (
    ("label1", ERB_TRANSFORM, label1_legend),
    ("Prediction", ERB_TRANSFORM_prediction, prediction_legend),
):
    observed_codes = {int(code) for code in codes}
    legend_codes = {code for code, _ in legend}
    if observed_codes != legend_codes:
        raise ValueError(
            f"Encoded classes of {series_name!r} are {sorted(observed_codes)} "
            f"but the legend defines {sorted(legend_codes)}; "
            "update the legend labels to match the data."
        )

# Two colour series share the same points: the encoded label1 column and the
# encoded Prediction column; title_index=1 selects the second series title.
faerun.add_scatter(
    "Activity",
    {
        "x": x,
        "y": y,
        "c": [ERB_TRANSFORM, ERB_TRANSFORM_prediction],
        "labels": ERB["SmilesID"],
    },
    point_scale=10,
    colormap=[custom_cm_gray, custom_cm_gray],
    shader="smoothCircle",
    has_legend=True,
    categorical=True,
    legend_labels=[label1_legend, prediction_legend],
    series_title=["ERB", "Test"],
    title_index=1,
)

# Draw the tree edges between the points of the "Activity" scatter layer.
faerun.add_tree(
    "Assay_tree", {"from": s, "to": t}, point_helper="Activity", color="#666666"
)

# Choose the "smiles" template to display structure on hover
faerun.plot(template="smiles", notebook_height=750)

