In [1]:
import pickle
import tmap as tm
import numpy as np
from faerun import Faerun
from matplotlib import pyplot as plt
from matplotlib.colors import ListedColormap
import matplotlib.cm as cm
import pandas as pd

from rdkit import Chem
from rdkit.Chem import AllChem

In [2]:
# Build one combined product table: for every reagent-B class, load the
# pickled fingerprints and the matching product CSV, attach the class name and
# the fingerprints as columns, then stack the per-class frames.
frames = []
for x in ['Amine', 'Carboxyl', 'Boronate', 'Bromo']:
    # NOTE: pickle.load can execute arbitrary code; only load pickles you created.
    with open('{}FPS.pickle'.format(x), 'rb') as p:
        fps = pickle.load(p)
    print(len(fps))
    cur_df = pd.read_csv('all{}Prods.csv'.format(x))
    cur_df['ReagB'] = x
    print(len(cur_df))
    # Row i of the CSV is paired with fingerprint i; the two printed lengths
    # above should therefore be equal.
    cur_df['FPS'] = fps
    frames.append(cur_df)
# ignore_index=True gives the combined frame a unique 0..N-1 index instead of
# repeating each file's own 0..n-1 labels, so label-based lookups stay unambiguous.
df = pd.concat(frames, ignore_index=True)

431326
431326
713866
713866
11715
11715
82319
82319


In [8]:
# Write the combined table to disk without the 'FPS' column and without the index.
props_only = df.drop(columns=['FPS'])
props_only.to_csv('AllProds.csv', index=False)

In [4]:
from tqdm.contrib.concurrent import process_map
from mhfp.encoder import MHFPEncoder

# Module-level wrapper so that process_map can hand the callable to its worker processes.
def cal_fp(smi):
    """Encode a single SMILES string with the module-level ``enc`` encoder.

    ``enc`` is not passed in; it must already exist as a global when this
    function runs.

    Args:
        smi: SMILES string of the molecule to encode.

    Returns:
        The value returned by ``enc.encode_mol`` for the parsed molecule.

    Raises:
        ValueError: if RDKit cannot parse ``smi`` into a molecule.
    """
    mol = Chem.MolFromSmiles(smi)
    # MolFromSmiles signals a parse failure by returning None; fail with the
    # offending SMILES instead of an opaque error from inside the encoder.
    if mol is None:
        raise ValueError('Could not parse SMILES: {!r}'.format(smi))
    return enc.encode_mol(mol)

In [4]:
# Encoder used by cal_fp (read there as a global), constructed with 1024.
enc = MHFPEncoder(1024)

# NOTE(review): the recorded progress bar for this cell shows max=82319, which
# equals the Bromo row count printed by the loading cell, so this was presumably
# run while `df` held only the Bromo products -- confirm before re-running on
# the combined frame.
smiles_column = df['Smiles']
fps = process_map(cal_fp, smiles_column, chunksize=100, max_workers=20)

HBox(children=(FloatProgress(value=0.0, max=82319.0), HTML(value='')))




In [5]:
# Pickle the current `fps` list to disk.
# NOTE(review): the file name is hard-coded to the Bromo class -- confirm that
# `fps` holds the Bromo fingerprints when this cell is run.
with open('BromoFPS.pickle', 'wb') as fp_file:
    pickle.dump(fps, fp_file)

In [6]:
# Wrap every entry of the 'FPS' column in a tmap VectorUint.
fps = list(map(tm.VectorUint, df['FPS']))
print(len(fps), 'Fingerprints Converted')

1239226 Fingerprints Converted


In [7]:
# Build an LSH forest from the converted fingerprints and save it to disk.
# NOTE(review): the forest is created with 512 while the MHFP encoder in this
# notebook is created with 1024 -- confirm that the mismatch is intended.
lf = tm.LSHForest(512, 64)
lf.batch_add(fps)
# The forest is indexed after all fingerprints have been added.
lf.index()
lf.store("lf_All_Prods.dat")

### Load Data

In [2]:
# Reload the combined product table (written without the 'FPS' column) and the
# stored LSH forest, so the cells below can run without rebuilding either.
df = pd.read_csv('AllProds.csv')
# Constructed with the same (512, 64) arguments that were used when the forest was stored.
lf = tm.LSHForest(512, 64)
lf.restore("lf_All_Prods.dat")
# NOTE(review): index() may be redundant after restore() -- confirm against the tmap docs.
lf.index()

In [3]:
# Layout settings for the tree layout computed below.
cfg = tm.LayoutConfiguration()
cfg.k = 10
cfg.node_size = 0.1
cfg.mmm_repeats = 2
cfg.sl_extra_scaling_steps = 8
cfg.sl_scaling_type = tm.RelativeToAvgLength

# Compute the layout from the LSH forest with the configuration above.
# Five values come back: x and y are used later as the point coordinates,
# s and t as the "from"/"to" vertex ids of the tree edges; the fifth value
# is not needed here and is discarded.
x, y, s, t, _ = tm.layout_from_lsh_forest(lf, cfg)

In [4]:
# Turn the four layout outputs into plain Python lists, then pickle them together.
x, y, s, t = (list(seq) for seq in (x, y, s, t))
with open("all_coords.dat", "wb") as coords_file:
    pickle.dump((x, y, s, t), coords_file)

In [12]:
# Four-colour map: the hsv colormap sampled at 4 evenly spaced points in [0, 0.75].
hsv_samples = cm.hsv(np.linspace(0, 0.75, 4))
hsv_4 = ListedColormap(hsv_samples, name="hsv_4")

# Legend labels and per-row category data for the 'ReagB' column.
ReagB_labels, ReagB_data = Faerun.create_categories(df["ReagB"])

In [21]:
# Interactive Faerun plot of all products: one scatter layer with six
# selectable colour series, plus the tree edges connecting the points.
f = Faerun(view="front", alpha_blending=True, coords=False)
f.add_scatter(
    # The name must not contain spaces (use underscores) and must start with a
    # letter. Presumably it is turned into a variable name in the generated
    # output, so it has to be a valid Python/JavaScript identifier.
    "All_Products",
    {
        "x": x,
        "y": y,
        # Colour series. The order here must match `categorical`, `colormap`
        # and `series_title` below, entry by entry.
        "c": [
            ReagB_data,
            df['MolWt'],
            df['LogP'],
            df['QED'],
            df['HBA'],
            df['HBD']
        ],
        "labels": df['Smiles'],  # Specify the label used for toggle window here
    },
    shader="smoothCircle",
    point_scale=0.5,
    max_point_size=5,
    # Only the first series (reagent class) is categorical.
    categorical=[
        True,
        False,
        False,
        False,
        False,
        False,
    ],
    colormap=[
        hsv_4,
        "rainbow",
        "rainbow",
        "rainbow",
        "rainbow",
        "rainbow"
    ],
    # One title per entry of "c", in the same order. The list previously had
    # only five titles (none for the ReagB series) and listed donor before
    # acceptor, so every title was attached to the wrong series.
    series_title=[
        "Reagent B",
        "Molecular Weight",
        "Lipophilicity",
        "Quantitative Estimate of Druglikeness",
        "Hydrogen Bond Acceptor",
        "Hydrogen Bond Donor",
    ],
    has_legend=True, # Show the legend on the lower right corner
    legend_labels=[ReagB_labels]
)

# The first character of the name has to be a letter!
f.add_tree("All_Products_tree", {"from": s, "to": t}, point_helper="All_Products")

f.plot('All_Products'+'_2D_Space_2', # name of the .html file
       template="smiles")