<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"></ul></div>

In [1]:
!pip install svgutils
# Grid is broken in latest version!
!pip install rdkit-pypi==2021.9.4



In [2]:
from IPython.display import SVG
import svgutils.transform as sg

from rdkit import Chem
from rdkit.Chem import rdFMCS
from rdkit.Chem import AllChem
from rdkit.Chem import Draw

import pandas as pd
import os
import numpy as np
from tqdm.auto import tqdm
from functools import lru_cache

tqdm.pandas()

In [3]:
@lru_cache(maxsize=200000)
def make_rdkit_canonical(s):
    """
    Return the RDKit canonical SMILES for ``s`` with stereo information dropped.

    The input is parsed with ``Chem.MolFromSmiles`` and written back with
    ``canonical=True, isomericSmiles=False``. Results are memoised per input
    string (up to 200000 entries).
    """
    parsed = Chem.MolFromSmiles(s)
    return Chem.MolToSmiles(parsed, canonical=True, isomericSmiles=False)


@lru_cache(200000)
def get_mol(s):
    """
    Build an RDKit molecule from the SMILES string ``s`` and give it a
    UFF-optimised 3D conformer.

    Do not add H, if you do not need them: the molecule is embedded without
    explicit hydrogens.

    Results are memoised by ``lru_cache``, so repeated calls with the same
    SMILES return the *same* molecule object. Any in-place change a caller
    makes (e.g. recomputing coordinates) is therefore visible to later callers.

    Raises:
        ValueError: if ``s`` cannot be parsed, or if no conformer could be
            embedded even after retrying with random starting coordinates.
    """
    mol = Chem.MolFromSmiles(s)
    if mol is None:
        # MolFromSmiles signals a parse failure by returning None; fail here
        # with a readable message instead of deep inside the embedding call.
        raise ValueError(f"Could not parse SMILES: {s!r}")

    # Hydrogens are intentionally not added (Chem.AddHs) - see docstring.
    conf_id = AllChem.EmbedMolecule(mol, randomSeed=7)
    if conf_id < 0:
        # EmbedMolecule returns -1 when embedding fails; starting from random
        # coordinates is the usual remedy for difficult molecules.
        conf_id = AllChem.EmbedMolecule(mol, randomSeed=7, useRandomCoords=True)
    if conf_id < 0:
        raise ValueError(f"Could not embed a 3D conformer for SMILES: {s!r}")

    AllChem.UFFOptimizeMolecule(mol)
    return mol

In [4]:
def _aligned_2d_copies(mols):
    """Return copies of ``mols`` with 2D coordinates, aligned on their common core."""
    # Work on copies: get_mol() hands out cached, shared objects, and the
    # coordinate changes below must not leak back into that cache.
    flat = [Chem.Mol(m) for m in mols]

    # Flatten EVERY molecule before aligning. Aligning a 2D query onto a
    # template that still carries its 3D conformer rotates the query out of
    # the drawing plane, which is what produced "3D-like" depictions.
    for m in flat:
        AllChem.Compute2DCoords(m)

    if len(flat) < 2:
        return flat

    mcs = rdFMCS.FindMCS(flat,
                         threshold=0.9,
                         completeRingsOnly=True,
                         ringMatchesRingOnly=True)
    patt = Chem.MolFromSmarts(mcs.smartsString) if mcs.smartsString else None
    if patt is None:
        return flat

    # The third molecule is the reference orientation (or the last one when
    # fewer than three are available).
    template = flat[min(2, len(flat) - 1)]
    template_match = template.GetSubstructMatch(patt)
    if not template_match:
        return flat

    for query in flat:
        if query is template:
            continue
        query_match = query.GetSubstructMatch(patt)
        if not query_match:
            # threshold < 1 allows molecules that lack the common core;
            # leave those in their default orientation.
            continue
        AllChem.AlignMol(query,
                         template,
                         atomMap=list(zip(query_match, template_match)))
    return flat


def draw_in_gird(path, col='similarity', xlabeltext=None):
    """
    Draw, for every target folder under ``path``, an SVG grid of the five
    best molecules of each CSV file and save it as
    ``top5-by-{col}-{target_folder}.svg`` in the working directory.

    Each sub-folder of ``path`` is one target; each ``*.csv`` file inside it
    becomes one row of the grid. The CSV files are read with pandas and must
    provide a ``smiles`` column and the column named by ``col``.

    Parameters:
        path: directory containing one sub-folder per target.
        col: column to rank by (descending); its values are printed under
            the molecules.
        xlabeltext: text placed under the grid.

    Side effects: writes ``tmp.svg`` (scratch file) and one SVG per target.
    """
    cell = 200  # edge length of one molecule image, in SVG user units
    score_x = [100, 300, 500, 700, 900]  # x position of the score per column
    names = {"gaba": "GABA", "mtor": "mTOR", "vdr": "VDR"}

    for target_folder in os.listdir(path):
        target_dir = os.path.join(path, target_folder)
        if not os.path.isdir(target_dir):
            continue  # stray files next to the target folders

        F, T, S = [], [], []  # grid rows, row labels, score labels

        for k in sorted(os.listdir(target_dir)):
            if not k.endswith('.csv'):
                continue
            df = pd.read_csv(os.path.join(target_dir, k))
            top = df.sort_values(by=col, ascending=False).head(5)
            if top.empty:
                continue

            # Row index counts only rows that are actually drawn.
            row = len(F)

            # auto rotate 2d
            mols = _aligned_2d_copies(
                [get_mol(s) for s in top['smiles'].values.tolist()])

            im = Draw.MolsToGridImage(mols,
                                      molsPerRow=5,
                                      useSVG=True,
                                      subImgSize=(cell, cell))
            # NOTE(review): with RDKit's IPython integration active the call
            # may return an IPython SVG object instead of a str - confirm.
            svg_text = getattr(im, 'data', im)

            with open("tmp.svg", 'w') as f:
                f.write(svg_text)

            # read and concat
            fg = sg.fromfile("tmp.svg").getroot()
            fg.moveto(0, cell * row)
            txt = sg.TextElement(-((cell * row) + 130),
                                 0,
                                 k.replace('.csv', ''),
                                 size=18,
                                 font='Arial')
            txt.rotate(angle=-90)

            scores = [str(np.round(s, 3)) for s in top[col].values.tolist()]
            # zip() stops at the shorter sequence, so files with fewer than
            # five rows simply get fewer score labels.
            for x, score in zip(score_x, scores):
                S.append(
                    sg.TextElement(x, (row + 1) * cell,
                                   score,
                                   size=14,
                                   font='Arial'))

            T.append(txt)
            F.append(fg)

        if not F:
            continue  # nothing to draw for this target

        fig = sg.SVGFigure("21.0cm", "29.7cm")
        fig.append(F)
        fig.append(T)
        fig.append(S)

        title = sg.TextElement(500,
                               0,
                               names.get(target_folder, target_folder),
                               size=21,
                               font='Arial')
        xlabel = sg.TextElement(450, len(F) * cell + 50,
                                xlabeltext,
                                size=21,
                                font='Arial')

        fig.append(title)
        fig.append(xlabel)

        # save generated SVG files
        fig.save(f"top5-by-{col}-{target_folder}.svg")

In [5]:
!unzip predict_data.zip

Archive:  predict_data.zip
replace predict_data/gaba/CDN.csv? [y]es, [n]o, [A]ll, [N]one, [r]ename: A
  inflating: predict_data/gaba/CDN.csv  
  inflating: predict_data/gaba/GMDLDR.csv  
  inflating: predict_data/gaba/LigDream.csv  
  inflating: predict_data/gaba/REINVENT.csv  
  inflating: predict_data/gaba/REINVENT3.csv  
  inflating: predict_data/gaba/Transmol.csv  
  inflating: predict_data/gaba/TransVAE.csv  
  inflating: predict_data/mtor/CDN.csv  
  inflating: predict_data/mtor/GMDLDR.csv  
  inflating: predict_data/mtor/LigDream.csv  
  inflating: predict_data/mtor/REINVENT.csv  
  inflating: predict_data/mtor/REINVENT3.csv  
  inflating: predict_data/mtor/Transmol.csv  
  inflating: predict_data/mtor/TransVAE.csv  
  inflating: predict_data/vdr/CDN.csv  
  inflating: predict_data/vdr/GMDLDR.csv  
  inflating: predict_data/vdr/LigDream.csv  
  inflating: predict_data/vdr/REINVENT.csv  
  inflating: predict_data/vdr/REINVENT3.csv  
  inflating: predict_data/vdr/Transmol.csv  
  

In [6]:
# The function is deliberately called twice. A single pass can leave some
# depictions 3D-like when a molecule is aligned onto a template that still
# carries the 3D conformer produced by get_mol; on the second pass the cached
# molecules have already been flattened in place. Check the output SVG: if a
# structure still looks 3D, run again. The extra pass is harmless otherwise.
draw_in_gird('predict_data/', 'similarity', 'Similarity scores')
draw_in_gird('predict_data/', 'similarity', 'Similarity scores')

In [7]:
# Load the generated GABA figure back with svgutils (fails if the file is
# missing or cannot be parsed).
fig = sg.fromfile('top5-by-similarity-gaba.svg')

In [8]:
# Bundle the three similarity-ranked figures into one archive for download.
!zip sim-svg.zip top5-by-similarity-vdr.svg top5-by-similarity-gaba.svg top5-by-similarity-mtor.svg

updating: top5-by-similarity-vdr.svg (deflated 89%)
updating: top5-by-similarity-gaba.svg (deflated 89%)
updating: top5-by-similarity-mtor.svg (deflated 89%)


In [9]:
# Same figures, ranked by the 'score-svm' column. Called twice on purpose:
# a single pass can leave some depictions 3D-like (check the output SVG).
draw_in_gird('predict_data/', 'score-svm', 'SVM-scores')
draw_in_gird('predict_data/', 'score-svm', 'SVM-scores')

In [10]:
# Bundle the three SVM-score-ranked figures into one archive for download.
!zip svm-svg.zip top5-by-score-svm-vdr.svg top5-by-score-svm-gaba.svg top5-by-score-svm-mtor.svg

updating: top5-by-score-svm-vdr.svg (deflated 89%)
updating: top5-by-score-svm-gaba.svg (deflated 90%)
updating: top5-by-score-svm-mtor.svg (deflated 89%)
