### Write link files for chord diagrams. This is for the circos package.

Download circos here: http://circos.ca/software/download/

There are pre-written files in the ./circos directory: 
- circos_athamontanolide.conf
- circos_noscapine.conf
- ticks.conf
- kary_single_plainbands.txt

These are configuration files for the circos package; they control the appearance of the chord diagram.



In [1]:
from rdkit import Chem
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
from matplotlib import cm

from tqdm.notebook import tqdm

In [2]:
# 'plasma' colormap; called with a value in 0-1 to obtain an RGBA tuple.
# pyplot.get_cmap is used because matplotlib.cm.get_cmap was deprecated in
# matplotlib 3.7 and removed in 3.9.
col_map = plt.get_cmap('plasma')

def rgb_convert(rgb,alpha):
    """Format a colour as an ``(r,g,b,alpha)`` string.

    rgb: sequence whose first three entries are channel values on a 0-1
        scale; any further entries are ignored.
    alpha: transparency value, inserted into the result unchanged.

    Returns the string "(r,g,b,alpha)" where each channel has been scaled
    to 0-255 and rounded to five decimal places.
    """
    scaled = tuple(map(lambda channel: round(255 * channel, 5), rgb[:3]))

    return "({},{},{},{})".format(scaled[0], scaled[1], scaled[2], alpha)

### load data

In [3]:
# Read the property table, order it by min_dist_all and renumber rows from 0.
# swap out the data later...
data = (
    pd.read_csv("./data_files/smiles_min_dist_dbank_props.csv")
    .sort_values("min_dist_all")
    .reset_index(drop=True)
)
data.head()

Unnamed: 0,smiles,min_dist_all,natoms,drugbank_matches,LogP,MW,HBD,HBA,PSA,ROTB,FSP3,SSSR,QED
0,C=CC(=O)O,1.0,5,321,0.257,72.063,1.0,1.0,37.3,1.0,0.0,0.0,0.450858
1,CCC(=O)O,1.0,5,2357,0.481,74.079,1.0,1.0,37.3,1.0,0.666667,0.0,0.490845
2,NCCCC(=O)O,2.0,7,289,-0.1901,103.121,2.0,2.0,63.32,3.0,0.75,0.0,0.520009
3,CCC(O)O,2.0,5,551,-0.2929,76.095,2.0,2.0,40.46,1.0,1.0,0.0,0.421095
4,CNC=CC(=O)O,2.0,7,34,-0.1959,101.105,2.0,2.0,49.33,2.0,0.25,0.0,0.473583


In [4]:
# Retain only rows with SSSR <= 4 and min_dist_all <= 6, then renumber from 0.
within_limits = (data["SSSR"] <= 4) & (data["min_dist_all"] <= 6)
data = data.loc[within_limits].reset_index(drop=True)

### load drug and search

In [5]:
# SMILES string for noscapine, the molecule that is searched for substructure matches.
noscapine = "O=C2O[C@@H](c1ccc(OC)c(OC)c12)[C@@H]5N(C)CCc4c5c(OC)c3OCOc3c4"

In [6]:
# Parse the drug once, then record for every SMILES in `data` how many
# substructure matches it has inside the drug.
search_molecule = Chem.MolFromSmiles(noscapine)
match_data = [
    len(search_molecule.GetSubstructMatches(Chem.MolFromSmiles(smi)))
    for smi in tqdm(data.smiles)
]

  0%|          | 0/11759 [00:00<?, ?it/s]

In [7]:
# Attach the per-row match counts and order rows from fewest to most matches.
data = data.assign(nmatches=match_data).sort_values(by="nmatches", ascending=True)

In [8]:
# Display the frame (sorted by nmatches, original row labels kept).
data

Unnamed: 0,smiles,min_dist_all,natoms,drugbank_matches,LogP,MW,HBD,HBA,PSA,ROTB,FSP3,SSSR,QED,nmatches
0,C=CC(=O)O,1.0,5,321,0.2570,72.063,1.0,1.0,37.30,1.0,0.0,0.0,0.450858,0
7830,CCN(O)CCCO,6.0,8,4,0.0799,119.164,2.0,3.0,43.70,4.0,1.0,0.0,0.514958,0
7831,CCNCC1CO1,6.0,7,14,-0.0053,101.149,1.0,2.0,24.56,3.0,1.0,1.0,0.505751,0
7832,C=CC(N)=C=O,6.0,6,0,-0.1534,83.090,1.0,2.0,43.09,1.0,0.0,0.0,0.353522,0
7833,CCNCC(O)CO,6.0,8,285,-1.0509,119.164,3.0,3.0,52.49,4.0,1.0,0.0,0.442342,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4127,CCOC,5.0,4,3855,0.6527,60.096,0.0,1.0,9.23,1.0,1.0,0.0,0.431501,3
9937,CCN(C)C,6.0,5,3095,0.5679,73.139,0.0,1.0,3.24,1.0,1.0,0.0,0.438308,3
6462,CCCNCC,6.0,6,3378,1.0059,87.166,1.0,1.0,12.03,3.0,1.0,0.0,0.506367,3
1495,CCCNC,5.0,5,4295,0.6158,73.139,1.0,1.0,12.03,2.0,1.0,0.0,0.504854,4


### process and write linker file

In [9]:
# color and location settings
# location on the black band - arbitrary
ndrug=10

chr_name1 = "drug"
chr_name2 = "aciami"

# color_log: whether to color by log of matches, or not
# interp10: whether to cap the matches at 10.
# alpha: the line transparency
color_log = False
interp10 = True
alpha = 0.5


nhits = data['nmatches'].values

# Colour-scale limits used when interp10 is set: 1 match -> 0, 10 or more -> 1.
cap_low, cap_high = 1, 10

if color_log:
    # Rows with zero matches are never written to the link file, but
    # log10(0) is -inf and would break the min/max scaling below, so clamp
    # the counts to at least one match before taking the log.
    nhits = np.log10(np.maximum(nhits, 1))
    # Keep the cap in the same (log) units as nhits.
    cap_low, cap_high = np.log10(cap_low), np.log10(cap_high)

if interp10:
    inter = np.interp(nhits,(cap_low,cap_high),(0,1))
else:
    n_max = nhits.max()
    n_min = nhits.min()
    inter = np.interp(nhits,(n_min,n_max),(0,1))

# One colour string per row, in the same order as data.itertuples() below.
line_colors = [rgb_convert(col_map(i),alpha) for i in inter]

# Write one link per row that has at least one match. Each line is
# "<chr_name1> <ndrug> <ndrug+1> <chr_name2> <row label> <row label+1> color=...".
# The context manager closes the file even if a write fails part-way.
with open("./circos/circos_links_noscapine.txt","w") as f_out:
    for i, line in enumerate(data.itertuples()):
        if line.nmatches>0:
            link_line = f"{chr_name1} {ndrug} {ndrug+1} {chr_name2} {line.Index} {line.Index+1} color={line_colors[i]}\n"
            f_out.write(link_line)
    


### do the same for athamontanolide

In [10]:
# Repeat the substructure search with athamontanolide as the target; the
# nmatches column is overwritten and the rows are re-sorted by it.
athamontanolide = "O=C1C(C2[C@@H](OC(C)=O)CC(C)=C3C=C[C@](C)(O)C3C2O1)=C"

search_molecule = Chem.MolFromSmiles(athamontanolide)
match_data = [
    len(search_molecule.GetSubstructMatches(Chem.MolFromSmiles(smi)))
    for smi in tqdm(data.smiles)
]

data = data.assign(nmatches=match_data).sort_values(by="nmatches", ascending=True)

  0%|          | 0/11759 [00:00<?, ?it/s]

In [11]:
# color and location settings
ndrug=5

chr_name1 = "drug"
chr_name2 = "aciami"

# color_log: whether to color by log of matches, or not
# interp10: whether to cap the matches at 10.
# alpha: the line transparency
color_log = False
interp10 = True
alpha = 0.5


nhits = data['nmatches'].values

# Colour-scale limits used when interp10 is set: 1 match -> 0, 10 or more -> 1.
cap_low, cap_high = 1, 10

if color_log:
    # Rows with zero matches are never written to the link file, but
    # log10(0) is -inf and would break the min/max scaling below, so clamp
    # the counts to at least one match before taking the log.
    nhits = np.log10(np.maximum(nhits, 1))
    # Keep the cap in the same (log) units as nhits.
    cap_low, cap_high = np.log10(cap_low), np.log10(cap_high)

if interp10:
    inter = np.interp(nhits,(cap_low,cap_high),(0,1))
else:
    n_max = nhits.max()
    n_min = nhits.min()
    inter = np.interp(nhits,(n_min,n_max),(0,1))

# One colour string per row, in the same order as data.itertuples() below.
line_colors = [rgb_convert(col_map(i),alpha) for i in inter]

# Write one link per row that has at least one match. Each line is
# "<chr_name1> <ndrug> <ndrug+1> <chr_name2> <row label> <row label+1> color=...".
# The context manager closes the file even if a write fails part-way.
with open("./circos/circos_links_athamontanolide.txt","w") as f_out:
    for i, line in enumerate(data.itertuples()):
        if line.nmatches>0:
            link_line = f"{chr_name1} {ndrug} {ndrug+1} {chr_name2} {line.Index} {line.Index+1} color={line_colors[i]}\n"
            f_out.write(link_line)
    


### draw chord diagrams

In [12]:
import os 

In [29]:
# draw for athamontanolide
# replace the path below with your local circos install.

!..\..\..\..\..\circos-0.69-9\bin\circos -conf ./circos/circos_athamontanolide.conf

# move output into the circos folder
os.rename("./circos.png","./circos/circos_athamontanolide.png")

debuggroup summary 0.14s welcome to circos v0.69-8 15 Jun 2019 on Perl 5.014002
debuggroup summary 0.14s current working directory C:/Users/Cernak/Desktop/qcr/8_atoms/acid_amine/zzz_paper_submit_2/Demo
debuggroup summary 0.14s command C:\Users\Cernak\Desktop\circos-0.69-9\bin\circos.exe -conf ./circos/circos_athamontanolide.conf
debuggroup summary 0.14s loading configuration from file ./circos/circos_athamontanolide.conf
debuggroup summary 0.14s found conf file ./circos/circos_athamontanolide.conf
debuggroup summary 0.27s debug will appear for these features: output,summary
debuggroup summary 0.28s bitmap output image ./.\circos.png
debuggroup summary 0.28s parsing karyotype and organizing ideograms
debuggroup summary 0.28s karyotype has 2 chromosomes of total size 12,017
debuggroup summary 0.28s applying global and local scaling
debuggroup summary 0.28s allocating image, colors and brushes
debuggroup summary 1.82s drawing 2 ideograms of total size 12,017
debuggroup summary 1.82s drawi

In [30]:
# draw for noscapine
# replace the path below with your local circos install.

!..\..\..\..\..\circos-0.69-9\bin\circos -conf ./circos/circos_noscapine.conf
os.rename("./circos.png","./circos/circos_noscapine.png")

debuggroup summary 0.14s welcome to circos v0.69-8 15 Jun 2019 on Perl 5.014002
debuggroup summary 0.14s current working directory C:/Users/Cernak/Desktop/qcr/8_atoms/acid_amine/zzz_paper_submit_2/Demo
debuggroup summary 0.14s command C:\Users\Cernak\Desktop\circos-0.69-9\bin\circos.exe -conf ./circos/circos_noscapine.conf
debuggroup summary 0.14s loading configuration from file ./circos/circos_noscapine.conf
debuggroup summary 0.14s found conf file ./circos/circos_noscapine.conf
debuggroup summary 0.28s debug will appear for these features: output,summary
debuggroup summary 0.28s bitmap output image ./.\circos.png
debuggroup summary 0.28s parsing karyotype and organizing ideograms
debuggroup summary 0.28s karyotype has 2 chromosomes of total size 12,017
debuggroup summary 0.28s applying global and local scaling
debuggroup summary 0.28s allocating image, colors and brushes
debuggroup summary 1.82s drawing 2 ideograms of total size 12,017
debuggroup summary 1.82s drawing highlights and 