### Write link files for chord diagrams. This is for the circos package.

Download circos here: http://circos.ca/software/download/

There are pre-written files in the ./circos directory: 
- circos_athamontanolide.conf
- circos_noscapine.conf
- ticks.conf
- kary_single_plainbands.txt

These are configuration files read by the circos package; they control the appearance of the chord diagram.



In [1]:
from rdkit import Chem
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
from matplotlib import cm

from tqdm.notebook import tqdm



In [2]:
# Colormap used to color the links. plt.get_cmap is used instead of
# plt.cm.get_cmap because the latter was removed in Matplotlib 3.9.
col_map = plt.get_cmap('plasma')

def rgb_convert(rgb,alpha):
    """Format a color as the string "(r,g,b,alpha)".

    rgb: sequence whose first three entries are the red, green and blue
        channels on a 0-1 scale. Any further entries are ignored.
    alpha: value written verbatim as the fourth component.

    Each channel is multiplied by 255 and rounded to 5 decimal places.
    """
    scaled = [round(255 * channel, 5) for channel in rgb[:3]]
    red, green, blue = scaled[0], scaled[1], scaled[2]

    return "({},{},{},{})".format(red, green, blue, alpha)

### load data

In [3]:
# swap out the data later...
# Read the table, order it by min_dist_all and renumber the rows from 0,
# so that the index of each row is its rank in that ordering.
data = (
    pd.read_csv("../data_files/acid_amine_smiles_mindist_dbank.csv", index_col=0)
    .sort_values("min_dist_all")
    .reset_index(drop=True)
)
data.head()

Unnamed: 0,smiles,min_dist_all,natoms,drugbank_matches
0,CCC(=O)O,0.0,5,2356
1,C=CC(=O)O,0.0,5,321
2,C=CNC=CC(=O)O,1.0,8,26
3,CCC(=O)OCCN,1.0,8,82
4,C=CNOC(=O)CC,1.0,8,0


### load drug and search

In [4]:
# SMILES string for noscapine, the molecule that is searched for substructure matches.
noscapine = "O=C2O[C@@H](c1ccc(OC)c(OC)c12)[C@@H]5N(C)CCc4c5c(OC)c3OCOc3c4"

In [5]:
# Parse the drug once, then count for every SMILES in the table how many
# times that fragment occurs as a substructure of the drug.
search_molecule = Chem.MolFromSmiles(noscapine)
match_data = [
    len(search_molecule.GetSubstructMatches(Chem.MolFromSmiles(smi)))
    for smi in tqdm(data.smiles)
]

  0%|          | 0/222747 [00:00<?, ?it/s]

In [6]:
# Store the match counts (same row order as data) and order the rows by
# ascending count. The index is deliberately not reset: it keeps each row's
# original label.
data = data.assign(nmatches=match_data).sort_values("nmatches", ascending=True)

### process and write linker file

In [7]:
# color and location settings
# location on the black band - arbitrary
ndrug=10

# band names written as the two endpoints of every link
# (presumably these must match the names in the circos karyotype file - confirm)
chr_name1 = "drug"
chr_name2 = "aciami"

# color_log: whether to color by log10 of matches, or not
# interp10: whether to cap the color scale at 10 matches.
# alpha: the line transparency
color_log = False
interp10 = True
alpha = 0.5


nhits = data['nmatches'].values

if color_log:
    # Rows with zero matches are never written below, so clamp them to 1
    # before taking the log: log10(0) is -inf, which would break the
    # min/max scaling further down.
    nhits = np.log10(np.clip(nhits, 1, None))

if interp10:
    # Scale 1..10 matches onto 0..1; np.interp clamps anything outside.
    # When coloring by log the bounds must be in log units as well.
    n_min, n_max = (0.0, 1.0) if color_log else (1, 10)
else:
    n_min = nhits.min()
    n_max = nhits.max()

# position of every row on the colormap, in [0, 1]
inter = np.interp(nhits,(n_min,n_max),(0,1))

line_colors = [rgb_convert(col_map(i),alpha) for i in inter]

# One link per row with at least one match. line.Index is the DataFrame index
# label, not the current row position, so the order of the rows only decides
# the order in which links are written, not their coordinates.
# The with-block closes the file even if writing fails part-way.
with open("./circos/circos_links_noscapine.txt","w") as f_out:
    for line, color in zip(data.itertuples(), line_colors):
        if line.nmatches>0:
            link_line = f"{chr_name1} {ndrug} {ndrug+1} {chr_name2} {line.Index} {line.Index+1} color={color}\n"
            f_out.write(link_line)
    


### do the same for athamontanolide

In [8]:
# SMILES string for athamontanolide, the second molecule to search.
athamontanolide = "O=C1C(C2[C@@H](OC(C)=O)CC(C)=C3C=C[C@](C)(O)C3C2O1)=C"

# Count, for every SMILES in the table, how many times that fragment occurs
# as a substructure of the drug.
search_molecule = Chem.MolFromSmiles(athamontanolide)
match_data = [
    len(search_molecule.GetSubstructMatches(Chem.MolFromSmiles(smi)))
    for smi in tqdm(data.smiles)
]

# Overwrite the match counts (same row order as data) and order the rows by
# ascending count; the index labels are kept as they are.
data = data.assign(nmatches=match_data).sort_values("nmatches", ascending=True)

  0%|          | 0/222747 [00:00<?, ?it/s]

In [9]:
# color and location settings
# ndrug: location on the drug band where every link starts
ndrug=5

# band names written as the two endpoints of every link
# (presumably these must match the names in the circos karyotype file - confirm)
chr_name1 = "drug"
chr_name2 = "aciami"

# color_log: whether to color by log10 of matches, or not
# interp10: whether to cap the color scale at 10 matches.
# alpha: the line transparency
color_log = False
interp10 = True
alpha = 0.5


nhits = data['nmatches'].values

if color_log:
    # Rows with zero matches are never written below, so clamp them to 1
    # before taking the log: log10(0) is -inf, which would break the
    # min/max scaling further down.
    nhits = np.log10(np.clip(nhits, 1, None))

if interp10:
    # Scale 1..10 matches onto 0..1; np.interp clamps anything outside.
    # When coloring by log the bounds must be in log units as well.
    n_min, n_max = (0.0, 1.0) if color_log else (1, 10)
else:
    n_min = nhits.min()
    n_max = nhits.max()

# position of every row on the colormap, in [0, 1]
inter = np.interp(nhits,(n_min,n_max),(0,1))

line_colors = [rgb_convert(col_map(i),alpha) for i in inter]

# One link per row with at least one match. line.Index is the DataFrame index
# label, not the current row position, so the order of the rows only decides
# the order in which links are written, not their coordinates.
# The with-block closes the file even if writing fails part-way.
with open("./circos/circos_links_athamontanolide.txt","w") as f_out:
    for line, color in zip(data.itertuples(), line_colors):
        if line.nmatches>0:
            link_line = f"{chr_name1} {ndrug} {ndrug+1} {chr_name2} {line.Index} {line.Index+1} color={color}\n"
            f_out.write(link_line)
    


### draw chord diagrams

In [11]:
import os 

In [10]:
# draw for athamontanolide
# replace the path below with your local circos install.
# The leading "!" is IPython shell syntax, so this cell only runs inside a
# notebook / IPython session.

!../../../../circos-0.69-9/bin/circos -conf ./circos/circos_athamontanolide.conf

# circos writes its image to ./circos.png in the working directory;
# move output into the circos folder under a drug-specific name
os.rename("./circos.png","./circos/circos_athamontanolide.png")

debuggroup summary 0.18s welcome to circos v0.69-8 15 Jun 2019 on Perl 5.026001
debuggroup summary 0.21s current working directory /mnt/c/Users/Cernak/Desktop/qcr/8_atoms/acid_amine/zzz_paper_quarantine_zone
debuggroup summary 0.21s command ../../../../circos-0.69-9/bin/circos -conf ./circos/circos_athamontanolide.conf
debuggroup summary 0.21s loading configuration from file ./circos/circos_athamontanolide.conf
debuggroup summary 0.21s found conf file ./circos/circos_athamontanolide.conf
debuggroup summary 0.30s debug will appear for these features: output,summary
debuggroup summary 0.30s bitmap output image ./circos.png
debuggroup summary 0.30s parsing karyotype and organizing ideograms
debuggroup summary 0.32s karyotype has 2 chromosomes of total size 223,017
debuggroup summary 0.32s applying global and local scaling
debuggroup summary 0.32s allocating image, colors and brushes
debuggroup summary 1.32s drawing 2 ideograms of total size 223,017
debuggroup summary 1.32s drawing highlig

In [13]:
# draw for noscapine
# replace the path below with your local circos install.
# The leading "!" is IPython shell syntax, so this cell only runs inside a
# notebook / IPython session.

!../../../../circos-0.69-9/bin/circos -conf ./circos/circos_noscapine.conf
# circos writes its image to ./circos.png in the working directory;
# move it into the circos folder under a drug-specific name
os.rename("./circos.png","./circos/circos_noscapine.png")

debuggroup summary 0.16s welcome to circos v0.69-8 15 Jun 2019 on Perl 5.026001
debuggroup summary 0.18s current working directory /mnt/c/Users/Cernak/Desktop/qcr/8_atoms/acid_amine/zzz_paper_quarantine_zone
debuggroup summary 0.18s command ../../../../circos-0.69-9/bin/circos -conf ./circos/circos_noscapine.conf
debuggroup summary 0.18s loading configuration from file ./circos/circos_noscapine.conf
debuggroup summary 0.18s found conf file ./circos/circos_noscapine.conf
debuggroup summary 0.27s debug will appear for these features: output,summary
debuggroup summary 0.27s bitmap output image ./circos.png
debuggroup summary 0.27s parsing karyotype and organizing ideograms
debuggroup summary 0.28s karyotype has 2 chromosomes of total size 223,017
debuggroup summary 0.29s applying global and local scaling
debuggroup summary 0.29s allocating image, colors and brushes
debuggroup summary 1.23s drawing 2 ideograms of total size 223,017
debuggroup summary 1.23s drawing highlights and ideograms
