### Write link files for chord diagrams. This is for the circos package.

Download circos here: http://circos.ca/software/download/

There are pre-written files in the ./circos directory: 
- circos_athamontanolide.conf
- circos_noscapine.conf
- ticks.conf
- kary_single_plainbands.txt

These are configuration files for the circos package; they control the appearance of the chord diagram.



In [1]:
from rdkit import Chem
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
from matplotlib import cm

from tqdm.notebook import tqdm

In [2]:
# Colormap used further down to colour each link by its match count.
# `plt.cm.get_cmap` was deprecated in Matplotlib 3.7 and removed in 3.9;
# `plt.get_cmap` is the supported pyplot entry point for the same lookup.
col_map = plt.get_cmap('plasma')

def rgb_convert(rgb,alpha):
    """Format a colour as the string "(r,g,b,alpha)".

    rgb is a sequence of channel values in 0-1. Only its first three
    entries are used, so an alpha stored as a fourth entry is ignored in
    favour of the explicit ``alpha`` argument.
    Each channel is multiplied by 255 and rounded to 5 decimal places;
    ``alpha`` is inserted unchanged.
    """
    scaled = tuple(round(channel * 255, 5) for channel in rgb[:3])

    return "({},{},{},{})".format(scaled[0], scaled[1], scaled[2], alpha)

### load data

In [3]:
# Read the fragment table, order it by min_dist_all and renumber the rows
# from 0. The resulting integer index is what the link-writing cells later
# emit as each fragment's position (line.Index).
data = (
    pd.read_csv("./data_files/smiles_min_dist_dbank.csv", index_col=0)
    .sort_values("min_dist_all")
    .reset_index(drop=True)
)
data.head()

Unnamed: 0,smiles,min_dist_all,natoms,drugbank_matches
0,C=CC(=O)O,1.0,5,32
1,CCC(=O)O,1.0,5,223
2,NCCCC(=O)O,2.0,7,31
3,CCC(O)O,2.0,5,64
4,CNC=CC(=O)O,2.0,7,4


### load drug and search

In [4]:
# SMILES string for the query drug noscapine; it is parsed with
# Chem.MolFromSmiles in the search cell below.
noscapine = "O=C2O[C@@H](c1ccc(OC)c(OC)c12)[C@@H]5N(C)CCc4c5c(OC)c3OCOc3c4"

In [5]:
# runtime: about 10 seconds (per the original note)
# For every fragment SMILES in the table, count how many substructure
# matches it has inside the query drug. The counts stay in row order.
search_molecule = Chem.MolFromSmiles(noscapine)
match_data = [
    len(search_molecule.GetSubstructMatches(Chem.MolFromSmiles(smi)))
    for smi in tqdm(data.smiles)
]

  0%|          | 0/57230 [00:00<?, ?it/s]

In [6]:
# Store the match counts and order rows by increasing count. The original
# index labels are kept, so line.Index still refers to the min_dist_all order.
# NOTE(review): ascending order presumably makes high-count links get written
# (and drawn) last - confirm against circos behaviour.
data = data.assign(nmatches=match_data).sort_values(by="nmatches")

### process and write linker file

In [7]:
# color and location settings
# ndrug: start coordinate of every link on the drug band - arbitrary
ndrug=10

# Band names written at the start of each link end.
# NOTE(review): presumably these must match names in the circos karyotype
# file - confirm against ./circos/kary_single_plainbands.txt.
chr_name1 = "drug"
chr_name2 = "aciami"

# color_log: whether to color by log of matches, or not
# interp10: whether to cap the matches at 10.
# alpha: the line transparency
color_log = False
interp10 = True
alpha = 0.5


nhits = data['nmatches'].values

if color_log:
    # Rows with zero matches are never written below, so clamp them to one
    # match (log10 -> 0) instead of letting log10(0) produce -inf.
    nhits = np.log10(np.maximum(nhits, 1))

if interp10:
    # Colour scale runs from 1 to 10 matches; values outside are clipped by
    # np.interp. In log space the same range is log10(1)=0 .. log10(10)=1.
    scale_range = (0, 1) if color_log else (1, 10)
    inter = np.interp(nhits, scale_range, (0, 1))
else:
    # Stretch the colour scale over the full observed range instead.
    n_max = nhits.max()
    n_min = nhits.min()
    inter = np.interp(nhits, (n_min, n_max), (0, 1))

# One "(r,g,b,alpha)" string per row, in the same order as the rows of data.
line_colors = [rgb_convert(col_map(i),alpha) for i in inter]

# The context manager closes the file even if writing raises part-way through.
with open("./circos/circos_links_noscapine.txt","w") as f_out:
    for line, color in zip(data.itertuples(), line_colors):
        # Only fragments that actually occur in the drug get a link.
        if line.nmatches > 0:
            # Link from the fixed drug position to the fragment's index label.
            f_out.write(
                f"{chr_name1} {ndrug} {ndrug+1} {chr_name2} {line.Index} {line.Index+1} color={color}\n"
            )
    


### do the same for athamontanolide

In [8]:
# SMILES string for the second query drug, athamontanolide.
athamontanolide = "O=C1C(C2[C@@H](OC(C)=O)CC(C)=C3C=C[C@](C)(O)C3C2O1)=C"

# Recount substructure matches of every fragment against the new query.
# Counts are produced in the current row order of data.
search_molecule = Chem.MolFromSmiles(athamontanolide)
match_data = [
    len(search_molecule.GetSubstructMatches(Chem.MolFromSmiles(smi)))
    for smi in tqdm(data.smiles)
]

# Overwrite the previous nmatches column and re-sort by increasing count;
# index labels are left untouched.
data = data.assign(nmatches=match_data).sort_values(by="nmatches")

  0%|          | 0/57230 [00:00<?, ?it/s]

In [9]:
# color and location settings
# ndrug: start coordinate of every link on the drug band
ndrug=5

# Band names written at the start of each link end.
# NOTE(review): presumably these must match names in the circos karyotype
# file - confirm against ./circos/kary_single_plainbands.txt.
chr_name1 = "drug"
chr_name2 = "aciami"

# color_log: whether to color by log of matches, or not
# interp10: whether to cap the matches at 10.
# alpha: the line transparency
color_log = False
interp10 = True
alpha = 0.5


nhits = data['nmatches'].values

if color_log:
    # Rows with zero matches are never written below, so clamp them to one
    # match (log10 -> 0) instead of letting log10(0) produce -inf.
    nhits = np.log10(np.maximum(nhits, 1))

if interp10:
    # Colour scale runs from 1 to 10 matches; values outside are clipped by
    # np.interp. In log space the same range is log10(1)=0 .. log10(10)=1.
    scale_range = (0, 1) if color_log else (1, 10)
    inter = np.interp(nhits, scale_range, (0, 1))
else:
    # Stretch the colour scale over the full observed range instead.
    n_max = nhits.max()
    n_min = nhits.min()
    inter = np.interp(nhits, (n_min, n_max), (0, 1))

# One "(r,g,b,alpha)" string per row, in the same order as the rows of data.
line_colors = [rgb_convert(col_map(i),alpha) for i in inter]

# The context manager closes the file even if writing raises part-way through.
with open("./circos/circos_links_athamontanolide.txt","w") as f_out:
    for line, color in zip(data.itertuples(), line_colors):
        # Only fragments that actually occur in the drug get a link.
        if line.nmatches > 0:
            # Link from the fixed drug position to the fragment's index label.
            f_out.write(
                f"{chr_name1} {ndrug} {ndrug+1} {chr_name2} {line.Index} {line.Index+1} color={color}\n"
            )
    


### draw chord diagrams

In [10]:
import os 

In [11]:
# draw for athamontanolide
# replace the path below with your local circos install.
# if there are issues running this inside the notebook, run it directly on the command line.
# the leading "!" is IPython/Jupyter shell syntax, so this line only works inside a notebook.

!../../../../../circos-0.69-9/bin/circos -conf ./circos/circos_athamontanolide.conf

# move output into the circos folder
# assumes circos wrote ./circos.png into the current directory - TODO confirm
# against the .conf file; os.rename raises if that file does not exist.
os.rename("./circos.png","./circos/demo_circos_athamontanolide.png")

In [13]:
# draw for noscapine
# replace the path below with your local circos install.
# the leading "!" is IPython/Jupyter shell syntax, so this line only works inside a notebook.

!../../../../../circos-0.69-9/bin/circos -conf ./circos/circos_noscapine.conf
# move output into the circos folder
# assumes circos wrote ./circos.png into the current directory - TODO confirm
# against the .conf file; os.rename raises if that file does not exist.
os.rename("./circos.png","./circos/demo_circos_noscapine.png")