In [1]:
#import modules
import os
import random
import matplotlib
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D

In [2]:
#read csv into a dataframe
numts=pd.read_csv('/Volumes/motilin/balint/numt/3numt_array.csv')
numts=numts[numts['mt_start']<17245]#filter out the abnormal numts coming from the dmtDNA alignment
numts=numts.sort_values(by='mt_start')
numts=numts[(numts['mt_start']+numts['mt_length'])<17245]

In [3]:
#try to get as different colors as possible
colors=pd.Series(['#e6194b', '#3cb44b', '#ffe119', '#4363d8', '#f58231', '#911eb4', '#46f0f0', '#f032e6',
                  '#bcf60c','#fabebe', '#008080', '#e6beff', '#9a6324', '#fffac8', '#800000', '#aaffc3',
                  '#808000', '#ffd8b1','#000075', '#00FF7F','#000000'], index=np.arange(1,22,1))

In [5]:
#get custom legend colors
custom_lines_color=colors.apply(lambda color: Line2D([0],[0],
                                                         color=color,
                                                         lw=0,
                                                         marker='s',
                                                        markersize=7)).tolist()

In [49]:
#function for drawing brackets to annotate each mitochondrial part
def annotate_mitochondrion(ax, xspan, text):
    """Draws an annotated brace on the axes."""
    xmin, xmax = xspan
    xspan = xmax - xmin
    ax_xmin, ax_xmax = ax.get_xlim()
    xax_span = ax_xmax - ax_xmin
    ymin, ymax = ax.get_ylim()
    yspan = 10
    resolution = int(xspan/xax_span*100)*2+1 # guaranteed uneven
    beta = 300./xax_span # the higher this is, the smaller the radius

    x = np.linspace(xmin, xmax, resolution)
    x_half = x[:resolution//2+1]
    y_half_brace = (1/(1.+np.exp(-beta*(x_half-x_half[0])))
                    + 1/(1.+np.exp(-beta*(x_half-x_half[-1]))))
    y = np.concatenate((y_half_brace, y_half_brace[-2::-1]))
    y = ymin + (.15*y - .01)*yspan # adjust vertical position

    ax.autoscale(False)
    ax.plot(x, y, color='black', lw=1)

    ax.text((xmax+xmin)/2., ymin+.25*yspan, text, ha='center', va='bottom', fontsize=14,rotation=75)

In [7]:
#read in the mitochondrial annotation and add d-loop
mitos=pd.read_csv('/Volumes/motilin/balint/numt/mitos_annotation.bed',sep='\t',header=None)
mitos.columns=['MT','START','END','DESCRIPTION','SIG','STRAND']
mitos.loc[len(mitos)]=['MT',15445,17245,'D-loop',0.001,'+']

In [141]:
intragenic_yticklabels=['SESN1','LRRC9','LRRC9','SEC23B','CFAP300','PLEKHA1','MEMO1','MEMO1','',
                       'KIAA1328','PYGO2','KIAA1328','MED13L','']

In [142]:
#function for visualizing chr located and intragenic numts
def chr_numts(row,axs,numt_type):
    global numt_tracker
    global intragenic_tracker
    if numt_type=='chr':
        axs.barh(numt_tracker,row['mt_length'],left=row['mt_start'],height=1,
             color=colors[int(row['g_id'])])
        numt_tracker+=1
    else:
        if type(row['ensembl_description'])!=float:
            axs.barh(numt_tracker,row['mt_length'],left=row['mt_start'],height=1,
                 color=colors[int(row['g_id'])])
            axs.text(row['mt_start']+row['mt_length']+150,numt_tracker-0.5,intragenic_yticklabels[intragenic_tracker])
            intragenic_tracker+=1
        else:
            axs.barh(numt_tracker,0.01,left=row['mt_start'],height=1,
                 color='grey')
        numt_tracker+=1

In [143]:
#function for modify the given axs
def axes_mod(axs,xlabel,ylabel,xlim,ylim,yticklabels,fontsize,text,textposition):
    axs.set_xlabel(xlabel,fontsize=fontsize)
    axs.set_ylabel(ylabel,fontsize=fontsize)
    axs.set_xlim(xlim)
    axs.set_ylim(ylim)
    axs.set_yticklabels(yticklabels)
    axs.text(textposition[0],textposition[1],text,fontsize=30)

In [144]:
fig,axs=plt.subplots(1,2, figsize=(15,7))

chr_mask=numts.apply(lambda row:len(row['g_id'])<3,axis=1)

axes_mod(axs[0],
         'Mitochondrial nucleotides (bp)',
         'Chromosomal numts',
         (0,17000),
         (-8,50),
         ['',0,'','','','',50],
         20,'(a)',(-40,52))
numt_tracker=0
numts[chr_mask].apply(chr_numts,args=(axs[0],'chr',),axis=1)
for index, start in enumerate(mitos['START']):
    if (mitos['END'][index]-start)>100:
        annotate_mitochondrion(axs[0],(start,mitos['END'][index]),mitos['DESCRIPTION'][index])
legend=axs[0].legend(custom_lines_color,colors.index.values, loc='upper left', title='Genomic location (Chr)',
                  fontsize=10,ncol=3)
axs[0].add_artist(legend)

axes_mod(axs[1],
         'Mitochondrial nucleotides (bp)',
         'Intragenic numts',
         (0,17000),
         (-8,50),
         ['',0,'','','','',50],
         20,'(b)',(-40,52))
intragenic_tracker=0
axs[1].set_ylim(-8,50)
numt_tracker=0
numts[chr_mask].apply(chr_numts,args=(axs[1],'intragenic',),axis=1)
for index, start in enumerate(mitos['START']):
    if (mitos['END'][index]-start)>100:
        annotate_mitochondrion(axs[1],(start,mitos['END'][index]),mitos['DESCRIPTION'][index])
plt.tight_layout()
plt.savefig('../results/Fig2.png',dpi=450)
plt.close()

  axs.set_yticklabels(yticklabels)
  axs.set_yticklabels(yticklabels)
