In [4]:
#import modules
import os
import random
import matplotlib
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D

In [5]:
#switch matplotlib to the non-interactive 'Agg' backend; the figure is written to disk
#with plt.savefig in the plotting cell rather than shown in a window
matplotlib.use('Agg')

In [6]:
#load the numt table, drop the abnormal numts coming from the dmtDNA alignment
#(every row whose mt_start is not below 17245) and order the rest by mt_start
numts=pd.read_csv(r'Z:\balint\numt/3numt_array.csv')
numts=numts.loc[numts['mt_start'].lt(17245)].sort_values(by='mt_start')
#working subset: numts whose mt_start+mt_length also stays below 17245
trial_numts=numts.loc[numts['mt_start'].add(numts['mt_length']).lt(17245)]

In [7]:
#palette of 21 colours chosen to be as different from each other as possible,
#stored as a Series keyed by the integers 1..21
colors=pd.Series(
    [
        '#e6194b', '#3cb44b', '#ffe119',
        '#4363d8', '#f58231', '#911eb4',
        '#46f0f0', '#f032e6', '#bcf60c',
        '#fabebe', '#008080', '#e6beff',
        '#9a6324', '#fffac8', '#800000',
        '#aaffc3', '#808000', '#ffd8b1',
        '#000075', '#00FF7F', '#000000',
    ],
    index=np.arange(1,22),
)

In [8]:
#function for visualizing scaffold located numts
def scf_numts(row):
    """Draw one scaffold-located numt as a grey horizontal bar on ``ax2``.

    row -- record providing 'g_id', 'mt_start' and 'mt_length'

    Rows whose 'g_id' is at most 3 characters long are ignored. For every
    other row a bar of width 'mt_length' starting at 'mt_start' is drawn at
    y position ``scaffold_tracker``, and that global counter is then
    advanced by one so the next bar lands on the next row.
    """
    global scaffold_tracker
    if len(row['g_id'])<=3:
        return
    ax2.barh(scaffold_tracker,row['mt_length'],left=row['mt_start'],height=1,color='grey')
    scaffold_tracker=scaffold_tracker+1

In [9]:
#function for visualizing intragenic numts
def intragenic_numts(row):
    """Draw one intragenic numt on ``ax3`` and write its label next to it.

    row -- record providing 'ensembl_description', 'g_id', 'mt_start'
           and 'mt_length'

    A row is skipped when its 'ensembl_description' is of type float
    (presumably the NaN of a missing description -- confirm) or when its
    'g_id' equals 'GL018744'. Otherwise a bar coloured by ``colors[int(g_id)]``
    is drawn at y position ``intragenic_tracker`` together with the matching
    entry of ``intragenic_yticklabels``, and the global counter is advanced.
    A ValueError raised while drawing (e.g. a 'g_id' that int() cannot
    parse) is swallowed and the row is left out without advancing the counter.
    """
    global intragenic_tracker
    if type(row['ensembl_description']) is float or row['g_id']=='GL018744':
        return
    try:
        ax3.barh(intragenic_tracker,row['mt_length'],left=row['mt_start'],height=0.25,
                 color=colors[int(row['g_id'])])
        ax3.text(-2500,intragenic_tracker,intragenic_yticklabels[intragenic_tracker],fontsize=10)
    except ValueError:
        return
    intragenic_tracker=intragenic_tracker+1

In [10]:
#function for visualizing chr located numts
def chr_numts(row):
    """Draw one chromosome-located numt as a coloured bar on ``ax1``.

    row -- record providing 'ensembl_description', 'g_id', 'mt_start'
           and 'mt_length'

    Only rows whose 'ensembl_description' is of type float are drawn
    (presumably the NaN of a missing description -- confirm). The bar colour
    is ``colors[int(g_id)]``; rows whose 'g_id' cannot be converted to an
    integer, or has no entry in ``colors``, are skipped silently. After a bar
    is drawn at y position ``numt_tracker`` the global counter is advanced.

    Unlike a blanket ``except``, errors raised by the drawing call itself
    are not hidden and propagate to the caller.
    """
    global numt_tracker
    if type(row['ensembl_description'])!=float:
        return
    try:
        bar_color=colors[int(row['g_id'])]
    except (ValueError,KeyError):
        #non-numeric id (e.g. a scaffold name) or an id without a palette colour: nothing to draw
        return
    ax1.barh(numt_tracker,row['mt_length'],left=row['mt_start'],height=1,
             color=bar_color)
    numt_tracker+=1

In [11]:
#legend handles: one line-less square marker (size 7) per palette colour, in palette order
custom_lines_color=[
    Line2D([0],[0],color=shade,lw=0,marker='s',markersize=7)
    for shade in colors
]

In [12]:
#function for drawing brackets to annotate each mitochondrial part
def annotate_mitochondrion(ax, xspan, text):
    """Plot a curly brace over an x-interval of ``ax`` and caption it.

    ax    -- axes to draw on; its current x and y limits are read to scale
             and position the brace
    xspan -- (xmin, xmax) pair giving the interval the brace should cover
    text  -- caption written centred over the interval, rotated by 45 degrees

    The brace sits at the lower y limit of ``ax``. Autoscaling of ``ax`` is
    switched off before anything is drawn.
    """
    left, right = xspan
    width = right - left
    axis_left, axis_right = ax.get_xlim()
    axis_width = axis_right - axis_left
    bottom = ax.get_ylim()[0]
    height = 10 #fixed vertical scale used for the brace and the caption offset
    n_points = int(width/axis_width*100)*2+1 #always odd, so there is a centre sample
    steepness = 300./axis_width #the higher this is, the tighter the curl

    xs = np.linspace(left, right, n_points)
    first_half = xs[:n_points//2+1]

    def sigmoid(offset):
        #logistic step centred where offset is zero
        return 1/(1.+np.exp(-steepness*offset))

    #left half of the brace: one step at its start plus one step at its end
    half_profile = sigmoid(first_half-first_half[0]) + sigmoid(first_half-first_half[-1])
    #mirror it, without repeating the centre sample, to get the full brace
    profile = np.concatenate((half_profile, half_profile[-2::-1]))
    ys = bottom + (.15*profile - .01)*height #adjust vertical position

    ax.autoscale(False)
    ax.plot(xs, ys, color='black', lw=1)

    ax.text((right+left)/2., bottom+.25*height, text, ha='center', va='bottom', fontsize=12.5,rotation=45)

In [13]:
#read in the mitochondrial annotation and add d-loop
#the file is parsed as tab-separated text without a header row, so the six column names
#are assigned by hand right after loading
mitos=pd.read_csv(r'Z:\balint\numt/mitos_annotation.bed',sep='\t',header=None)
mitos.columns=['MT','START','END','DESCRIPTION','SIG','STRAND']
#append the D-loop as one extra row (START 15445, END 17245, '+' strand);
#len(mitos) is used as the label of the new row, i.e. the next free integer of a 0..n-1 index
mitos.loc[len(mitos)]=['MT',15445,17245,'D-loop',0.001,'+']

In [14]:
#boolean mask over trial_numts: True where 'g_id' is shorter than three characters.
#Computed on the column as a whole instead of calling a Python lambda for every row;
#this also yields a proper (empty) boolean Series when trial_numts has no rows,
#and a missing 'g_id' gives False rather than raising.
chr_mask=trial_numts['g_id'].str.len().lt(3)

In [15]:
#text labels for the intragenic panel: intragenic_numts looks up the entry at position
#intragenic_tracker for every bar it draws, so the order of this list has to follow the
#order in which those bars are drawn
#NOTE(review): the purpose of the trailing '' entry is not evident from the code - confirm
intragenic_yticklabels=['SESN1','LRRC9','LRRC9','SEC23B','CFAP300','PLEKHA1','MEMO1','MEMO1','KIAA1328',
                       'KIAA1328','PYGO2','KIAA1328','MED13L','']

In [16]:
#apply the 'fivethirtyeight' matplotlib style to everything drawn from here on
plt.style.use('fivethirtyeight')
#row counters read and incremented by chr_numts, scf_numts and intragenic_numts:
#each one holds the y position of the next bar in its panel, so all start at 0 before drawing
numt_tracker=0
scaffold_tracker=0
intragenic_tracker=0

#one 12x12 inch figure holding all three panels
plt.figure(figsize=(12,12))

#panel (a): chromosomal numts, occupying the top row of a 2x1 grid
ax1=plt.subplot(211)
ax1.set_ylim(-9,41)
ax1.set_xlim(0,17245)
ax1.set_ylabel('Chromosomal numts', fontsize=20)
ax1.set_xlabel('Mitochondrial nucleotides (bp)', fontsize=20)
#NOTE(review): the tick labels are set without set_xticks/set_yticks, so they are attached to
#whatever tick positions matplotlib chose automatically - confirm labels and positions agree
ax1.set_xticklabels([0,2000,4000,6000,8000,10000,12000,14000,16000,17245,''],fontsize=20)
ax1.set_yticklabels(['',0,'','','',40,'',''],fontsize=20)
#apply is used only for its side effect: chr_numts draws a bar on ax1 for each qualifying row
trial_numts.apply(chr_numts,axis=1)
#legend made of the prepared square markers, labelled with the index values of colors
legend=ax1.legend(custom_lines_color,colors.index.values, loc='upper left', title='Genomic location',
                  fontsize=10,ncol=3)
ax1.add_artist(legend)
#add curly bracket to annotate mitochondrial nucleotides
#only annotation entries spanning more than 100 positions (END-START) get a brace
#NOTE(review): the enumerate counter is used as a row label for mitos['END'] and
#mitos['DESCRIPTION'], which relies on mitos carrying a 0..n-1 index
for index, start in enumerate(mitos['START']):
    if (mitos['END'][index]-start)>100:
        annotate_mitochondrion(ax1,(start,mitos['END'][index]),mitos['DESCRIPTION'][index])
#panel letter, placed in data coordinates to the left of the axes
ax1.text(-2000,40,'(a)', fontsize=30)

#panel (b): scaffold numts, bottom-left cell of a 2x2 grid
ax2=plt.subplot(223)
#apply is used only for its side effect: scf_numts draws a grey bar on ax2 for each qualifying row
trial_numts.apply(scf_numts,axis=1)
ax2.set_xlim(0,20000)
#NOTE(review): tick labels are set without set_xticks/set_yticks and the y limits are left to
#autoscaling, so the labels depend on the automatically chosen tick positions - confirm
ax2.set_xticklabels([0,4000,8000,12000,16000,'',''],fontsize=20)
ax2.set_yticklabels(['',0,'','','','',100], fontsize=20)
ax2.set_ylabel('Scaffold numts', fontsize=20)
ax2.set_xlabel('Mitochondrial nucleotides (bp)', fontsize=20)
#panel letter, placed in data coordinates to the upper left of the axes
ax2.text(-5000,102,'(b)', fontsize=30)

#panel (c): intragenic numts, bottom-right cell of a 2x2 grid
ax3=plt.subplot(224)
#only the rows selected by chr_mask are passed on to intragenic_numts
intragenic_df=trial_numts[chr_mask]
#apply is used only for its side effect: intragenic_numts draws a bar and a text label on ax3
intragenic_df.apply(intragenic_numts,axis=1)
ax3.set_xlim(-200,20000)
#NOTE(review): tick labels are set without set_xticks, so they depend on the automatically
#chosen tick positions - confirm
ax3.set_xticklabels(['',0,4000,8000,12000,16000,'',''],fontsize=20)
#y tick labels are blanked; the axis title is written as rotated text in data coordinates instead
ax3.set_yticklabels([])
ax3.text(-4000,3.5,'Intragenic numts',fontsize=20,rotation='vertical')
ax3.set_xlabel('Mitochondrial nucleotides (bp)', fontsize=20)
#panel letter, placed in data coordinates to the upper left of the axes
ax3.text(-4500,12.75,'(c)', fontsize=30)
plt.tight_layout()
#write the finished figure to a path relative to the working directory, at 450 dpi
plt.savefig('../results/fig1.png',dpi=450)

  del sys.path[0]
  
