In [2]:
#importing the required modules
import os
import matplotlib
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
from scipy.stats import anderson, wilcoxon, ttest_ind

In [3]:
#function for drawing brackets to annotate each repeat family
def draw_brace(ax, xspan, text):
    """Plot a curly brace along the bottom of ``ax`` and label it.

    Parameters
    ----------
    ax : axes-like object
        Must provide ``get_xlim``, ``get_ylim``, ``autoscale``, ``plot``
        and ``text``.
    xspan : pair of numbers
        ``(start, end)`` x-coordinates covered by the brace.
    text : str
        Label centred horizontally on the brace.
    """
    left, right = xspan
    brace_width = right - left

    view_left, view_right = ax.get_xlim()
    view_width = view_right - view_left
    bottom, top = ax.get_ylim()
    view_height = top - bottom

    # Sample count grows with the share of the x-axis the brace covers;
    # 2*k + 1 is always odd, so the curve has a single middle sample.
    n_points = int(brace_width / view_width * 100) * 2 + 1
    # Larger steepness gives tighter corners on the brace.
    steepness = 300. / view_width

    def logistic(offsets):
        return 1 / (1. + np.exp(-steepness * offsets))

    xs = np.linspace(left, right, n_points)
    first_half = xs[:n_points // 2 + 1]
    # Left half: one logistic step at the outer end, one at the middle.
    rising = (logistic(first_half - first_half[0])
              + logistic(first_half - first_half[-1]))
    # Mirror the left half (without repeating the middle sample).
    profile = np.concatenate((rising, rising[-2::-1]))
    ys = bottom + (.15 * profile - .01) * view_height  # vertical placement

    # Freeze the limits so drawing the brace cannot rescale the axes.
    ax.autoscale(False)
    ax.plot(xs, ys, color='black', lw=1)

    label_x = (right + left) / 2.
    label_y = bottom + .2 * view_height
    ax.text(label_x, label_y, text, ha='center', va='bottom')

In [4]:
# Parse '../results/gcs_for_visualisation.txt'.
# The parser relies on a fixed layout (0-based line numbers):
#   line 1  -> numt vs. genome statistic,          samples on lines 2 and 3
#   line 5  -> flanking vs. genome statistic,      samples on lines 6 and 7
#   line 9  -> mitochondrion vs. genome statistic, samples on lines 10 and 11
def _parse_gc_values(row):
    """Convert one comma-separated row of GC values into a list of floats.

    Only the first whitespace-delimited token of ``row`` is used, and its
    final character is dropped before splitting on commas (presumably a
    trailing comma -- confirm against the code that writes the file).
    """
    return list(map(float, row.rsplit()[0][:-1].split(',')))


with open('../results/gcs_for_visualisation.txt') as infile:
    content = infile.readlines()

# The last line read below is index 11. Fail loudly on a truncated file
# instead of leaving a statistic at a placeholder 0, which would later be
# annotated as a significant P-value.
if len(content) < 12:
    raise ValueError(
        'gcs_for_visualisation.txt has %d lines; at least 12 are required'
        % len(content)
    )

# numts vs. genomic samples
nuge_stat = float(content[1].rsplit()[2][7:-1])
numts_gc = _parse_gc_values(content[2])
n_genomic_samples_gc = _parse_gc_values(content[3])

# flanking regions vs. genomic samples
flage_stat = float(content[5].split()[-1])
flankings_gc = _parse_gc_values(content[6])
f_genomic_samples_gc = _parse_gc_values(content[7])

# mitochondrial vs. nuclear genomes
genomes_stat = float(content[9].rsplit()[1])
mt_gc = _parse_gc_values(content[10])
g_gc = _parse_gc_values(content[11])

gc_numts_vs_genome = [numts_gc, n_genomic_samples_gc]
gc_flankings_vs_genome = [flankings_gc, f_genomic_samples_gc]
gc_genomes = [mt_gc, g_gc]

p_values = pd.Series(
    [nuge_stat, flage_stat, genomes_stat],
    index=['nuge_stat', 'flage_stat', 'genomes_stat'],
)
p_values

nuge_stat       3.341610e-07
flage_stat      9.052497e-03
genomes_stat    4.415548e-03
dtype: float64

In [5]:
#create a function for the statistical annotation of the graph
def statistical_annotation(data, significance, positions, height, rounding, ax=None):
    """Draw a significance bracket with a label above two plotted groups.

    Parameters
    ----------
    data : sequence of sequences
        Plotted samples. Only ``data[0]`` and ``data[1]`` are inspected, to
        find the largest value the bracket has to clear.
    significance : float
        Value to report. Below 0.05 it is printed (rounded); otherwise the
        bracket is labelled "n.s.".
    positions : sequence
        ``positions[0]`` and ``positions[1]`` are the x-coordinates of the
        two bracket ends.
    height : float
        Extra vertical gap, in data units, between the largest value and
        the bracket.
    rounding : int
        Number of decimals passed to ``np.round`` for the printed value.
    ax : axes-like object, optional
        Object providing ``plot`` and ``text`` to draw on. Defaults to the
        current pyplot axes, which matches the previous behaviour.
    """
    if ax is None:
        ax = plt.gca()

    x1, x2 = positions[0], positions[1]
    maximum = max([max(data[0]), max(data[1])])
    # y is the base of the bracket legs, h the length of the legs.
    y, h, col = maximum + height + 0.03, 0.03, 'k'
    ax.plot([x1, x1, x2, x2], [y, y+h, y+h, y], lw=2.5, c=col)

    if significance < 0.05:
        # Raw string: "\i" is not a valid escape sequence in a normal
        # literal and triggers a warning on recent Python versions.
        label = r'$\it{P}$ ~ %s' % np.round(significance, rounding)
    else:
        label = "n.s."
    ax.text((x1+x2)*.5, y+h, label, ha='center', va='bottom', color=col, fontsize=20)

In [6]:
#visualizing
# Violin plot of GC content for three comparison pairs (mitochondrion vs.
# genome, numt vs. genome, flanking vs. genome), each annotated with its
# entry from p_values, saved to '../results/fig3.tiff'.
plt.style.use('fivethirtyeight')
bracket_start=0
fig, axes = plt.subplots(1, 1, figsize=(12, 5))
# Draw on the axes created above explicitly instead of relying on pyplot's
# implicit "current axes"; `axes` keeps referring to that same object.
sns.violinplot(data=[mt_gc, g_gc, numts_gc, n_genomic_samples_gc, flankings_gc, f_genomic_samples_gc],
               palette=['lightblue', 'orange', 'lightgreen', 'orange', 'grey', 'orange'],
               ax=axes)
axes.set_ylim(0, 1)
# Light vertical separators between the three comparison pairs.
axes.plot([1.5, 1.5], [0, 1], '-', color='lightgrey', linewidth=1)
axes.plot([3.5, 3.5], [0, 1], '-', color='lightgrey', linewidth=1)
# Fix the tick positions before relabelling them, one tick per violin.
axes.set_xticks(range(6))
axes.set_xticklabels(['mitochondrion', 'genome', 'numt', 'genome', 'flanking', 'genome'], fontsize=20)
axes.set_ylabel('GC content', fontsize=20)
# statistical_annotation only reads the first two datasets it is given, so
# each call passes exactly the pair being compared.
statistical_annotation([mt_gc, g_gc], p_values['genomes_stat'], [0, 1], 0.1, rounding=3)
statistical_annotation([numts_gc, n_genomic_samples_gc], p_values['nuge_stat'], [2, 3], 0.1, rounding=7)
statistical_annotation([flankings_gc, f_genomic_samples_gc], p_values['flage_stat'], [4, 5], 0.1, rounding=3)
fig.tight_layout()
fig.savefig('../results/fig3.tiff', dpi=1000)
# Close this specific figure so it does not linger in the kernel.
plt.close(fig)