## Plot figures for recombination and mutation

In [None]:
import sys, os
import numpy as np
import pandas as pd
import gzip, pickle
import datetime
import seaborn as sns
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages


# Switch the working directory to the repository root before importing the
# project-local `shared` package (presumably so that the import resolves).
path = "/.../ProbPolymorphism"     #path to repository
if os.getcwd() != path:
    os.chdir(path)
from shared import recombination

Violin plot for variance due to recombination by chromosome.

In [None]:
# Load the pickled table of variance estimates and save a violin plot of the
# 'vars' column grouped by the 'chr' column to a single-page PDF.
path = "..."                            #path to data
if os.getcwd() != path:
    os.chdir(path)
fname = '...'                            #file produced by ARMA_pq_analysis_all_by_chrom.py
# NOTE: unpickling can execute arbitrary code; only load files you produced yourself.
# The file handle gets its own name so it is not shadowed by the loaded data.
with gzip.open(fname, 'rb') as pickled:
    variates = pickle.load(pickled)

with PdfPages("Article_references/violin_plot_sexav.pdf") as pdf:
    f = plt.figure(figsize=(15,6))
    # Convert chromosome labels to numbers (presumably so the x axis is
    # ordered numerically rather than lexically).
    variates["chr"] = pd.to_numeric(variates["chr"])
    sns.set_style("whitegrid")
    ax = sns.violinplot(x="chr", y="vars", data=variates, color='b')
    # Raw string: '\h' and '\s' are not valid Python escapes, so a plain
    # literal triggers an invalid-escape warning. The value is unchanged.
    ax.set_ylabel(r'$\hat{\sigma }^2_{rec}$', fontsize=16)
    ax.set_xlabel('Chromosome', fontsize=16)
    ax.ticklabel_format(axis='y', style='sci', scilimits=(0,0))
    ax.set_ylim([0, 1e-6])
    # Record provenance in the PDF metadata.
    d = pdf.infodict()
    d['Title'] = 'Violin plot for variance due to recombination - ' +'(sex-averaged)'
    d['Author'] = 'H. Simon'
    d['Subject'] = 'Datafile: ' + fname
    d['Keywords'] = 'Notebook: ' + 'Plot figures for recombination and mutation.ipynb'
    d['CreationDate'] = datetime.datetime.today()
    pdf.savefig(f, orientation='landscape')
plt.show()

Heat map for posterior probability by mutation type and chromosome.

In [None]:
# Collect the 'pval' column of each per-chromosome ARMA result file into a
# (mutation type x chromosome) table and save it as a heat map PDF.
path = "..."                                            #path to data 
if os.getcwd() != path:
    os.chdir(path)
sns.set_style("whitegrid")

# Common stem of the per-chromosome result files. It is reused for the PDF
# metadata so that the metadata no longer depends on slicing a fixed number of
# characters off the last file name read.
fname_stem = 'Recombination_data/ARMApq_results_sexav_ARARMApq_ch'

# NOTE(review): the step of 2 selects only odd-numbered chromosomes
# ('1', '3', ..., '21') - confirm this is intended.
chroms = np.arange(1, 23, 2).astype('str')
index = ['C->T', 'G->A', 'T->C', 'A->G', 'C->G', 'G->C', 'T->G', 'A->C', 'T->A', 'A->T', 'C->A', 'G->T']
result = pd.DataFrame(np.zeros((len(index), len(chroms))), index=index, columns=chroms)
for chrom in chroms:
    fname = fname_stem + chrom + '.csv'
    cfile = pd.read_csv(fname, sep=',', index_col=0)
    # Column assignment aligns on row labels; labels missing from the file become NaN.
    result[chrom] = cfile['pval']
# Replace the ASCII arrow in the row labels with a Unicode arrow for display.
result.index = [x[0] + ' \u2192 ' + x[3] for x in result.index]

# Sample "Blues_r" at 101 levels and give the lowest five levels the same
# colour as level 0.
# NOTE(review): this band corresponds to values below 0.05 only if the colour
# scale spans [0, 1]; the heatmap is drawn without vmin/vmax - confirm.
levels = np.linspace(0,1,101)
colors = plt.get_cmap("Blues_r",101)(levels)
colors[0:5] = colors[0]
cmap = matplotlib.colors.ListedColormap(colors)    # Create a new colormap with colors

with PdfPages("Article_references/heatmap.pdf") as pdf:
    f = plt.figure()
    ax = sns.heatmap(result, cmap=cmap, cbar_kws={'label': 'Probability'})
    ax.set_xlabel('Chromosome')
    ax.set_ylabel('Mutation direction')
    # Record provenance in the PDF metadata; 'X' stands in for the chromosome number.
    d = pdf.infodict()
    d['Title'] = 'Heatmap for influence of recombination - ' +'(sex-averaged)'
    d['Author'] = 'H. Simon'
    d['Subject'] = 'Datafile: ' + fname_stem + 'X.csv'
    d['Keywords'] = 'Notebook: ' + 'Plot figures for recombination and mutation.ipynb'
    d['CreationDate'] = datetime.datetime.today()
    pdf.savefig(f, orientation='landscape')
plt.show()

Plot variances in recombination rate for Discussion.

In [None]:
# Compute the variance of the 'stdrate' column for each of chromosomes 1-22
# and save a horizontal bar chart of the variances, with selected chromosomes
# drawn in a contrasting colour.
path = "..."
if os.getcwd() != path:
    os.chdir(path)
# Hoisted out of the loop and used in the file name (it was previously
# assigned on every iteration but never read).
sex = 'sex-averaged'       #options are male, female, sex-averaged
chroms, variances = list(), list()
for chrom in np.arange(1,23).astype(str):
    csv_filename = 'Recombination_data/recomb_table_SW_' + sex + '_ch' + chrom + '.csv'
    data_table = pd.read_csv(csv_filename, sep=',', index_col=0)
    data_table = recombination.correct_missing_data(data_table, 'LOCF', None)
    std_rates = data_table['stdrate'].values
    variance = np.var(std_rates)           # computed once, used for print and table
    print("%2d" % int(chrom), "%.3f" % np.mean(std_rates), "%.3f" % variance)
    chroms.append(int(chrom))              # store as int; no str -> float -> int round-trip
    variances.append(variance)
table = pd.DataFrame({'Chromosome': chroms, 'Variance': variances})

# One colour per chromosome, in table order. The highlighted set reproduces
# the positions previously hard-coded in a 22-entry list.
c1 = "denim blue"
c2 = "pale red"
highlighted = {9, 15, 16, 17, 22}
colors = [c2 if chrom in highlighted else c1 for chrom in table['Chromosome']]
colmap = sns.xkcd_palette(colors)
sns.palplot(colmap)

sns.set_style("whitegrid")
# Plot the Series rather than the one-column DataFrame: for a DataFrame,
# pandas applies a colour list per column (only the first colour would be
# used), whereas for a Series the list is applied per bar.
ax = table.set_index("Chromosome")["Variance"].plot(kind="barh", color=colmap)
ax.set_xlabel("Normalized variance in recombination rate")
ax.get_figure().savefig("Article_references/recomb_bar_plot.pdf", bbox_inches='tight')