In [0]:
import scandir
import os
import rpy2
from rpy2.robjects import pandas2ri
pandas2ri.activate()
import rpy2.robjects as ro
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import numpy as np
import scipy as sp
import dill
import random
import cyvcf
from hdfstorehelper import HDFStoreHelper
import statsmodels.api as sm
import statsmodels.formula.api as smf
import operator
import traceback
import warnings
warnings.filterwarnings('ignore',category=pd.io.pytables.PerformanceWarning)
%load_ext rpy2.ipython
r = ro.r

In [0]:
analysis_dir = "/home/cfriedline/eckertlab/gypsy_indiv/masked/analysis/samtools1.2_no_otis/beagle40/"
snp_file_gz = "isect.vcf.gz.sorted.gz"

In [0]:
hdf = HDFStoreHelper(os.path.join(analysis_dir, "isect.hd5"))
hdf_all = HDFStoreHelper(os.path.join(analysis_dir, "gypsy_samtools12_imputed40.vcf.gz.hd5"))

## write piMASS files

In [0]:
# Pull the phenotype columns used for piMASS out of the stored table.
# An explicit copy is taken because later cells add columns to `pimass_pheno`;
# assigning into a column slice of another frame can raise
# SettingWithCopyWarning and leaves it ambiguous which object is modified.
pimass_pheno = hdf_all["pca_std_pheno"][["Population",
                                             "Number",
                                             "Mass","Pupual Duration", "Total Dev Time"]].copy()
pimass_pheno.head()

In [0]:
hdf['pimass_pheno'] = pimass_pheno

In [0]:
pca_x = hdf['pca_x']
pca_x.head()

In [0]:
pca_std_pheno = pimass_pheno.join(pca_x, how="inner")

In [0]:
pca_std_pheno.head()

In [0]:
# Keep the principal-component columns plus the three phenotype columns,
# then make the column names formula-friendly (no spaces).
pimass_pheno_pca = pca_std_pheno[[x for x in pca_std_pheno
                                  if "PC" in x
                                  or 'Mass' in x
                                  or 'Pupual' in x
                                  or 'Total Dev' in x]]
pimass_pheno_pca.columns = [x.replace(" ", "_") for x in pimass_pheno_pca.columns]
pimass_pheno_pca.index = list(pimass_pheno_pca.index)

# Regress each phenotype on PC1..PC8 and store the residuals on
# `pimass_pheno` under "<phenotype>_resid" (lower-cased).
phenos = ["Mass", "Pupual_Duration", "Total_Dev_Time"]
for p in phenos:
    mod = smf.ols(formula="%s~PC1+PC2+PC3+PC4+PC5+PC6+PC7+PC8" % p, data=pimass_pheno_pca)
    res = mod.fit()
    col = ("%s_resid" % p).lower()
    pimass_pheno[col] = res.resid

In [0]:
z12_swapped = hdf["z12_swapped"]

In [0]:
z12_swapped.head()

In [0]:
translation_df = pd.read_csv("translation_table.csv", sep="\t", index_col=0)

def get_correct_name(row, trans):
    """Record the corrected sample name for one translation-table row.

    The name is built as ``"<pop>_<indiv>_<dup>"`` from the row's ``pop``,
    ``indiv`` and ``dup`` fields and stored in ``trans`` under the row's
    label (``row.name``). ``trans`` is modified in place; nothing is returned.
    """
    # 'pop' must be read with [] because attribute access would hit the
    # pandas ``pop`` method instead of the column.
    fields = (row['pop'], row.indiv, row.dup)
    trans[row.name] = "%s_%d_%d" % fields

name_translation = {}
translation_df.apply(get_correct_name, args=(name_translation,), axis=1);

In [0]:
# Walk every record in the VCF and collect, per SNP ("<CHROM>_<POS>"):
#   * gt_ref_alt[snp_id]   -> the REF allele and the first ALT allele
#   * gt_base_data[snp_id] -> {translated sample name: called bases}
# The file is opened in a `with` block so the handle is released even if
# parsing or a name lookup fails part-way through.
# NOTE(review): the file is a .gz opened in default text mode; this relies on
# cyvcf handling decompression itself - confirm for the cyvcf version in use.
gt_base_data = {}
gt_ref_alt = {}
at = 0
with open(os.path.join(analysis_dir, snp_file_gz)) as readvcf:
    reader = cyvcf.VCFReader(readvcf)
    for snp in reader:
        snp_id = "%s_%d" % (snp.CHROM, snp.POS)
        gt_ref_alt[snp_id] = {'ref': snp.REF, 'alt': snp.ALT[0]}
        for sample in snp.samples:
            calls = gt_base_data.setdefault(snp_id, {})
            sample_name = name_translation[sample.sample]
            bases = sample.gt_bases
            calls[sample_name] = bases
        at += 1
        # coarse progress indicator
        if at % 1000 == 0:
            print(at)
# rows = samples, columns = SNP ids
gt_base_df = pd.DataFrame(gt_base_data)

In [0]:
gt_base_df.head()

In [0]:
def swap_gt_alleles(gt, het):
    """Normalise one genotype string.

    Parameters
    ----------
    gt : str, float or None
        Genotype bases such as ``"A/G"`` or ``"A|A"``. A float (pandas NaN)
        or ``None`` denotes a missing call.
    het : str
        The heterozygote string to substitute for any heterozygous call
        (the caller passes ``"<minor>/<major>"``).

    Returns
    -------
    str or float
        ``np.nan`` for a missing call; the genotype with ``|`` replaced by
        ``/`` when the first and last characters match (homozygote);
        otherwise ``het``.
    """
    # Missing calls arrive as float NaN (from pandas) or as None.
    # np.nan is used rather than np.NaN: the latter alias was removed in
    # NumPy 2.0, and the rest of this notebook already spells it np.nan.
    if gt is None or isinstance(gt, float):
        return np.nan
    if gt[0] == gt[-1]:
        return gt.replace("|", "/")
    return het  # heterozygote: already expressed as minor/major
    
def swap_gt(snp):
    """Rewrite one SNP column so heterozygotes read ``"<minor>/<major>"``.

    Allele counts are tallied from the first and last character of every
    distinct genotype string in ``snp``, weighted by how often that genotype
    occurs (so a homozygote contributes twice to its allele). The least
    frequent allele is recorded as ``minor`` and the most frequent as
    ``major`` in the module-level ``gt_ref_alt[snp.name]``.

    Returns the column with each value passed through ``swap_gt_alleles``.

    Notes
    -----
    The major allele is taken from the *end* of the sorted counts. The
    previous ``counts2[1]`` raised IndexError for a monomorphic SNP and, with
    more than two observed alleles, picked the second-rarest allele instead
    of the most common one. For ordinary biallelic SNPs the result is
    unchanged. A column with no called genotypes still raises IndexError.
    """
    vc = snp.value_counts()  # NaN (missing) genotypes are not counted
    counts = {}
    for v in vc.index:
        for allele in (v[0], v[-1]):
            counts[allele] = counts.get(allele, 0.0) + vc[v]
    # ascending by count, e.g. [('A', 110.0), ('G', 236.0)]
    counts2 = sorted(list(counts.items()), key=operator.itemgetter(1))
    minor = counts2[0][0]
    major = counts2[-1][0]
    het = "%s/%s" % (minor, major)
    gt_ref_alt[snp.name]['minor'] = minor
    gt_ref_alt[snp.name]['major'] = major
    return snp.apply(swap_gt_alleles, args=(het,))
gt_base_df_swapped = gt_base_df.apply(swap_gt)
gt_base_df_swapped.head()

In [0]:
def convert_GP_to_L(q):
    """Return ``10 ** (-q / 10)``, i.e. undo a ``-10 * log10`` scaling of ``q``."""
    exponent = -q/10.0
    return 10 ** exponent

def get_dosage(gp, index):
    """Compute an allele dosage from a sequence of three genotype values.

    Parameters
    ----------
    gp : sequence or None
        Values indexed 0, 1, 2; element 1 is always counted once and element
        ``index`` is counted twice.
    index : int
        Position in ``gp`` whose value is doubled.

    Returns
    -------
    list or tuple
        ``["NA"]`` when ``gp`` is empty/None; otherwise
        ``(gp, copy_of_gp_as_list, dosage)``. Callers take the last element.

    Raises
    ------
    AssertionError
        If the computed dosage falls outside ``[0, 2]``.
    """
    if gp:
        gp2 = list(gp)
        dosage = (gp2[1] + 2*gp2[index])
        assert 0 <= dosage <= 2
        return gp, gp2, dosage
    return ["NA"]

def get_GP(sample):
    """Return ``(GT, GP)`` for a sample, or ``(None, None)`` if ``GT`` is None."""
    return (None, None) if sample['GT'] is None else (sample['GT'], sample['GP'])

def get_major_minor(snp, reader):
    """Build one piMASS genotype row for a SNP column.

    ``snp.name`` is expected to look like ``"<contig>_<position>"``; the
    record is re-fetched from ``reader`` at that position and a dosage is
    computed for each sample from its ``GP`` field via ``get_dosage``.

    Returns a Series labelled ``minor``, ``major`` followed by one entry per
    sample (labelled with the sample name as stored in the VCF record).
    """
    name_parts = snp.name.split("_")
    loc = int(name_parts[-1])
    contig = "_".join(name_parts[0:-1])

    alleles = gt_ref_alt[snp.name]
    minor = alleles['minor']
    major = alleles['major']
    ref = alleles['ref']  # looked up but not used below
    alt = alleles['alt']

    # Index of the genotype value that is doubled in get_dosage: 0 when the
    # minor allele is taken to be the reference, 2 when it equals the ALT.
    minor_index = 2 if minor == alt else 0

    thesnp = list(reader.fetch(contig, loc, loc))[0]
    samples = []
    dosages = []
    for sample in thesnp.samples:
        gt, gp = get_GP(sample)
        # get_dosage returns ["NA"] or (gp, gp2, dosage); the last item is the value
        dosages.append(get_dosage(gp, minor_index)[-1])
        samples.append(sample.sample)

    ret = pd.Series([minor, major] + dosages,
                    index=["minor", "major"] + samples)
    return ret
# Re-open the VCF for random access while building the dosage table.
# The `with` block closes the handle once the table is built; previously
# `h` was left open for the rest of the session.
with open(os.path.join(analysis_dir, snp_file_gz)) as h:
    reader = cyvcf.VCFReader(h)
    # one row per SNP: minor, major, then per-sample dosages
    pimass_gt = gt_base_df_swapped.apply(get_major_minor, args=(reader,)).T
pimass_gt.head()

In [0]:
pimass_gt.head()

In [0]:
# Re-order (and restrict) the phenotype rows to the sample index of the
# genotype-bases table.
# NOTE(review): the dosage columns of `pimass_gt` are emitted in the order the
# samples appear in the VCF record (and carry untranslated names), whereas
# this uses `gt_base_df.index`. piMASS is given both files without sample
# labels, so confirm the two orders are the same.
pimass_pheno = pimass_pheno.reindex(index=gt_base_df.index)

In [0]:
%R -i pimass_pheno

In [0]:
%%R
massx = qqnorm(pimass_pheno$mass_resid, plot.it=F)$x
tdtx = qqnorm(pimass_pheno$total_dev_time_resid, plot.it=F)$x
pdx = qqnorm(pimass_pheno$pupual_duration_resid, plot.it=F)$x

In [0]:
pimass_pheno['massx'] = r('massx')
pimass_pheno['tdtx'] = r('tdtx')
pimass_pheno['pdx'] = r('pdx')

In [0]:
pimass_pheno.head()

In [0]:
pimass_pheno.massx.to_csv(os.path.join(analysis_dir, "pimass_mass.txt"),
                                     index=False,
                                     header=False)
pimass_pheno.tdtx.to_csv(os.path.join(analysis_dir, "pimass_tdt.txt"),
                                     index=False,
                                     header=False)
pimass_pheno.pdx.to_csv(os.path.join(analysis_dir, "pimass_pd.txt"),
                                     index=False,
                                     header=False)
pimass_pheno.to_csv(os.path.join(analysis_dir, "pimass_pheno.txt"),
                                     index=True,
                                     header=True)
pimass_gt.to_csv(os.path.join(analysis_dir, "pimass_gt.txt"),
                index=True,
                header=False)

In [0]:
# Write the piMASS position file: one line per SNP with
# "<contig>_<pos>\t<pos>\t<chromosome id>", where the chromosome id is a
# running integer assigned to each contig.
pimass_contigs = {}
with open(os.path.join(analysis_dir, "pimass_loc.txt"), "w") as o:
    # First pass: group SNP positions by contig (SNP ids are "<contig>_<pos>").
    for x in pimass_gt.index:
        data = x.split("_")
        contig, pos = "_".join(data[0:-1]), data[-1]
        pimass_contigs.setdefault(contig, []).append(pos)

    # Second pass: emit the lines, numbering contigs from 1.
    chrom_id = 1
    for contig, positions in list(pimass_contigs.items()):
        for p in positions:
            o.write("%s_%s\t%s\t%d\n" % (contig, p, p, chrom_id))
        chrom_id += 1

In [0]:
import random
def create_pimass_run_files(num_runs, out_dir=None):
    """Write one piMASS command file per phenotype.

    For each of the phenotypes ``mass``, ``tdt`` and ``pd`` a file
    ``pimass_<pheno>_run.txt`` is written containing ``num_runs`` command
    lines, one per replicate run. Each line gets its own output prefix
    (``pimass_<pheno>_out_<i>``) and a fresh 32-bit random seed (``-r``).

    Parameters
    ----------
    num_runs : int
        Number of replicate commands to write per phenotype.
    out_dir : str, optional
        Directory to write into. Defaults to the notebook-level
        ``analysis_dir``.
    """
    if out_dir is None:
        out_dir = analysis_dir
    # Joined with single spaces this is the same one-line command as before.
    cmd_template = " ".join([
        "~/g/src/pimass/pimass-lin",
        "-g pimass_gt.txt",
        "-p pimass_%s.txt -pos pimass_loc.txt",
        "-o pimass_%s_out_%d",
        "-w 1000000",
        "-s 10000000",
        "-num 500",
        "-smin 1",
        "-smax 100",
        "-hmin 0.01",
        "-hmax 0.9",
        "-pmin 1",
        "-pmax 1000",
        "-r %d",
    ])
    phenos = ["mass", 'tdt', 'pd']
    for p in phenos:
        with open(os.path.join(out_dir, "pimass_%s_run.txt" % p), "w") as o:
            # range, not xrange: xrange does not exist on Python 3
            for i in range(num_runs):
                cmd = cmd_template % (p, p, i, random.getrandbits(32))
                o.write("%s\n" % cmd)
                


def create_qsub_files(run_dir=None):
    """Write a qsub wrapper script next to every ``*run.txt`` command file.

    For each matching file ``<f>`` a script ``<f>_qsub.sh`` is written that
    names the job ``pimass_<pheno>`` (``<pheno>`` is the second
    ``_``-separated token of the file name) and runs the commands with
    ``parallel -a <f>``.

    Parameters
    ----------
    run_dir : str, optional
        Directory searched for ``*run.txt`` files. Defaults to the
        notebook-level ``analysis_dir``.

    Notes
    -----
    Files are found with ``glob`` instead of capturing ``!ls`` output. When
    nothing matched, the captured output was the ``ls`` error text, which was
    then treated as a file name. ``glob`` returns an empty list in that case
    and also works outside IPython.
    """
    import glob  # local import: only this helper needs it

    if run_dir is None:
        run_dir = analysis_dir
    files = sorted(glob.glob(os.path.join(run_dir, "*run.txt")))
    for f in files:
        with open("%s_qsub.sh" % f, "w") as o:
            o.write("""#!/bin/bash
#$ -j y
#$ -V
#$ -N pimass_%s
#$ -cwd
parallel -a %s
""" % (os.path.basename(f).split("_")[1], f))
            
create_pimass_run_files(10)
create_qsub_files()

In [0]:
hdf['pimass_gt'] = pimass_gt

## run piMASS

```bash
./run_pimass.sh
mv output output_comeault_isect
mv pimass*.txt output_comeault_isect_infiles
```



## analyze and process piMASS

In [0]:
assembly = "/home/cfriedline/gpfs/assemblies/gypsy/masurca_new/CA/10-gapclose/genome.ctg.fasta"

In [0]:
filedir = "/home/cfriedline/eckertlab/gypsy_indiv/masked/analysis/samtools1.2_no_otis/beagle40/output_comeault_isect/"

In [0]:
def dump_session():
    """Save the interpreter session to ``pimass.dill`` inside ``filedir``.

    Sets dill's ``recurse`` and ``fmode`` settings before dumping.
    """
    dill.settings.update({'recurse': True, 'fmode': dill.HANDLE_FMODE})
    session_file = os.path.join(filedir, "pimass.dill")
    dill.dump_session(filename=session_file)

In [0]:
# Index the piMASS output files under `filedir` by phenotype and file kind.
# File names are expected to look like "pimass_<pheno>_out_<run>.<kind>...";
# the phenotype is the second "_"-separated token and the kind is detected by
# substring ('path', 'mcmc', 'gamma', 'snp', checked in that order).
path_files = {}
mcmc_files = {}
gamma_files = {}
snp_files = {}
for root, dirs, files in scandir.walk(filedir):
    # sorted() makes the per-phenotype lists (and therefore "[0]" picks
    # further down) reproducible; directory listing order is arbitrary.
    for f in sorted(files):
        d = f.split("_")
        if len(d) < 2:
            # Not a piMASS output (e.g. the "pimass.dill" session dump that
            # dump_session() writes into this same directory); d[1] would
            # raise IndexError.
            continue
        pheno = d[1]
        if not pheno in path_files:
            path_files[pheno] = []
            mcmc_files[pheno]= []
            gamma_files[pheno] = []
            snp_files[pheno] = []
        if 'path' in f:
            path_files[pheno].append(os.path.join(root, f))
        elif 'mcmc' in f:
            mcmc_files[pheno].append(os.path.join(root, f))
        elif 'gamma' in f:
            gamma_files[pheno].append(os.path.join(root, f))
        elif 'snp' in f:
            snp_files[pheno].append(os.path.join(root, f))

In [0]:
%%R
library(coda)

In [0]:
mcmc = r('mcmc')
mcmc_list = r('mcmc.list')

In [0]:
# Load every "path" output file per phenotype into a DataFrame.
dfs = {}
phenos = ["mass", "pd", "tdt"]
for pheno in phenos:
    frames = [pd.read_csv(x,sep="\t") for x in path_files[pheno]]
    # Drop the last column by position. `.iloc` replaces the `.ix` indexer,
    # which was removed in pandas 1.0; for this purely positional slice the
    # two are equivalent.
    # NOTE(review): presumably the dropped column is an empty one produced by
    # a trailing tab in the piMASS output - confirm against a file.
    frames = [x.iloc[:, :-1] for x in frames]
    for df in frames:
        # header fields may carry padding whitespace
        df.columns = [x.strip() for x in df.columns]
    dfs[pheno] = frames

In [0]:
dfs['mass'][0].head()

In [0]:
# Thin each chain (random subsample of rows, kept in original order) and
# store it both as a pandas frame and as a coda `mcmc` object.
# `mcmc` here must still be the R function bound by `mcmc = r('mcmc')`;
# a later cell rebinds the name to a dict, so this cell has to run first.
path_mcmc_r = {}
path_mcmc = {}
thin = 1
for key, dflist in list(dfs.items()):
    # Sample once and reuse: drawing two independent samples made the pandas
    # and R copies contain different rows whenever thin < 1.
    thinned = [x.sample(frac=thin).sort_index() for x in dflist]
    path_mcmc[key] = thinned
    path_mcmc_r[key] = [mcmc(pandas2ri.DataFrame(x)) for x in thinned]

In [0]:
path_mcmc_list_mass = mcmc_list(path_mcmc_r['mass'])
path_mcmc_list_pd = mcmc_list(path_mcmc_r['pd'])
path_mcmc_list_tdt = mcmc_list(path_mcmc_r['tdt'])

In [0]:
%R -i path_mcmc_list_mass -i path_mcmc_list_pd -i path_mcmc_list_tdt

In [0]:
%%R
effective_sizes_mass = lapply(path_mcmc_list_mass,effectiveSize)
effective_sizes_pd = lapply(path_mcmc_list_pd,effectiveSize)
effective_sizes_tdt = lapply(path_mcmc_list_tdt,effectiveSize)

In [0]:
def get_effective_sizes(r_name):
    """Fetch the R list named ``r_name`` and return it as a DataFrame.

    Each element of the R list becomes one row; the columns are labelled
    with R's ``names()`` of the first element.
    """
    rows = [pandas2ri.ri2py(x) for x in r[r_name]]
    df = pd.DataFrame(rows)
    first_element = r[r_name].rx2(1)
    df.columns = r('names')(first_element)
    return df
ne_tdt = get_effective_sizes('effective_sizes_tdt')
ne_pd= get_effective_sizes('effective_sizes_pd')
ne_mass= get_effective_sizes('effective_sizes_mass')


In [0]:
print(ne_tdt.mean())
print(ne_tdt.std())


In [0]:
ne_pd.mean()

In [0]:
ne_mass.mean()

In [0]:
print("MASS", r("summary")(path_mcmc_list_mass))
print("PD", r("summary")(path_mcmc_list_pd))
print("TDT", r("summary")(path_mcmc_list_tdt))

In [0]:
%%R
plot(path_mcmc_list_mass)
plot(path_mcmc_list_pd)
plot(path_mcmc_list_tdt)

In [0]:
# Load the per-run "mcmc" output tables and place them side by side, one
# wide frame per phenotype. Columns are suffixed with the run identifier
# taken from the file name (text after the last "_" up to the first ".").
# NOTE: this rebinds `mcmc`, which earlier held the R `mcmc` function.
mcmc = {}
for pheno, files in list(mcmc_files.items()):
    run_frames = []
    for f in files:
        index = os.path.basename(f).split("_")[-1].split(".")[0]
        testdf = pd.read_csv(f, sep="\t")
        testdf.columns = ["%s_%s" % (x.strip(), index) for x in testdf.columns]
        run_frames.append(testdf)
    # Concatenate once per phenotype instead of re-copying the growing frame
    # on every file; keep the empty-frame result when there are no files.
    mcmc[pheno] = pd.concat(run_frames, axis=1) if run_frames else pd.DataFrame()

In [0]:
mcmc_mass = mcmc['mass']
mcmc_pd = mcmc['pd']
mcmc_tdt = mcmc['tdt']

In [0]:
def get_hmean_row(row):
    """Harmonic mean of ``row``; NaN when ``hmean`` rejects it with ValueError."""
    try:
        result = sp.stats.hmean(row)
    except ValueError:
        result = np.nan
    return result
    
def get_hmean(df, col_pattern):
    """Summarise replicate runs with a per-row harmonic mean.

    Parameters
    ----------
    df : DataFrame
        Wide table holding columns ``rs_1`` and ``chr_1`` plus, for each
        pattern, one column per run whose name contains that pattern.
    col_pattern : list of str
        Substrings selecting the groups of columns to average.

    Returns
    -------
    DataFrame
        Indexed like ``df`` with columns ``rs``, ``chr`` and one
        ``<pattern>_hmean`` column per pattern, holding the harmonic mean of
        the absolute values across the matching columns.
    """
    cols = ['rs', 'chr'] + ["%s_hmean" % x for x in col_pattern]
    d = pd.DataFrame(columns=cols, index=df.index)
    d['rs'] = df.rs_1.values
    d["chr"] = df.chr_1.values
    for cp in col_pattern:
        matching = [x for x in df if cp in x]
        row_means = np.abs(df[matching]).apply(get_hmean_row, axis=1)
        d["%s_hmean" % cp] = row_means.values
    return d
mcmc_mass_hmean = get_hmean(mcmc_mass, ["postrb", "betarb"])
mcmc_tdt_hmean = get_hmean(mcmc_tdt, ["postrb", "betarb"])
mcmc_pd_hmean = get_hmean(mcmc_pd, ["postrb", "betarb"])

In [0]:
hdf['mcmc_mass_hmean'] = mcmc_mass_hmean
hdf['mcmc_tdt_hmean'] = mcmc_tdt_hmean
hdf['mcmc_pd_hmean'] = mcmc_pd_hmean

In [0]:
mcmc_hmean = {'mass': mcmc_mass_hmean,
             'tdt': mcmc_tdt_hmean,
             'pd': mcmc_pd_hmean}

In [0]:
?sp.stats.distributions.norm

In [0]:
sm.qqplot.__module__

In [0]:
sm.qqplot(mcmc_mass_hmean.postrb_hmean, dist=sp.stats.distributions.norm, line="s")
plt.show()

In [0]:
def plot_dist(data, title):
    """Histogram ``data`` and overlay several fitted scipy distributions.

    Each distribution is fitted to ``data`` and its pdf is drawn using the
    first two fitted parameters as ``loc`` and ``scale``.
    """
    sns.set_context("talk")
    plt.hist(data, bins=100, alpha=0.2)
    for d in ('norm', 'rayleigh', 'maxwell', 'logistic', 'laplace', 'cauchy'):
        dist = getattr(sp.stats, d)
        loc, scale = dist.fit(data)[:2]
        x = np.linspace(min(data), max(data), 10000)
        plt.plot(x, dist.pdf(x, loc=loc, scale=scale), label=d)
    plt.legend()
    plt.title(title)
    plt.show()
plot_dist(mcmc_mass_hmean.postrb_hmean, "mass")
plot_dist(mcmc_pd_hmean.postrb_hmean, "pd")
plot_dist(mcmc_tdt_hmean.postrb_hmean, "tdt")

In [0]:
def percent_difference(x, y):
    """Absolute difference of ``x`` and ``y`` as a percentage of their mean."""
    a, b = float(x), float(y)
    gap = np.abs(a-b)
    midpoint = np.mean([a, b])
    return (gap/midpoint)*100

def get_quantile_max(name, data, q):
    """Collect quantiles and cutoff statistics for ``data`` in one Series.

    Parameters
    ----------
    name : str
        Stored as the ``name`` of the returned Series.
    data : Series
        Values to summarise.
    q : list of float
        Quantiles to compute; the entries labelled ``'0.95'`` and ``'0.99'``
        are read back below, so both must be requested.

    Returns
    -------
    Series
        The requested quantiles (labelled with their string form) followed by
        median/mean, MAD-based cutoffs (median + 3 * MAD), the fixed
        ``cutoff`` of 0.01, percent differences of the 95th/99th quantiles
        against ``cutoff`` and the median, ``relaxed_cutoff`` (the 99th
        quantile), and the min/max.
    """
    stats = data.quantile(q)
    stats.index = [str(x) for x in stats.index]

    median = data.median()
    mad_norm = sm.robust.mad(data)
    mad_cauchy = sm.robust.mad(data, c=sp.stats.cauchy.ppf(0.75))
    hard_cutoff = 0.01
    q95 = stats['0.95']
    q99 = stats['0.99']

    # Entries are appended in this fixed order.
    stats['median_val'] = median
    stats['mean_val'] = data.mean()
    stats['mad_norm'] = mad_norm
    stats['mad_norm_cutoff'] = 3*mad_norm + median
    stats['mad_cauchy'] = mad_cauchy
    stats['mad_cauchy_cutoff'] = 3*mad_cauchy + median
    stats['cutoff'] = hard_cutoff
    stats["x99_cutoff"] = percent_difference(q99, hard_cutoff)
    stats["x99_median"] = percent_difference(q99, median)
    stats["x95_cutoff"] = percent_difference(q95, hard_cutoff)
    stats["x95_median"] = percent_difference(q95, median)
    stats['relaxed_cutoff'] = q99
    stats['min'] = data.min()
    stats['max'] = data.max()
    stats.name = name
    return stats

mass_quant = get_quantile_max("mass", mcmc_mass_hmean.postrb_hmean, [0.95,0.99])
pd_quant = get_quantile_max("pd", mcmc_pd_hmean.postrb_hmean, [0.95,0.99])
tdt_quant =get_quantile_max("tdt", mcmc_tdt_hmean.postrb_hmean, [0.95,0.99]) 

In [0]:
print("%s\n\n%s\n\n%s\n" % (mass_quant, pd_quant, tdt_quant))

In [0]:
hdf['mass_quant'] = mass_quant
hdf['pd_quant'] = pd_quant
hdf['tdt_quant'] = tdt_quant

In [0]:
sig_snps_mass = mcmc_mass_hmean[mcmc_mass_hmean.postrb_hmean > mass_quant.cutoff]
sig_snps_tdt = mcmc_tdt_hmean[mcmc_tdt_hmean.postrb_hmean > tdt_quant.cutoff]
sig_snps_pd = mcmc_pd_hmean[mcmc_pd_hmean.postrb_hmean > pd_quant.cutoff]

relaxed_sig_snps_mass = mcmc_mass_hmean[mcmc_mass_hmean.postrb_hmean > mass_quant.relaxed_cutoff]
relaxed_sig_snps_tdt = mcmc_tdt_hmean[mcmc_tdt_hmean.postrb_hmean > tdt_quant.relaxed_cutoff]
relaxed_sig_snps_pd = mcmc_pd_hmean[mcmc_pd_hmean.postrb_hmean > pd_quant.relaxed_cutoff]

In [0]:
sig_snps_mass.shape, sig_snps_tdt.shape, sig_snps_pd.shape

In [0]:
relaxed_sig_snps_mass.shape, relaxed_sig_snps_tdt.shape, relaxed_sig_snps_pd.shape

In [0]:
relaxed_sig_snps_mass.head()

In [0]:
relaxed_sig_snps_tdt.head()

In [0]:
relaxed_sig_snps_pd.head()

In [0]:
# phenotype -> contig -> running sums of 'postrb' and 'betarb'
contig_pips = {}
def get_contig_pip(row, pheno):
    """Add one SNP row's values to its contig's running totals.

    The contig is ``row.rs`` with its final ``_``-separated token removed.
    ``row.postrb_hmean`` and ``row.betarb_hmean`` are accumulated into the
    module-level ``contig_pips[pheno][contig]``; nothing is returned.
    """
    per_contig = contig_pips.setdefault(pheno, {})
    contig = "_".join(row.rs.split("_")[:-1])
    totals = per_contig.setdefault(contig, {'betarb':0,'postrb':0})
    totals['postrb'] += row.postrb_hmean
    totals['betarb'] += row.betarb_hmean

for pheno, df in list(mcmc_hmean.items()):
    print(pheno)
    df.apply(get_contig_pip, args=(pheno,), axis=1)


In [0]:
contig_pip_dfs = {}
for pheno, data in list(contig_pips.items()):
    contig_pip_dfs[pheno] = pd.DataFrame(data).T

In [0]:
from Bio import SeqIO
contig_lengths = {}
for rec in SeqIO.parse(assembly,"fasta"):
    contig_lengths[rec.name] = {"length":len(rec)}

In [0]:
contig_length_df = pd.DataFrame(contig_lengths).T

In [0]:
contig_length_df.head()

In [0]:
contig_pip_mass = contig_pip_dfs['mass'].join(contig_length_df)
contig_pip_tdt = contig_pip_dfs['tdt'].join(contig_length_df)
contig_pip_pd = contig_pip_dfs['pd'].join(contig_length_df)

In [0]:
hdf['contig_pip_mass'] = contig_pip_mass
hdf['contig_pip_tdt'] = contig_pip_tdt
hdf['contig_pip_pd'] = contig_pip_pd

In [0]:
def plot_contig_length_vs_pip(df, title):
    """Scatter the ``postrb`` column against the ``length`` column of ``df``."""
    lengths, pips = df.length, df.postrb
    plt.scatter(lengths, pips)
    plt.xlabel("length of contig")
    plt.ylabel("postrb")
    plt.title(title)
    plt.show()
for key, df in list({'mass':contig_pip_mass, 
                'tdt': contig_pip_tdt, 
                'pd': contig_pip_pd}.items()):
    plot_contig_length_vs_pip(df, key)

In [0]:
plt.plot(contig_pip_dfs['tdt'].postrb, label="PIP")
plt.title("tdt contigs")
plt.legend()
plt.show()

plt.plot(contig_pip_dfs['mass'].postrb, label="PIP")
plt.title("mass contigs")
plt.legend()
plt.show()

plt.plot(contig_pip_dfs['pd'].postrb, label="PIP")
plt.title("pd contigs")
plt.legend()
plt.show()



In [0]:
plt.xlim(0, len(mcmc_mass))
plt.plot(mcmc_mass_hmean.postrb_hmean, alpha=0.5, label="PIP (RB)")
plt.plot(mcmc_mass_hmean.betarb_hmean, alpha=0.5, label="Beta (RB)")
plt.title("Mass")
plt.xlabel("SNP")
plt.legend()
plt.show()

In [0]:
plt.xlim(0, len(mcmc_pd))
plt.plot(mcmc_pd_hmean.postrb_hmean, alpha=0.5, label="PIP (RB)")
plt.plot(mcmc_pd_hmean.betarb_hmean, alpha=0.5, label="Beta (RB)")
plt.title("PD")
plt.xlabel("SNP")
plt.legend()
plt.show()

In [0]:
plt.xlim(0, len(mcmc_tdt))
plt.plot(mcmc_tdt_hmean.postrb_hmean, alpha=0.5, label="PIP (RB)")
plt.plot(mcmc_tdt_hmean.betarb_hmean, alpha=0.5, label="Beta (RB)")
plt.title("TDT")
plt.xlabel("SNP")
plt.legend()
plt.show()

In [0]:
# Load the per-run "snp" output tables (whitespace separated, with one extra
# line before the column header) and place them side by side, one wide frame
# per phenotype. Columns are suffixed with the run identifier from the file
# name (text after the last "_" up to the first ".").
snps = {}
for pheno, files in list(snp_files.items()):
    run_frames = []
    for f in files:
        index = os.path.basename(f).split("_")[-1].split(".")[0]
        # `with` closes each file; previously every handle was left open.
        with open(f) as h:
            h.readline() ##skip header
            header = h.readline().strip().split()
            data = [line.strip().split() for line in h]

        testdf = pd.DataFrame(data, columns=header)
        testdf.columns = ["%s_%s" % (x.strip(), index) for x in testdf.columns]
        run_frames.append(testdf)
    # Concatenate once per phenotype instead of re-copying the growing frame
    # on every file; keep the empty-frame result when there are no files.
    snps[pheno] = pd.concat(run_frames, axis=1) if run_frames else pd.DataFrame()

In [0]:
snps_mass = snps['mass'][[x for x in snps['mass'] if '_1' in x]]

In [0]:
snps_mass.head()

In [0]:
def read_gamma(f):
    """Read a whitespace-separated table into a float DataFrame.

    The first line of ``f`` supplies the column names; every following line
    is one row. The literal string ``'NA'`` is treated as missing (NaN) and
    all columns are converted to float.

    The file is opened in a ``with`` block so the handle is closed before
    returning (it was previously left open).
    """
    with open(f) as h:
        header = h.readline().strip().split()
        rows = [line.strip().split() for line in h]
    df = pd.DataFrame(rows, columns=header)
    return df.replace('NA', np.nan).astype(float)
gamma_mass = read_gamma(gamma_files['mass'][0])
gamma_pd = read_gamma(gamma_files['pd'][0])
gamma_tdt = read_gamma(gamma_files['tdt'][0])

In [0]:
hdf["sig_snps_mass"] = sig_snps_mass
hdf["sig_snps_tdt"] = sig_snps_tdt
hdf["sig_snps_pd"] = sig_snps_pd
hdf["relaxed_sig_snps_mass"] = relaxed_sig_snps_mass
hdf["relaxed_sig_snps_tdt"] = relaxed_sig_snps_tdt
hdf["relaxed_sig_snps_pd"] = relaxed_sig_snps_pd



In [0]:
plt.hist(np.abs(sig_snps_mass.betarb_hmean.values))
plt.text(0.007, 1.5, r"$n = %d$" % len(sig_snps_mass))
plt.title(r"Mass ($> %.2f$)" % mass_quant.cutoff)
plt.xlabel(r"$\beta$")
plt.show()
plt.hist(np.abs(relaxed_sig_snps_mass.betarb_hmean.values))
plt.text(0.007, 70, r"$n = %d$" % len(relaxed_sig_snps_mass))
plt.title(r"Mass 99th($> %.5f$)" % mass_quant.relaxed_cutoff)
plt.xlabel(r"$\beta$")
plt.show()

In [0]:
plt.hist(np.abs(sig_snps_tdt.betarb_hmean.values))
plt.text(0.010, 2.5, r"$n = %d$" % len(sig_snps_tdt))
plt.title(r"TDT ($> %.2f$)" % tdt_quant.cutoff)
plt.xlabel(r"$\beta$")
plt.show()
plt.hist(np.abs(relaxed_sig_snps_tdt.betarb_hmean.values))
plt.text(0.010, 100, r"$n = %d$" % len(relaxed_sig_snps_tdt))
plt.title(r"TDT 99th ($> %.5f$)" % tdt_quant.relaxed_cutoff)
plt.xlabel(r"$\beta$")
plt.show()

In [0]:
plt.hist(np.abs(sig_snps_pd.betarb_hmean.values))
plt.text(0.008, 6, r"$n = %d$" % len(sig_snps_pd))
plt.title(r"PD ($> %.2f$)" % pd_quant.cutoff)
plt.xlabel(r"$\beta$")
plt.show()
plt.hist(np.abs(relaxed_sig_snps_pd.betarb_hmean.values))
plt.text(0.008, 70, r"$n = %d$" % len(relaxed_sig_snps_pd))
plt.title(r"PD 99th ($> %.5f$)" % pd_quant.relaxed_cutoff)
plt.xlabel(r"$\beta$")
plt.show()