In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import os
from sklearn.metrics import auc

%matplotlib inline


In [None]:
# Load every precision-recall table passed in by Snakemake into one long frame.
# Each table is read as tab-separated text and tagged with the path it came from.
pr_frames = []
for pr in snakemake.input:
    tmp_df = pd.read_csv(pr, sep='\t')
    tmp_df['filename'] = pr
    pr_frames.append(tmp_df)
if not pr_frames:
    # pd.concat([]) fails with a generic "No objects to concatenate" message;
    # say what is actually wrong instead.
    raise ValueError("snakemake.input is empty: no precision-recall tables to load")
# Concatenate once: DataFrame.append was deprecated in pandas 1.4 and removed in
# 2.0, and appending inside the loop re-copied the accumulated rows every pass.
# Like the per-file append, this keeps each file's own row index.
pr_df = pd.concat(pr_frames)
# Sample name = file basename up to (not including) its first '.'.
pr_df['Sample_Name'] = pr_df['filename'].apply(lambda x: os.path.basename(x).split('.')[0])
pr_df.head()

In [None]:
# Precision-recall curve for every sample, one panel per sample stacked
# vertically, each annotated with the area under its curve.
samples = pr_df.Sample_Name.unique()
n_panels = len(samples)
fig, axes = plt.subplots(nrows=n_panels, figsize=(6, 6 * n_panels))
if n_panels == 1:
    # With a single row plt.subplots hands back one Axes rather than an
    # array; wrap it so the zip below works in both cases.
    axes = [axes]
pal = sns.color_palette('Set2', n_panels)
for sample_name, ax in zip(samples, axes):
    sample_df = pr_df.loc[pr_df.Sample_Name == sample_name]
    # Consume the palette front to back so every panel gets its own colour.
    curve_color = pal.pop(0)
    ax.plot(sample_df.Recall, sample_df.Precision, color=curve_color)
    ax.set_xlabel('Recall')
    ax.set_ylabel('Precision (1-FDR)')
    ax.set_title('Precision-Recall Plot: {}'.format(sample_name))
    ax.set_ylim((0, 1.05))
    # Area under the precision-recall curve, written onto the panel
    # (the text position is given in data coordinates).
    area = auc(sample_df.Recall, sample_df.Precision)
    ax.text(0.0, 0.85, "AUC = {:.3f}".format(area))
plt.savefig("results/plots/bagel_pr_curve.pdf")

In [None]:
def _read_gene_set(path):
    """Collect the first whitespace-delimited token of every non-blank line.

    Parameters
    ----------
    path : str
        Text file to read.

    Returns
    -------
    set of str
        The distinct first tokens found in the file; blank and
        whitespace-only lines are skipped.
    """
    genes = set()
    with open(path, 'rt') as fh:
        for line in fh:
            fields = line.split()
            # A blank / whitespace-only line splits to an empty list.
            if fields:
                genes.add(fields[0])
    return genes


def _classify_gene(gene):
    """Label a gene 'Essential', 'Negative' or 'Other' from the two reference sets.

    Membership in the essential set is checked first, so a gene listed in
    both files is labelled 'Essential'.
    """
    if gene in ceg_genes:
        return 'Essential'
    if gene in neg_genes:
        return 'Negative'
    return 'Other'


# Reference gene lists come from the Snakemake rule parameters 'ess' and 'neg'.
ceg_genes = _read_gene_set(snakemake.params['ess'])
neg_genes = _read_gene_set(snakemake.params['neg'])
pr_df['Essential'] = pr_df.Gene.apply(_classify_gene)

In [None]:
# Per-sample histogram (with KDE overlay) of the BF column, coloured by the
# 'Essential' label. NOTE(review): BF is presumably BAGEL's Bayes factor -
# confirm against the input tables.
n_panels = len(samples)
fig, axes = plt.subplots(nrows=n_panels, figsize=(6, 6 * n_panels))
if n_panels == 1:
    # A single-row call returns a lone Axes; make it iterable.
    axes = [axes]
bf_window = (-100, 100)  # shared by the bin range and the x-axis limits
for sample_name, ax in zip(samples, axes):
    sample_df = pr_df.loc[pr_df.Sample_Name == sample_name]
    # Only 'Essential' and 'Negative' are named in hue_order; 'Other' genes
    # are presumably left out of the plot as a result.
    sns.histplot(
        data=sample_df,
        x='BF',
        hue='Essential',
        hue_order=['Essential', 'Negative'],
        bins=100,
        binrange=bf_window,
        kde=True,
        ax=ax,
    )
    ax.set_title(sample_name)
    ax.set_xlim(bf_window)
plt.tight_layout()
plt.savefig("results/plots/bagel_ess_neg_hist.pdf")

In [None]:
# Per-sample filled density estimate of the BF column, coloured by the
# 'Essential' label; all panels share one x-axis so samples line up.
n_panels = len(samples)
fig, axes = plt.subplots(nrows=n_panels,
                         sharex=True,
                         figsize=(6, 6 * n_panels))
if n_panels == 1:
    # A single-row call returns a lone Axes; make it iterable.
    axes = [axes]
for sample_name, ax in zip(samples, axes):
    sample_df = pr_df.loc[pr_df.Sample_Name == sample_name]
    # Only the two reference classes are listed in hue_order.
    sns.kdeplot(
        data=sample_df,
        x='BF',
        hue='Essential',
        hue_order=['Essential', 'Negative'],
        fill=True,
        ax=ax,
    )
    ax.set_title(sample_name)
plt.tight_layout()
plt.savefig("results/plots/bagel_ess_neg_dist.pdf")

In [None]:
# Overall BF histogram for every gene, one facet row per sample, each sample
# drawn in its own colour and all rows sharing a y-axis.
g = sns.FacetGrid(
    pr_df,
    row="Sample_Name", row_order=samples,
    hue="Sample_Name", hue_order=samples,
    palette=sns.color_palette("Set2"),
    height=5, aspect=2,
    sharey=True,
)
# FacetGrid.map returns the grid itself, so `g` still refers to the same object.
g.map(plt.hist, "BF", bins=50, range=(-100, 100))
for panel in g.axes.flat:
    # Show tick labels on every facet, not only on the outer ones.
    panel.tick_params(labelbottom=True, labelleft=True)
    panel.set_ylabel("No. Genes")
# NOTE(review): tight_layout is called twice, exactly as before; the second
# pass looks redundant but may refine the layout - confirm before removing it.
g.fig.tight_layout()
g.fig.tight_layout()
plt.savefig("results/plots/bagel_dist.pdf")