In [None]:
import re
from pathlib import Path

import numpy as np
import pandas as pd

import seaborn as sns
import matplotlib.pyplot as plt

from bioinf_common.plotting import corrplot

In [None]:
# Select seaborn's 'talk' plotting context for all figures in this notebook.
sns.set_context(context='talk')

# Parameters

In [None]:
# Input/output locations are read from the `snakemake` object, which is not
# defined in this notebook (presumably injected by the Snakemake notebook
# runner -- confirm when running interactively).
dea_fname = snakemake.input.dea_fname

out_dir = Path(snakemake.output.out_dir)
# Later cells save figures into out_dir; create it up front so that saving
# does not fail when the directory does not exist yet. No-op if it already exists.
out_dir.mkdir(parents=True, exist_ok=True)

# Load data

In [None]:
# Read the CSV at `dea_fname` and use its 'node' column as the row index.
df = pd.read_csv(dea_fname).set_index('node')

In [None]:
# Store -log10(pvalue) in a new column; it is the y-axis of the volcano plots.
df['pvalue_trans'] = np.negative(np.log10(df['pvalue']))

In [None]:
# Preview the first five rows of the loaded table.
df.head(n=5)

# Volcano Plot

In [None]:
def get_pathway_type(n):
    """Extract the pathway-type label from a node name.

    If the name starts with letters, then digits, then letters, an
    underscore and at least one more letter, the letter run between the
    digits and the underscore is returned. Otherwise the text before the
    first underscore (or the whole name when there is none) is returned.

    Parameters
    ----------
    n : str
        Node name.

    Returns
    -------
    str
        The derived pathway-type label.
    """
    hit = re.match(r'[a-zA-Z]+\d+([a-zA-Z]+)_[a-zA-Z]+', n)
    if hit is None:
        # Fallback: everything up to the first underscore.
        prefix, _, _ = n.partition('_')
        return prefix
    return hit.group(1)

# Replace the row index with a two-level (pathway_type, node) index.
# Node names are taken from the 'node' level by name rather than by iterating
# df.index directly: that works on the initial flat index and also on the
# two-level index, so re-running this cell does not fail on tuple entries.
nodes = df.index.get_level_values('node')
index = pd.MultiIndex.from_tuples(
    [(get_pathway_type(n), n) for n in nodes],
    names=['pathway_type', 'node'],
)
df = df.set_index(index)
df.head()

In [None]:
# Draw one volcano plot per pathway type (first index level) and save each
# as a PDF in out_dir.
# The level is given as the scalar 0, not the list [0]: with a length-1 list
# recent pandas versions yield 1-tuple group keys, which would end up in the
# title and file name as "('name',)".
for pathway, group in df.groupby(level=0):
    fig, ax = plt.subplots(figsize=(8, 6))
    sns.scatterplot(x='log2FoldChange', y='pvalue_trans', data=group, ax=ax)

    # Dashed reference lines at p = 0.05 and at log2 fold changes of +/-1.
    ax.axhline(-np.log10(.05), color='red', ls='dashed')
    ax.axvline(1, color='red', ls='dashed')
    ax.axvline(-1, color='red', ls='dashed')

    # Make the x-axis symmetric around zero, using the larger current limit.
    xmax = np.abs(ax.get_xlim()).max()
    ax.set_xlim(-xmax, xmax)

    ax.set_title(f'Pathway type: {pathway}')

    fig.tight_layout()
    fig.savefig(out_dir / f'volcano_plot_{pathway}.pdf')