# Volcano plots for transcriptomics and proteomics

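This notebook loads the aggregated mixed-effects results for transcriptomics and proteomics and writes static volcano plots for each subpopulation combination and drug family to the `volcano_plots_rem_vs_act` folder under the results directory.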


In [1]:
# Imports
import os
import pandas as pd
import seaborn as sns

# Specific imports from ccf_medication modules
from ccf_medication.utils.loading import (
    load_aggregated_results,
)
from ccf_medication.plotting.volcano import (
    plot_volcano_plots_with_labels,
)

# Constants
from ccf_medication.constants.pathing import (
    RESULTS_DIR,
    AGG_REM_VS_ACT_TX_RESULTS_PATH,
    AGG_REM_VS_ACT_PX_RESULTS_PATH,
)
# Significance / effect-size cutoffs passed to the volcano-plot calls below.
# ADJ_PVAL_THRESH is shared by both omics layers, so it is imported once.
from ccf_medication.constants.thresholds import (
    ADJ_PVAL_THRESH,  # p_value_threshold for both transcriptomics and proteomics
    TX_FC_THRESH,  # fold_change_threshold for transcriptomics
    PX_FC_THRESH,  # fold_change_threshold for proteomics
)

# Notebook-wide settings:
#   - silence pandas' chained-assignment warning (option set to None)
#   - use seaborn's "whitegrid" style for figures
pd.set_option("mode.chained_assignment", None)
sns.set(style="whitegrid")


In [2]:
# Load the aggregated results tables, one per omics layer
print("Loading aggregated results...")

tx_rem_vs_act_results = load_aggregated_results(AGG_REM_VS_ACT_TX_RESULTS_PATH)
px_rem_vs_act_results = load_aggregated_results(AGG_REM_VS_ACT_PX_RESULTS_PATH)

# Quick sanity check: report how many rows each table contains.
for row_label, results_table in (
    ("Transcriptomics rows:", tx_rem_vs_act_results),
    ("Proteomics rows:", px_rem_vs_act_results),
):
    print(row_label, len(results_table))


Loading aggregated results...
Transcriptomics rows: 357126
Proteomics rows: 29390


In [3]:
# Output directories: one root folder for this comparison, plus one
# sub-folder per omics layer.
VOLCANO_DIR = os.path.join(RESULTS_DIR, "volcano_plots_rem_vs_act")
VOLCANO_TX_DIR = os.path.join(VOLCANO_DIR, "transcriptomics")
VOLCANO_PX_DIR = os.path.join(VOLCANO_DIR, "proteomics")

# exist_ok=True keeps this cell safe to re-run when the folders already exist.
for out_dir in (VOLCANO_DIR, VOLCANO_TX_DIR, VOLCANO_PX_DIR):
    os.makedirs(out_dir, exist_ok=True)

# Echo the root folder as the cell output.
VOLCANO_DIR


'/home/timothy.hart/ccf-medication/data/results/volcano_plots_rem_vs_act'

In [4]:
# Volcano plots: Transcriptomics (static PNG)
print("Plotting Transcriptomics volcano plots...")

# Arguments are collected first so the configuration can be read at a glance.
# NOTE(review): VOLCANO_TX_DIR is created earlier but not passed here; presumably
# sub_dir_levels nests the files under output_path/"transcriptomics" - confirm
# against plot_volcano_plots_with_labels.
tx_volcano_kwargs = {
    "results_df": tx_rem_vs_act_results,
    # Plots are split by diagnosis and tissue, and by drug family.
    "subpop_cols": ["diagnosis", "simple_tissue"],
    "drug_family_col": "drug_family",
    "feature_col": "feature",
    # Significance is judged on the adjusted p-value column.
    "p_value_col": "adjusted_p_value",
    "p_value_threshold": ADJ_PVAL_THRESH,
    "fold_change_threshold": TX_FC_THRESH,
    "output_path": VOLCANO_DIR,
    "sub_dir_levels": ["transcriptomics"],
    "file_suffix": "volcano",
    "interactive": False,
}
plot_volcano_plots_with_labels(**tx_volcano_kwargs)

print("Done: Transcriptomics")


Plotting Transcriptomics volcano plots...
Done: Transcriptomics


In [5]:
# Volcano plots: Proteomics (static PNG)
print("Plotting Proteomics volcano plots...")

# Arguments are collected first so the configuration can be read at a glance.
# NOTE(review): VOLCANO_PX_DIR is created earlier but not passed here; presumably
# sub_dir_levels nests the files under output_path/"proteomics" - confirm
# against plot_volcano_plots_with_labels.
px_volcano_kwargs = {
    "results_df": px_rem_vs_act_results,
    # Proteomics plots are split by diagnosis only, and by drug family.
    "subpop_cols": ["diagnosis"],
    "drug_family_col": "drug_family",
    "feature_col": "feature",
    # Significance is judged on the adjusted p-value column.
    "p_value_col": "adjusted_p_value",
    "p_value_threshold": ADJ_PVAL_THRESH,
    "fold_change_threshold": PX_FC_THRESH,
    "output_path": VOLCANO_DIR,
    "sub_dir_levels": ["proteomics"],
    "file_suffix": "volcano",
    "interactive": False,
}
plot_volcano_plots_with_labels(**px_volcano_kwargs)

print("Done: Proteomics")


Plotting Proteomics volcano plots...
Done: Proteomics
