In [1]:
from pathlib import Path
from typing import Union, List, Optional

import matplotlib.pyplot as plt
import pandas as pd
from statsmodels.stats.multitest import multipletests


def add_fdr_column(
    df: pd.DataFrame,
    p_col: str = "p_value",
    out_col: str = "p_value_adj",
    method: str = "fdr_bh",
    alpha: float = 0.05,
    inplace: bool = False,
) -> pd.DataFrame:
    """
    Add a multiple-testing-adjusted p-value column to a DataFrame.

    Missing p-values (NaN/None) are excluded from the correction, so the
    number of tests equals the number of non-missing p-values. Their adjusted
    value stays NaN. Passing them through to ``multipletests`` would
    otherwise contaminate the adjusted values of the valid rows.

    Parameters
    ----------
    df : pd.DataFrame
        Input DataFrame containing raw p-values.
    p_col : str, default "p_value"
        Name of the column with raw p-values. Values are cast to float.
    out_col : str, default "p_value_adj"
        Name of the column that receives the adjusted p-values. An existing
        column of that name is overwritten.
    method : str, default "fdr_bh"
        Correction method accepted by statsmodels.stats.multitest.multipletests
        (e.g., "fdr_bh", "fdr_by", "bonferroni", "holm").
    alpha : float, default 0.05
        Significance level forwarded to multipletests. It only influences the
        reject decisions, which are discarded here.
    inplace : bool, default False
        If True, modify and return the original DataFrame; otherwise operate
        on (and return) a copy.

    Returns
    -------
    pd.DataFrame
        DataFrame with ``out_col`` holding the adjusted p-values, in the same
        row order as the input.

    Raises
    ------
    KeyError
        If ``p_col`` is not a column of ``df``.
    """
    work_df = df if inplace else df.copy()

    # Work on a private float array so the source column is never touched.
    raw = work_df[p_col].astype(float).to_numpy().copy()
    valid = ~pd.isna(raw)

    # Start from the raw values: positions that are NaN remain NaN, and the
    # valid positions are overwritten with their corrected values below.
    adjusted = raw.copy()
    if valid.any():
        # multipletests returns: reject, pvals_corrected, alphacSidak, alphacBonf
        _, qvals, _, _ = multipletests(raw[valid], alpha=alpha, method=method)
        adjusted[valid] = qvals

    # Positional (array) assignment: independent of index labels, so it also
    # works when the index contains duplicates.
    work_df[out_col] = adjusted
    return work_df

def plot_ratio_bars(
    df: pd.DataFrame,
    rows: List[int],
    ax: Optional[plt.Axes] = None,
    bar_kwargs: Optional[dict] = None,
    err_kwargs: Optional[dict] = None,
    label_rotation: int = 45,
    save_pdf: Optional[str] = None,
) -> plt.Axes:
    """
    Plot 'ratio_real' with error bars ('ratio_std') for the specified rows.

    Parameters
    ----------
    df : pd.DataFrame
        Must contain 'ratio_real', 'ratio_std', 'cell_type1', 'cell_type2'.
    rows : list[int]
        Index labels (looked up with ``df.loc``) of the rows to plot, in
        plotting order. If the index contains duplicate labels, every
        matching row gets its own bar.
    ax : matplotlib.axes.Axes, optional
        Axis to plot on. If None, a new figure/axis is created whose width
        grows with the number of bars.
    bar_kwargs : dict, optional
        Extra kwargs forwarded to `ax.bar`. The dict is copied, not modified.
    err_kwargs : dict, optional
        Keyword arguments forwarded to `ax.bar` via `error_kw`. Defaults to
        black error bars with small caps. The dict is copied, not modified.
    label_rotation : int, default 45
        Rotation angle for x-axis tick labels.
    save_pdf : str or None, default None
        File path to save the figure as PDF. Missing parent directories are
        created. If None, the figure is not saved.

    Returns
    -------
    matplotlib.axes.Axes
        Axis containing the bar plot.

    Raises
    ------
    KeyError
        Propagated from the ``df.loc`` / column lookups when a requested row
        label or a required column is missing.
    """
    # Copy caller-supplied dicts so nothing downstream can mutate them.
    bar_kwargs = {} if bar_kwargs is None else dict(bar_kwargs)
    if err_kwargs is None:
        err_kwargs = dict(ecolor="black", capsize=3, lw=1)
    else:
        err_kwargs = dict(err_kwargs)

    # Select the data first: a bad label then fails before an empty figure
    # has been created.
    sel = df.loc[rows]
    heights = sel["ratio_real"].to_numpy()
    errors = sel["ratio_std"].to_numpy()
    labels = (sel["cell_type1"] + "-" + sel["cell_type2"]).to_list()

    # Size everything by the rows actually selected, not by len(rows):
    # duplicate index labels make .loc return more rows than were requested,
    # which would otherwise give ax.bar mismatched x/height lengths.
    n_bars = len(sel)

    if ax is None:
        fig, ax = plt.subplots(figsize=(1.2 * n_bars + 2, 6))
    else:
        fig = ax.figure

    positions = list(range(n_bars))
    ax.bar(
        positions,
        heights,
        yerr=errors,
        error_kw=err_kwargs,
        **bar_kwargs,
    )
    ax.set_xticks(positions)
    ax.set_xticklabels(labels, rotation=label_rotation, ha="right")
    ax.set_ylabel("ratio_real")
    ax.set_xlabel("CellType1-CellType2")
    ax.set_title("Ratio Real with Standard Deviation")
    ax.axhline(1, color="red", linestyle="--", linewidth=1)  # reference line at ratio == 1
    ax.margins(y=0.1)

    if save_pdf is not None:
        # savefig does not create directories; make sure the target exists.
        # Only path-like targets are handled; anything else is passed through.
        if isinstance(save_pdf, (str, Path)):
            Path(save_pdf).parent.mkdir(parents=True, exist_ok=True)
        fig.savefig(save_pdf, format="pdf", bbox_inches="tight")

    return ax

In [2]:
# Load the per-region proximity score tables; the first CSV column becomes the row index.
# Paths are relative to the notebook's working directory.
CB = pd.read_csv(
    "Cerebellum_proximity_scores.csv",
    index_col=0,
)
HP = pd.read_csv(
    "Hippocampus_proximity_scores.csv",
    index_col=0,
)

FileNotFoundError: [Errno 2] No such file or directory: 'Cerebellum_proximity_scores.csv'

In [None]:
# Add the "p_value_adj" column ("fdr_by" correction) to both region tables.
# Rebinding the names to the returned copies avoids mutating the loaded frames
# in place, and ending the cell on an assignment keeps the notebook from
# rendering an entire DataFrame as cell output.
CB = add_fdr_column(CB, method="fdr_by")
HP = add_fdr_column(HP, method="fdr_by")

In [None]:
# Cerebellum: keep only pairs where either cell type is 'Microglia NN',
# then plot the listed row labels and save the figure as a PDF.
plot_ratio_bars(
    CB[CB[["cell_type1", "cell_type2"]].eq('Microglia NN').any(axis=1)],
    rows=[1392, 71, 20, 27, 75, 82],
    save_pdf='figures/CB_proximity_plot.pdf',
)

In [None]:
# Hippocampus: keep only pairs where either cell type is 'Microglia NN',
# then plot the listed row labels and save the figure as a PDF.
plot_ratio_bars(
    HP[HP[["cell_type1", "cell_type2"]].eq('Microglia NN').any(axis=1)],
    rows=[1456, 1541, 1444, 1662, 1483],
    save_pdf='figures/HP_proximity_plot.pdf',
)