In [None]:
class univar_stats:
    """Box or violin plot of one numeric column per group, annotated with p-values.

    ``plot()`` draws ``y_col`` against the categories in ``x_col`` with Plotly
    Express, runs either a one-way ANOVA or pairwise two-sample tests between
    the groups, and draws a bracket with a text label for each pair of groups.

    Third-party modules are imported inside the methods that use them, so the
    class can be defined and instantiated without Plotly or statsmodels.
    """

    # Clearance, in y-axis data units, between the largest y value and the
    # first significance bracket.
    _BRACKET_BASE_GAP = 0.15

    def __init__(
        self, df, x_col, y_col,
        group_order=None, custom_colors=None,
        stats_options=None, p_value_threshold=0.05,
        annotate_style="value", y_offset_factor=0.35,
        show_non_significant=True, correct_p="bonferroni",
        title_=None, y_label=None, x_label=None,
        fig_height=800, fig_width=600,
        plot_type="box", show_axis_lines=True
    ):
        """Store the plotting and testing configuration; no computation happens here.

        Args:
            df: DataFrame holding the data to plot.
            x_col: Name of the grouping column (x axis).
            y_col: Name of the numeric column (y axis).
            group_order: Group labels in display order. ``None`` or an empty
                sequence means "use the groups found in ``df``".
            custom_colors: Mapping passed to Plotly as ``color_discrete_map``.
            stats_options: List of option strings. ``"anova"`` (used when there
                are more than two groups), otherwise ``"t-test"`` or
                ``"nonparametric"`` selects the pairwise test; ``"effect-size"``
                adds Cohen's d to pairwise labels. Defaults to ``["t-test"]``.
            p_value_threshold: Pairs with a p-value above this are hidden when
                ``show_non_significant`` is false.
            annotate_style: ``"value"`` for numeric labels, ``"symbol"`` for stars.
            y_offset_factor: Vertical spacing between brackets as a fraction of
                the y data range.
            show_non_significant: Whether to draw brackets above the threshold.
            correct_p: ``method`` passed to statsmodels ``multipletests`` for the
                pairwise p-values; a falsy value disables correction.
            title_: Figure title; defaults to ``y_col``.
            y_label: Y-axis title; defaults to ``y_col``.
            x_label: X-axis title; defaults to ``x_col``.
            fig_height: Figure height passed to the layout.
            fig_width: Figure width passed to the layout.
            plot_type: ``"box"`` or ``"violin"``.
            show_axis_lines: Whether both axes draw their axis line.
        """
        self.df = df
        self.x_col = x_col
        self.y_col = y_col
        self.group_order = group_order
        self.custom_colors = custom_colors
        self.stats_options = stats_options or ["t-test"]
        self.p_value_threshold = p_value_threshold
        self.annotate_style = annotate_style
        self.y_offset_factor = y_offset_factor
        self.show_non_significant = show_non_significant
        self.correct_p = correct_p
        self.title_ = title_ or y_col
        self.y_label = y_label or y_col
        self.x_label = x_label or x_col
        self.fig_height = fig_height
        self.fig_width = fig_width
        self.plot_type = plot_type
        self.show_axis_lines = show_axis_lines

    @staticmethod
    def _cohens_d(sample_a, sample_b):
        """Return Cohen's d for two independent samples using the pooled SD.

        Returns NaN when either sample has fewer than two observations, since
        the sample variance is undefined in that case.
        """
        import numpy as np

        a = np.asarray(sample_a, dtype=float)
        b = np.asarray(sample_b, dtype=float)
        n_a, n_b = a.size, b.size
        if n_a < 2 or n_b < 2:
            return float("nan")

        pooled_var = (
            (n_a - 1) * a.var(ddof=1) + (n_b - 1) * b.var(ddof=1)
        ) / (n_a + n_b - 2)
        # numpy scalars are used on purpose: a zero pooled variance yields
        # inf/nan instead of raising ZeroDivisionError.
        return float((a.mean() - b.mean()) / np.sqrt(pooled_var))

    def _p_label(self, p_val):
        """Format one p-value according to ``self.annotate_style``.

        Raises:
            ValueError: If ``annotate_style`` is neither "value" nor "symbol".
        """
        import math

        undefined = math.isnan(p_val)

        if self.annotate_style == "value":
            if undefined:
                # NaN fails every comparison, so it must be caught before the
                # threshold check or it would be reported as "p<0.0001".
                return "p=n/a"
            return f"p={p_val:.4f}" if p_val >= 0.0001 else "p<0.0001"

        if self.annotate_style == "symbol":
            if undefined:
                return "n/a"
            if p_val < 0.001:
                return "***"
            if p_val < 0.01:
                return "**"
            if p_val < 0.05:
                return "*"
            return "ns"

        raise ValueError("Invalid annotate_style.")

    def _compute_statistics(self, grouped, group_order):
        """Run the configured tests for every pair of groups.

        Args:
            grouped: ``SeriesGroupBy`` of ``y_col`` keyed by ``x_col``.
            group_order: List of group labels to compare.

        Returns:
            ``(comparisons, p_values, effect_sizes)``. ``comparisons`` is the list
            of label pairs, ``p_values`` is aligned with it, and ``effect_sizes``
            is aligned with it when "effect-size" was requested for pairwise
            tests and empty otherwise.

        Raises:
            ValueError: If a pairwise test is needed but ``stats_options`` names
                neither "t-test" nor "nonparametric".
        """
        from itertools import combinations

        from scipy.stats import f_oneway, mannwhitneyu, ttest_ind

        def values_of(label):
            # Missing observations would otherwise propagate NaN into the tests.
            return grouped.get_group(label).dropna().to_numpy()

        comparisons = list(combinations(group_order, 2))
        p_values, effect_sizes = [], []

        use_anova = "anova" in self.stats_options and len(group_order) > 2
        if use_anova:
            _, anova_p = f_oneway(*(values_of(g) for g in group_order))
            # The single omnibus p-value is shown on every bracket.
            p_values = [anova_p] * len(comparisons)
        else:
            for g1, g2 in comparisons:
                group1 = values_of(g1)
                group2 = values_of(g2)

                if "t-test" in self.stats_options:
                    run_test = ttest_ind
                    test_kwargs = {}
                elif "nonparametric" in self.stats_options:
                    run_test = mannwhitneyu
                    test_kwargs = {"alternative": "two-sided"}
                else:
                    raise ValueError("Invalid stats_options.")

                if len(group1) == 0 or len(group2) == 0:
                    p_val = float("nan")
                else:
                    _, p_val = run_test(group1, group2, **test_kwargs)
                p_values.append(p_val)

                if "effect-size" in self.stats_options:
                    effect_sizes.append(self._cohens_d(group1, group2))

        # Multiple-testing correction only applies to pairwise p-values, and
        # only when there is at least one of them.
        if self.correct_p and not use_anova and len(p_values) > 0:
            from statsmodels.stats.multitest import multipletests

            _, corrected, _, _ = multipletests(p_values, method=self.correct_p)
            p_values = list(corrected)

        return comparisons, p_values, effect_sizes

    def plot(self):
        """Build the annotated figure.

        Returns:
            The Plotly figure containing the box/violin traces, one bracket
            trace per displayed comparison, and the matching text annotations.

        Raises:
            ValueError: If the DataFrame is empty, or if ``plot_type``,
                ``annotate_style`` or ``stats_options`` is invalid.
        """
        import warnings

        import plotly.express as px
        import plotly.graph_objects as go

        df = self.df
        if df.empty:
            raise ValueError("The DataFrame is empty.")

        # Warnings are silenced for the duration of this call only; the
        # caller's warning filters are restored on exit.
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")

            grouped = df.groupby(self.x_col)[self.y_col]
            if self.group_order is None or len(self.group_order) == 0:
                group_order = list(grouped.groups.keys())
            else:
                # A list is required below for ``.index`` lookups.
                group_order = list(self.group_order)

            comparisons, p_values, effect_sizes = self._compute_statistics(
                grouped, group_order
            )

            max_y = df[self.y_col].max()
            y_offset = self.y_offset_factor * (max_y - df[self.y_col].min())

            shared_kwargs = dict(
                x=self.x_col, y=self.y_col, color=self.x_col,
                points="all", category_orders={self.x_col: group_order},
                color_discrete_map=self.custom_colors,
            )
            if self.plot_type == "box":
                fig = px.box(df, **shared_kwargs)
            elif self.plot_type == "violin":
                fig = px.violin(df, box=True, **shared_kwargs)
            else:
                raise ValueError("Invalid plot_type. Use 'box' or 'violin'.")

            annotations = []
            drawn = 0  # brackets actually drawn, so hidden pairs leave no gap
            for i, ((g1, g2), p_val) in enumerate(zip(comparisons, p_values)):
                if not self.show_non_significant and p_val > self.p_value_threshold:
                    continue

                drawn += 1
                y_pos = max_y + self._BRACKET_BASE_GAP + drawn * y_offset

                p_text = self._p_label(p_val)
                if effect_sizes:
                    p_text += f", d={effect_sizes[i]:.2f}"

                annotations.append(dict(
                    # Midpoint between the two groups' positions in group_order.
                    x=(group_order.index(g1) + group_order.index(g2)) / 2,
                    y=y_pos + y_offset * 0.75,
                    text=p_text,
                    showarrow=False,
                    xref="x", yref="y",
                    font=dict(size=12),
                ))

                fig.add_trace(go.Scatter(
                    x=[g1, g1, g2, g2],
                    y=[y_pos, y_pos + y_offset * 0.5, y_pos + y_offset * 0.5, y_pos],
                    mode="lines",
                    line=dict(color="black", width=1),
                    hoverinfo="skip"
                ))

            axis_line_config = dict(
                showline=self.show_axis_lines,
                linewidth=2,
                linecolor="black"
            )

            fig.update_layout(
                annotations=annotations,
                title=dict(text=f"<b>{self.title_}</b>", x=0.5, y=0.95, xanchor='center', yanchor='top'),
                yaxis_title=self.y_label,
                xaxis_title=self.x_label,
                legend_title=self.x_col,
                width=self.fig_width,
                height=self.fig_height,
                showlegend=False,
                yaxis=dict(tickformat=".2e", **axis_line_config),
                xaxis=axis_line_config,
                paper_bgcolor='rgba(0,0,0,0)',
                plot_bgcolor='rgba(0,0,0,0)',
            )

        return fig

    def box_plot(self):
        """Backward-compatible alias for ``plot()``; it still honours ``plot_type``."""
        return self.plot()

In [3]:
import pandas as pd
import numpy as np

# Fix the legacy global RNG so the simulated scores are reproducible.
np.random.seed(42)

# One (label, mean, standard deviation) entry per treatment group; samples
# are drawn in this order.
group_specs = [("A", 5.0, 0.5), ("B", 5.5, 0.8), ("C", 6.0, 0.5)]
n_per_group = 20

data = {
    "treatment": [
        label
        for label, _mean, _sd in group_specs
        for _ in range(n_per_group)
    ],
    "score": np.concatenate([
        np.random.normal(loc=mean, scale=sd, size=n_per_group)
        for _label, mean, sd in group_specs
    ]),
}

df = pd.DataFrame(data)

In [None]:
# Pairwise t-tests with Bonferroni correction and Cohen's d, showing only the
# comparisons at or below the default p_value_threshold.
# group_order, stats_options and custom_colors are optional; omitting them
# falls back to the class defaults.
plotter = univar_stats(
    df,
    x_col="treatment",
    y_col="score",
    stats_options=["t-test", "effect-size"],
    correct_p="bonferroni",
    annotate_style="symbol",
    custom_colors={"A": "orange", "B": "blue", "C": "green"},
    title_="Treatment Effect on Score",
    y_label="Score",
    x_label="Treatment",
    fig_height=600,
    fig_width=800,
    show_non_significant=False,
    y_offset_factor=0.1,
    group_order=["B", "A", "C"],
)

# univar_stats defines plot(); the plot type is chosen by the plot_type argument.
fig = plotter.plot()
fig.show()

In [14]:
# Box plot with Bonferroni-corrected pairwise t-tests and star-style labels.
plot_options = dict(
    # what to plot
    x_col="treatment",
    y_col="score",
    group_order=["B", "A", "C"],
    plot_type="box",
    # statistics
    stats_options=["t-test"],
    correct_p="bonferroni",
    annotate_style="symbol",
    y_offset_factor=0.35,
    # appearance
    custom_colors={"A": "red", "B": "blue", "C": "green"},
    show_axis_lines=True,
    title_="Treatment Effect on Score",
    y_label="Score",
    x_label="Treatment",
    fig_height=600,
    fig_width=800,
)
plotter = univar_stats(df, **plot_options)

fig = plotter.plot()
fig.show()