# Analysis of Effects from Artilces of Posts

In [449]:
import numpy as np
import pandas as pd
from scipy.stats import kruskal
from joblib import Parallel, delayed
import plotly.graph_objects as go

Load and specify data for analysis

In [450]:
#set format to float with 4 decimals
pd.set_option('display.float_format', '{:.4f}'.format)

#read data
sentiments = pd.read_csv("sentiment_results.csv")

#smotions of articles(_A) and posts(_P) included in the analysis 
emotions_a = ['Anger_A', 'Fear_A', 'Disgust_A', 'Joy_A', 'None_A']
emotions_p = ['Anger_P', 'Fear_P', 'Disgust_P', 'Joy_P', 'None_P']

#filter relevant colums
sentiments = sentiments[emotions_a + ['NewsroomTopic'] + emotions_p]

Set Global Parameters

In [451]:
#set for optimal cpu utilisation
parallel_jobs = 10

#settings to account for testing errors
alpha = 0.05
bootstrap_samples = 100

#ranking parameters for emotions in articles
between_ranks = 3
rank_threshold = alpha

#set seed for reproducibility
seed = 666

Precompute Variables

In [452]:
#prepare topics
sentiments['NewsroomTopic'] = sentiments['NewsroomTopic'].astype('category')
topics = sentiments['NewsroomTopic'].to_numpy()
topic_levels = sentiments['NewsroomTopic'].cat.categories.to_numpy()


#convert DV arrays for faster access
dv_arrays = {dv: sentiments[dv].to_numpy() for dv in emotions_p}

#generate seed based bootstrap indices for tests
rng = np.random.default_rng(seed)
n = len(sentiments)

bootstrap_indices = []
for i in range(bootstrap_samples):
    idx = rng.choice(n, size=n, replace=True)
    bootstrap_indices.append(idx)

In [453]:
boot_pvalues_dict = {}
boot_h_dict = {}
boot_epsilon_dict = {}
boot_k_dict = {}
boot_desc_means_dict = {}
boot_desc_stds_dict = {}

Test Functions

In [454]:
def kruskal_test(groups):

    clean_groups = [g for g in groups if len(g) > 0]

    if len(clean_groups) <= 1:
        return np.nan, np.nan, np.nan, np.nan
    
    H, p = kruskal(*clean_groups)
    k = len(clean_groups)
    n_total = sum(len(g) for g in clean_groups)
    epsilon = (H - k + 1) / (n_total - k) if n_total > k else np.nan

    return H, p, epsilon, k

In [455]:
def bootstrap_stats(H_list, epsilon_list, p_list, k_list):
    
    H_arr = np.array(H_list)
    eps_arr = np.array(epsilon_list)
    p_arr = np.array(p_list)
    k_arr = np.array(k_list)
    
    return {
        "H_mean": np.nanmean(H_arr),
        "H_ci_lower": np.nanpercentile(H_arr, 100 * alpha / 2),
        "H_ci_upper": np.nanpercentile(H_arr, 100 * (1 - alpha / 2)),
        "epsilon2_mean": np.nanmean(eps_arr),
        "epsilon2_ci_lower": np.nanpercentile(eps_arr, 100 * alpha / 2),
        "epsilon2_ci_upper": np.nanpercentile(eps_arr, 100 * (1 - alpha / 2)),
        "mean_p_value": np.nanmean(p_arr),
        "signif_prob": np.mean(p_arr <= alpha) if len(p_arr) > 0 else np.nan,
        "mean_k": np.nanmean(k_arr)
    }

Analysis of Emotions

In [None]:
def emotion_bootstrap(idx_list, dv, iv_emo):

    H_vals, epsilon_vals, p_vals, k_vals = [], [], [], []
    for idx in idx_list:
        dv_data = dv_arrays[dv][idx]
        iv_data = sentiments[iv_emo].to_numpy()[idx]
        topic_data = topics[idx]
        num_ranks = between_ranks + 2 if rank_threshold > 0 else between_ranks
        edges = np.concatenate([[0], np.linspace(rank_threshold, 1 - rank_threshold, between_ranks + 1), [1]])
        rank_data = pd.cut(iv_data, bins=edges, labels=False, include_lowest=True) + 1

        for lvl in topic_levels:
            mask = topic_data == lvl
            groups = [dv_data[(mask) & (rank_data == r)] for r in range(1, num_ranks + 1)]
            H, p, epsilon, k = kruskal_test(groups)
            
            if not np.isnan(H):
                H_vals.append(H)
                epsilon_vals.append(epsilon)
                p_vals.append(p)
                k_vals.append(k)

    return {"H": H_vals, "epsilon": epsilon_vals, "p": p_vals, "k": k_vals}

In [457]:
def emotion_parallel(dv, iv_emo):
    
    split_indices = np.array_split(bootstrap_indices, parallel_jobs)
    results = Parallel(n_jobs=parallel_jobs)(
        delayed(emotion_bootstrap)(sub_idx, dv, iv_emo) for sub_idx in split_indices
    )

    H_all, eps_all, p_all, k_all = [], [], [], []
    for r in results:
        H_all.extend(r["H"])
        eps_all.extend(r["epsilon"])
        p_all.extend(r["p"])
        k_all.extend(r["k"])

    boot_h_dict[(dv, iv_emo)] = H_all
    boot_epsilon_dict[(dv, iv_emo)] = eps_all
    boot_pvalues_dict[(dv, iv_emo)] = p_all
    boot_k_dict[(dv, iv_emo)] = k_all

    return bootstrap_stats(H_all, eps_all, p_all, k_all)

Analysis of Topics

In [None]:
def topic_bootstrap(idx_list, dv):

    H_vals, epsilon_vals, p_vals, k_vals = [], [], [], []
    for idx in idx_list:
        dv_data = dv_arrays[dv][idx]
        topic_data = topics[idx]
        groups = [dv_data[topic_data == lvl] for lvl in topic_levels]
        H, p, epsilon, k = kruskal_test(groups)
        
        if not np.isnan(H):
            H_vals.append(H)
            epsilon_vals.append(epsilon)
            p_vals.append(p)
            k_vals.append(k)

    return {"H": H_vals, "epsilon": epsilon_vals, "p": p_vals, "k": k_vals}

In [459]:
def topic_parallel(dv):
    
    split_indices = np.array_split(bootstrap_indices, parallel_jobs)
    results = Parallel(n_jobs=parallel_jobs)(
        delayed(topic_bootstrap)(sub_idx, dv) for sub_idx in split_indices
    )

    H_all, eps_all, p_all, k_all = [], [], [], []
    for r in results:
        H_all.extend(r["H"])
        eps_all.extend(r["epsilon"])
        p_all.extend(r["p"])
        k_all.extend(r["k"])

    boot_h_dict[(dv, "NewsroomTopic")] = H_all
    boot_epsilon_dict[(dv, "NewsroomTopic")] = eps_all
    boot_pvalues_dict[(dv, "NewsroomTopic")] = p_all
    boot_k_dict[(dv, "NewsroomTopic")] = k_all

    return bootstrap_stats(H_all, eps_all, p_all, k_all)

Descriptive statistics

In [None]:
def descriptive_stats():

    numeric_vars = emotions_a + emotions_p

    rows = []
    for var in numeric_vars:

        for lvl in topic_levels:
            topic_mask = sentiments['NewsroomTopic'] == lvl
            topic_indices = np.where(topic_mask)[0]
            data = sentiments.loc[topic_mask, var].to_numpy()

            if len(data) == 0:
                continue

            mean = np.mean(data)
            std = np.std(data, ddof=1)

            boot_means = []
            boot_stds = []
            for b in range(bootstrap_samples):
                global_idx = bootstrap_indices[b]
                topic_sample_idx = np.intersect1d(global_idx, topic_indices)
                
                if len(topic_sample_idx) > 0:
                    sample_data = data[np.searchsorted(topic_indices, topic_sample_idx)]
                    boot_means.append(np.mean(sample_data))
                    boot_stds.append(np.std(sample_data, ddof=1))

            boot_desc_means_dict[(var, lvl)] = boot_means
            boot_desc_stds_dict[(var, lvl)] = boot_stds
            mean_ci_lower = np.percentile(boot_means, 100 * alpha / 2) if boot_means else np.nan
            mean_ci_upper = np.percentile(boot_means, 100 * (1 - alpha / 2)) if boot_means else np.nan
            std_ci_lower = np.percentile(boot_stds, 100 * alpha / 2) if boot_stds else np.nan
            std_ci_upper = np.percentile(boot_stds, 100 * (1 - alpha / 2)) if boot_stds else np.nan
            rows.append({
                "variable": var,
                "topic": lvl,
                "mean": mean,
                "mean_ci_lower": mean_ci_lower,
                "mean_ci_upper": mean_ci_upper,
                "std": std,
                "std_ci_lower": std_ci_lower,
                "std_ci_upper": std_ci_upper
            })

    return pd.DataFrame(rows)

Summarize Testings with Structured Output

In [None]:
def kruskal_wallis_bootstrap():

    test_rows = []
    for dv in emotions_p:
        
        for iv_emo in emotions_a:
            stats = emotion_parallel(dv, iv_emo)
            signif_label = "yes" if stats["epsilon2_ci_lower"] > 0 else "no"
            test_rows.append({
                "criterion": dv,
                "predictor": iv_emo,
                "type": "emotion",
                "signif_label": signif_label,
                **stats
            })

        stats = topic_parallel(dv)
        signif_label = "yes" if stats["epsilon2_ci_lower"] > 0 else "no"
        test_rows.append({
            "criterion": dv,
            "predictor": "NewsroomTopic",
            "type": "NewsroomTopic",
            "signif_label": signif_label,
            **stats
        })

    test_df = pd.DataFrame(test_rows)
    desc_df = descriptive_stats()
    return test_df, desc_df

# Run Analysis

In [462]:
test_stats, descriptive_stats = kruskal_wallis_bootstrap()

Descriptive statistics

In [463]:
descriptive_stats

Unnamed: 0,variable,topic,mean,mean_ci_lower,mean_ci_upper,std,std_ci_lower,std_ci_upper
0,Anger_A,Etat,0.2370,0.2326,0.2409,0.2895,0.2871,0.2918
1,Anger_A,Inland,0.1499,0.1487,0.1514,0.1991,0.1978,0.2006
2,Anger_A,International,0.0774,0.0763,0.0789,0.1452,0.1430,0.1475
3,Anger_A,Kultur,0.0669,0.0642,0.0695,0.1452,0.1396,0.1498
4,Anger_A,Panorama,0.0963,0.0955,0.0975,0.1570,0.1557,0.1585
...,...,...,...,...,...,...,...,...
85,None_P,Panorama,0.1845,0.1825,0.1862,0.3129,0.3111,0.3144
86,None_P,Sport,0.2654,0.2623,0.2681,0.3567,0.3546,0.3584
87,None_P,Web,0.2350,0.2323,0.2374,0.3429,0.3407,0.3450
88,None_P,Wirtschaft,0.1808,0.1790,0.1830,0.3095,0.3073,0.3115


Results for anger in Posts

In [464]:
anger = test_stats[test_stats['criterion'] == 'Anger_P']
anger

Unnamed: 0,criterion,predictor,type,signif_label,H_mean,H_ci_lower,H_ci_upper,epsilon2_mean,epsilon2_ci_lower,epsilon2_ci_upper,mean_p_value,signif_prob,mean_k
0,Anger_P,Anger_A,emotion,yes,135.5436,4.5645,253.2019,0.0058,0.0002,0.017,0.015,0.9467,4.5211
1,Anger_P,Fear_A,emotion,yes,79.8447,10.6334,250.9737,0.0037,0.0002,0.0111,0.0056,0.98,5.0
2,Anger_P,Disgust_A,emotion,no,5.6018,0.0356,18.1461,0.0003,-0.0001,0.0024,0.2537,0.355,3.11
3,Anger_P,Joy_A,emotion,no,24.3504,1.0131,87.3823,0.0012,-0.0,0.004,0.1087,0.6711,4.2967
4,Anger_P,None_A,emotion,yes,59.7391,4.4897,125.65,0.0031,0.0,0.0117,0.0301,0.8922,5.0
5,Anger_P,NewsroomTopic,NewsroomTopic,yes,2941.5212,2672.5855,3152.9056,0.0117,0.0107,0.0126,0.0,1.0,9.0


Results for disgust in Posts

In [465]:
fear = test_stats[test_stats['criterion'] == 'Fear_P']
fear

Unnamed: 0,criterion,predictor,type,signif_label,H_mean,H_ci_lower,H_ci_upper,epsilon2_mean,epsilon2_ci_lower,epsilon2_ci_upper,mean_p_value,signif_prob,mean_k
6,Fear_P,Anger_A,emotion,no,155.3426,3.796,468.2764,0.0056,-0.0,0.0172,0.0327,0.8867,4.5211
7,Fear_P,Fear_A,emotion,yes,335.1855,34.8255,683.4259,0.0129,0.005,0.0294,0.0,1.0,5.0
8,Fear_P,Disgust_A,emotion,no,25.8203,0.0117,112.594,0.0007,-0.0002,0.0027,0.2274,0.49,3.11
9,Fear_P,Joy_A,emotion,yes,85.4871,6.4682,316.0602,0.003,0.0002,0.0068,0.0096,0.9489,4.2967
10,Fear_P,None_A,emotion,yes,271.4768,7.8737,586.7488,0.0106,0.0006,0.0313,0.008,0.9589,5.0
11,Fear_P,NewsroomTopic,NewsroomTopic,yes,7535.4858,7251.757,7788.2715,0.0301,0.029,0.0311,0.0,1.0,9.0


Results for disgust in Posts

In [466]:
disgust = test_stats[test_stats['criterion'] == 'Disgust_P']
disgust

Unnamed: 0,criterion,predictor,type,signif_label,H_mean,H_ci_lower,H_ci_upper,epsilon2_mean,epsilon2_ci_lower,epsilon2_ci_upper,mean_p_value,signif_prob,mean_k
12,Disgust_P,Anger_A,emotion,yes,112.8004,17.7559,367.6182,0.0042,0.001,0.0112,0.0002,0.9989,4.5211
13,Disgust_P,Fear_A,emotion,yes,91.6293,6.4676,320.8864,0.0031,0.0001,0.0096,0.0134,0.9456,5.0
14,Disgust_P,Disgust_A,emotion,no,118.4153,2.0021,589.2379,0.0031,-0.0,0.0116,0.0354,0.8712,3.11
15,Disgust_P,Joy_A,emotion,no,29.9583,0.7297,112.8975,0.0009,-0.0001,0.0027,0.1002,0.7033,4.2967
16,Disgust_P,None_A,emotion,yes,53.1055,4.0871,171.7865,0.0018,0.0,0.0052,0.0261,0.9067,5.0
17,Disgust_P,NewsroomTopic,NewsroomTopic,yes,2029.2472,1851.728,2213.9344,0.0081,0.0074,0.0088,0.0,1.0,9.0


Results for joy in Posts

In [467]:
joy = test_stats[test_stats['criterion'] == 'Joy_P']
joy

Unnamed: 0,criterion,predictor,type,signif_label,H_mean,H_ci_lower,H_ci_upper,epsilon2_mean,epsilon2_ci_lower,epsilon2_ci_upper,mean_p_value,signif_prob,mean_k
18,Joy_P,Anger_A,emotion,no,96.9984,2.4463,267.227,0.0039,-0.0002,0.0101,0.0616,0.8267,4.5211
19,Joy_P,Fear_A,emotion,yes,292.3567,15.5314,637.3325,0.0119,0.0019,0.0357,0.0013,0.9922,5.0
20,Joy_P,Disgust_A,emotion,no,13.8843,0.0352,72.1607,0.0004,-0.0002,0.0016,0.1602,0.59,3.11
21,Joy_P,Joy_A,emotion,yes,110.1556,5.4525,355.3223,0.0043,0.0001,0.0106,0.0121,0.9367,4.2967
22,Joy_P,None_A,emotion,yes,331.2057,12.0741,864.1064,0.0129,0.0013,0.0296,0.002,0.9878,5.0
23,Joy_P,NewsroomTopic,NewsroomTopic,yes,10691.627,10352.4535,10993.7896,0.0427,0.0414,0.0439,0.0,1.0,9.0


Results for none in Posts

In [468]:
none = test_stats[test_stats['criterion'] == 'None_P']
none

Unnamed: 0,criterion,predictor,type,signif_label,H_mean,H_ci_lower,H_ci_upper,epsilon2_mean,epsilon2_ci_lower,epsilon2_ci_upper,mean_p_value,signif_prob,mean_k
24,None_P,Anger_A,emotion,yes,45.5716,6.4508,152.3097,0.0018,0.0001,0.0048,0.0158,0.9278,4.5211
25,None_P,Fear_A,emotion,yes,162.8804,19.6175,376.0549,0.0064,0.0022,0.0152,0.0003,0.9989,5.0
26,None_P,Disgust_A,emotion,no,8.2316,0.0226,40.1996,0.0002,-0.0002,0.0015,0.2444,0.3962,3.11
27,None_P,Joy_A,emotion,no,34.4197,2.0284,104.9854,0.0014,-0.0,0.0042,0.0431,0.8289,4.2967
28,None_P,None_A,emotion,yes,203.3582,20.1773,522.5118,0.0078,0.0026,0.0169,0.0004,0.9978,5.0
29,None_P,NewsroomTopic,NewsroomTopic,yes,2644.3815,2473.8319,2884.0659,0.0105,0.0099,0.0115,0.0,1.0,9.0


Plots

In [469]:
def plot_bootstrap_distribution(stat_type, criterion=None, predictor=None, variable=None, topic=None):
    """
    Visualize bootstrap distributions from global dictionaries.
    
    Parameters
    ----------
    stat_type : str
        One of: "H", "epsilon", "p", "mean", "std"
    criterion : str
        Dependent variable (emotions_p) for KW stats
    predictor : str
        Predictor variable (emotions_a or "NewsroomTopic") for KW stats
    variable : str
        Numeric variable (emotions_a + emotions_p) for descriptives
    topic : str
        Topic name for descriptives
    """
    
    if stat_type in ["H", "epsilon", "p"]:
        if criterion is None or predictor is None:
            raise ValueError("criterion and predictor must be provided for Kruskal-Wallis stats")
        if stat_type == "H":
            data = boot_h_dict.get((criterion, predictor), [])
            title = f"H distribution for {criterion} ~ {predictor}"
        elif stat_type == "epsilon":
            data = boot_epsilon_dict.get((criterion, predictor), [])
            title = f"Epsilon² distribution for {criterion} ~ {predictor}"
        else:
            data = boot_pvalues_dict.get((criterion, predictor), [])
            title = f"p-value distribution for {criterion} ~ {predictor}"
    elif stat_type in ["mean", "std"]:
        if variable is None or topic is None:
            raise ValueError("variable and topic must be provided for descriptives")
        if stat_type == "mean":
            data = boot_desc_means_dict.get((variable, topic), [])
            title = f"Bootstrap means for {variable} in topic {topic}"
        else:
            data = boot_desc_stds_dict.get((variable, topic), [])
            title = f"Bootstrap stds for {variable} in topic {topic}"
    else:
        raise ValueError("stat_type must be one of H, epsilon, p, mean, std")
    
    if len(data) == 0:
        raise ValueError("No bootstrap data found for the given inputs.")
    
    fig = go.Figure()
    fig.add_trace(go.Histogram(
        x=data,
        nbinsx=50,
        histnorm='probability density',
        marker_color='lightblue',
        opacity=0.75
    ))

    fig.update_layout(
        title=title,
        xaxis_title=stat_type,
        yaxis_title="Density",
        template="plotly_white",
        bargap=0.2
    )
    fig.show()

In [470]:
plot_bootstrap_distribution(stat_type="H", criterion="Joy_P", predictor="Anger_A")

In [471]:
plot_bootstrap_distribution(stat_type="epsilon", criterion="Joy_P", predictor="Anger_A")

In [472]:
plot_bootstrap_distribution(stat_type="p", criterion="Joy_P", predictor="Anger_A")

In [473]:
plot_bootstrap_distribution(stat_type="mean", variable="Joy_P", topic="Etat")

In [474]:
plot_bootstrap_distribution(stat_type="std", variable="Joy_P", topic="Etat")

In [475]:
def plot_test_stats(stat_type="epsilon", group_by="dependent"):
    """
    Flexible Kruskal-Wallis bootstrap plot for any statistic.
    
    Parameters
    ----------
    stat_type : str
        "eta2", "epsilon", "H", or "p" (default "epsilon")
    group_by : str
        "dependent" = grouped by DV (x-axis = DVs, bars = predictors)
        "predictor" = grouped by predictor (x-axis = predictors, bars = DVs)
    """
    # Choose column names

    if stat_type == "epsilon":
        mean_col = "epsilon2_mean"
        ci_lower_col = "epsilon2_ci_lower"
        ci_upper_col = "epsilon2_ci_upper"
        y_label = "Kruskal-Wallis Epsilon²"
    elif stat_type == "H":
        mean_col = "H_mean"
        ci_lower_col = "H_ci_lower"
        ci_upper_col = "H_ci_upper"
        y_label = "Kruskal-Wallis H"
    else:
        raise ValueError("stat_type must be one of 'eta2', 'epsilon', 'H', 'p'")

    # Define grouping
    if group_by == "dependent":
        predictor_order = test_stats.groupby('predictor')[mean_col].mean().sort_values(ascending=False).index
        criteria = test_stats.groupby('criterion')[mean_col].mean().sort_values(ascending=False).index
        n_pred = len(predictor_order)
        n_crit = len(criteria)
        total_width = 0.8
        bar_width = total_width / n_pred
        fig = go.Figure()

        for i, pred in enumerate(predictor_order):
            x_positions, y_values, error_y_lower, error_y_upper, hover_texts = [], [], [], [], []

            for j, crit in enumerate(criteria):
                row = test_stats[(test_stats['predictor'] == pred) & (test_stats['criterion'] == crit)]
                if row.empty:
                    y = 0
                    lower, upper = 0, 0
                    p_val = np.nan
                else:
                    y = row[mean_col].values[0]
                    p_val = row['mean_p_value'].values[0]
                    if ci_lower_col and ci_upper_col:
                        lower = y - row[ci_lower_col].values[0]
                        upper = row[ci_upper_col].values[0] - y
                    else:
                        lower = upper = 0

                x = j - total_width/2 + i*bar_width + bar_width/2
                x_positions.append(x)
                y_values.append(y)
                error_y_lower.append(lower)
                error_y_upper.append(upper)
                hover_texts.append(
                    f"<b>Predictor:</b> {pred}<br>"
                    f"<b>Criterion:</b> {crit}<br>"
                    f"<b>{stat_type}:</b> {y:.3f}<br>"
                    f"<b>CI:</b> [{row[ci_lower_col].values[0]:.3f}, {row[ci_upper_col].values[0]:.3f}]<br>"
                    f"<b>Mean p-value:</b> {p_val:.4f}" if not np.isnan(p_val) else ""
                )

            fig.add_trace(go.Bar(
                x=x_positions,
                y=y_values,
                name=pred,
                width=bar_width * 0.95,
                error_y=dict(
                    type='data',
                    symmetric=False,
                    array=error_y_upper,
                    arrayminus=error_y_lower,
                    color='black',
                    thickness=1.5,
                    width=5
                ),
                hovertext=hover_texts,
                hoverinfo="text"
            ))

        tick_positions = np.arange(n_crit)
        fig.update_layout(
            xaxis=dict(
                tickmode='array',
                tickvals=tick_positions,
                ticktext=criteria,
                title="Dependent Variable"
            ),
            yaxis=dict(title=y_label),
            barmode='group',
            title=f"Effects by Criterion with CI ({stat_type})",
            legend_title_text="Predictor",
            width=1000,
            height=600
        )

    elif group_by == "predictor":
        predictor_order = test_stats.groupby('predictor')[mean_col].mean().sort_values(ascending=False).index
        criteria = test_stats.groupby('criterion')[mean_col].mean().sort_values(ascending=False).index
        n_pred = len(predictor_order)
        n_crit = len(criteria)
        colors = ['#636EFA', '#EF553B', '#00CC96', '#AB63FA', '#FFA15A', '#19D3F3', '#FF6692']
        criterion_colors = {crit: colors[i % len(colors)] for i, crit in enumerate(criteria)}
        total_width = 0.8
        bar_width = total_width / n_crit
        fig = go.Figure()

        for j, crit in enumerate(criteria):
            x_positions, y_values, error_y_lower, error_y_upper, hover_texts = [], [], [], [], []

            for i, pred in enumerate(predictor_order):
                row = test_stats[(test_stats['predictor'] == pred) & (test_stats['criterion'] == crit)]
                if row.empty:
                    y = 0
                    lower, upper = 0, 0
                    p_val = np.nan
                else:
                    y = row[mean_col].values[0]
                    p_val = row['mean_p_value'].values[0]
                    if ci_lower_col and ci_upper_col:
                        lower = y - row[ci_lower_col].values[0]
                        upper = row[ci_upper_col].values[0] - y
                    else:
                        lower = upper = 0

                x = i - total_width/2 + j*bar_width + bar_width/2
                x_positions.append(x)
                y_values.append(y)
                error_y_lower.append(lower)
                error_y_upper.append(upper)
                hover_texts.append(
                    f"<b>Predictor:</b> {pred}<br>"
                    f"<b>Criterion:</b> {crit}<br>"
                    f"<b>{stat_type}:</b> {y:.3f}<br>"
                    f"<b>CI:</b> [{row[ci_lower_col].values[0]:.3f}, {row[ci_upper_col].values[0]:.3f}]<br>"
                    f"<b>Mean p-value:</b> {p_val:.4f}" if not np.isnan(p_val) else ""
                )

            fig.add_trace(go.Bar(
                x=x_positions,
                y=y_values,
                name=crit,
                marker_color=criterion_colors[crit],
                width=bar_width * 0.9,
                error_y=dict(
                    type='data',
                    symmetric=False,
                    array=error_y_upper,
                    arrayminus=error_y_lower,
                    color='black',
                    thickness=1.5,
                    width=5
                ),
                hovertext=hover_texts,
                hoverinfo='text'
            ))

        tick_positions = list(range(n_pred))
        tick_texts = list(predictor_order)
        fig.update_layout(
            xaxis=dict(
                tickmode='array',
                tickvals=tick_positions,
                ticktext=tick_texts,
                title="Predictor"
            ),
            yaxis=dict(title=y_label),
            barmode='group',
            title=f"Effects by Predictor with CI ({stat_type})",
            legend_title_text="Criterion",
            width=1000,
            height=600
        )

    else:
        raise ValueError("group_by must be 'dependent' or 'predictor'")

    fig.show()

In [476]:
plot_test_stats(stat_type="epsilon", group_by="dependent")

In [477]:
plot_test_stats(stat_type="epsilon", group_by="predictor")


Save test_stats as CSV

In [None]:
#test_stats.to_csv('test_stats.csv', index=False)

In [None]:
#descriptive_stats.to_csv('descriptive_stats.csv', index=False)