# Analysis of Effects from Articles on Posts

In [68]:
import numpy as np
import pandas as pd
import plotly.graph_objects as go

In [69]:
# Render floats in displayed DataFrames with four decimal places
pd.options.display.float_format = '{:.4f}'.format

Load, specify and visualize data for analysis

In [70]:
# Load the descriptive statistics and the test results from their CSV files
desc_stats = pd.read_csv("descriptive_stats.csv")
test_stats = pd.read_csv("test_stats.csv")

# Emotion variables included in the analysis: suffix _A = articles, _P = posts
emotions_a, emotions_p = (
    [f"{emotion}_{suffix}" for emotion in ('Anger', 'Fear', 'Disgust', 'Joy', 'None')]
    for suffix in ('A', 'P')
)

Descriptive statistics per topic

In [71]:
# Rows of desc_stats whose topic is 'Etat'
etat = desc_stats.loc[desc_stats['topic'].eq('Etat')]
etat

Unnamed: 0,variable,topic,mean,mean_ci_lower,mean_ci_upper,std
0,Anger_A,Etat,0.237,0.2312,0.2428,0.2895
9,Fear_A,Etat,0.4716,0.4628,0.4802,0.4262
18,Disgust_A,Etat,0.0068,0.0061,0.0076,0.0372
27,Joy_A,Etat,0.0165,0.0146,0.0186,0.0992
36,None_A,Etat,0.2576,0.2516,0.264,0.3094
45,Anger_P,Etat,0.5413,0.5326,0.55,0.4307
54,Fear_P,Etat,0.1533,0.1464,0.1602,0.3423
63,Disgust_P,Etat,0.027,0.0242,0.0299,0.1415
72,Joy_P,Etat,0.0635,0.059,0.0681,0.2243
81,None_P,Etat,0.1858,0.1794,0.1923,0.3163


In [72]:
# Rows of desc_stats whose topic is 'Inland'
inland = desc_stats.loc[desc_stats['topic'].eq('Inland')]
inland

Unnamed: 0,variable,topic,mean,mean_ci_lower,mean_ci_upper,std
1,Anger_A,Inland,0.1499,0.1482,0.1517,0.1991
10,Fear_A,Inland,0.6322,0.6289,0.6354,0.3723
19,Disgust_A,Inland,0.0013,0.0012,0.0014,0.0073
28,Joy_A,Inland,0.0044,0.0042,0.0047,0.0281
37,None_A,Inland,0.206,0.2037,0.2084,0.2663
46,Anger_P,Inland,0.5574,0.5537,0.5612,0.428
55,Fear_P,Inland,0.1795,0.1763,0.1827,0.3646
64,Disgust_P,Inland,0.0156,0.0146,0.0165,0.1071
73,Joy_P,Inland,0.0407,0.0392,0.0422,0.1765
82,None_P,Inland,0.1836,0.1808,0.1863,0.3141


In [73]:
# Rows of desc_stats whose topic is 'International'
international = desc_stats.loc[desc_stats['topic'].eq('International')]
international

Unnamed: 0,variable,topic,mean,mean_ci_lower,mean_ci_upper,std
2,Anger_A,International,0.0774,0.0758,0.079,0.1452
11,Fear_A,International,0.7572,0.7533,0.761,0.3509
20,Disgust_A,International,0.0068,0.0063,0.0073,0.0481
29,Joy_A,International,0.0034,0.0032,0.0037,0.0231
38,None_A,International,0.1451,0.1424,0.1478,0.246
47,Anger_P,International,0.53,0.5255,0.5346,0.4267
56,Fear_P,International,0.2008,0.1966,0.2049,0.3815
65,Disgust_P,International,0.0166,0.0155,0.0178,0.1064
74,Joy_P,International,0.0322,0.0306,0.0339,0.1569
83,None_P,International,0.1947,0.1911,0.1981,0.3193


In [74]:
# Rows of desc_stats whose topic is 'Kultur'
kultur = desc_stats.loc[desc_stats['topic'].eq('Kultur')]
kultur

Unnamed: 0,variable,topic,mean,mean_ci_lower,mean_ci_upper,std
3,Anger_A,Kultur,0.0669,0.0633,0.0705,0.1452
12,Fear_A,Kultur,0.7009,0.6911,0.7108,0.388
21,Disgust_A,Kultur,0.0031,0.0026,0.0035,0.0173
30,Joy_A,Kultur,0.0187,0.0166,0.021,0.088
39,None_A,Kultur,0.1384,0.1314,0.1454,0.2779
48,Anger_P,Kultur,0.353,0.3428,0.3633,0.4113
57,Fear_P,Kultur,0.1252,0.1172,0.1329,0.3156
66,Disgust_P,Kultur,0.024,0.0208,0.0272,0.127
75,Joy_P,Kultur,0.1568,0.1486,0.1654,0.3383
84,None_P,Kultur,0.245,0.236,0.2541,0.3598


In [75]:
# Rows of desc_stats whose topic is 'Panorama'
panorama = desc_stats.loc[desc_stats['topic'].eq('Panorama')]
panorama

Unnamed: 0,variable,topic,mean,mean_ci_lower,mean_ci_upper,std
4,Anger_A,Panorama,0.0963,0.095,0.0977,0.157
13,Fear_A,Panorama,0.7055,0.7025,0.7087,0.3569
22,Disgust_A,Panorama,0.0158,0.0151,0.0164,0.074
31,Joy_A,Panorama,0.0079,0.0074,0.0085,0.0632
40,None_A,Panorama,0.1551,0.1531,0.157,0.227
49,Anger_P,Panorama,0.5144,0.5106,0.518,0.4299
58,Fear_P,Panorama,0.2105,0.2071,0.2139,0.3891
67,Disgust_P,Panorama,0.0214,0.0204,0.0226,0.1234
76,Joy_P,Panorama,0.04,0.0385,0.0416,0.1765
85,None_P,Panorama,0.1845,0.1818,0.1872,0.3129


In [76]:
# Rows of desc_stats whose topic is 'Sport'
sport = desc_stats.loc[desc_stats['topic'].eq('Sport')]
sport

Unnamed: 0,variable,topic,mean,mean_ci_lower,mean_ci_upper,std
5,Anger_A,Sport,0.1058,0.1041,0.1074,0.1438
14,Fear_A,Sport,0.3484,0.3442,0.3526,0.3702
23,Disgust_A,Sport,0.0013,0.0012,0.0014,0.0091
32,Joy_A,Sport,0.0453,0.0437,0.047,0.1424
41,None_A,Sport,0.4273,0.4236,0.431,0.3295
50,Anger_P,Sport,0.4349,0.4301,0.4397,0.4203
59,Fear_P,Sport,0.0979,0.0949,0.1011,0.2811
68,Disgust_P,Sport,0.012,0.011,0.0131,0.0899
77,Joy_P,Sport,0.1385,0.1349,0.1421,0.3154
86,None_P,Sport,0.2654,0.2614,0.2696,0.3567


In [77]:
# Rows of desc_stats whose topic is 'Web'
web = desc_stats.loc[desc_stats['topic'].eq('Web')]
web

Unnamed: 0,variable,topic,mean,mean_ci_lower,mean_ci_upper,std
6,Anger_A,Web,0.1288,0.1265,0.1312,0.2149
15,Fear_A,Web,0.6404,0.6361,0.6448,0.3976
24,Disgust_A,Web,0.0038,0.0035,0.0042,0.0315
33,Joy_A,Web,0.0084,0.0078,0.0089,0.05
42,None_A,Web,0.2138,0.2104,0.2171,0.3093
51,Anger_P,Web,0.4939,0.4893,0.4986,0.4241
60,Fear_P,Web,0.1663,0.1625,0.1702,0.353
69,Disgust_P,Web,0.0175,0.0162,0.0187,0.1135
78,Joy_P,Web,0.0645,0.0621,0.0669,0.2214
87,None_P,Web,0.235,0.2313,0.2388,0.3429


In [78]:
# Rows of desc_stats whose topic is 'Wirtschaft'
wirtschaft = desc_stats.loc[desc_stats['topic'].eq('Wirtschaft')]
wirtschaft

Unnamed: 0,variable,topic,mean,mean_ci_lower,mean_ci_upper,std
7,Anger_A,Wirtschaft,0.1197,0.1176,0.1219,0.2002
16,Fear_A,Wirtschaft,0.7176,0.7136,0.7216,0.37
25,Disgust_A,Wirtschaft,0.0003,0.0003,0.0003,0.0008
34,Joy_A,Wirtschaft,0.0014,0.0013,0.0015,0.0092
43,None_A,Wirtschaft,0.1558,0.1532,0.1584,0.2428
52,Anger_P,Wirtschaft,0.5538,0.5491,0.5584,0.4273
61,Fear_P,Wirtschaft,0.2058,0.2016,0.2101,0.3839
70,Disgust_P,Wirtschaft,0.0088,0.008,0.0096,0.0786
79,Joy_P,Wirtschaft,0.0326,0.0309,0.0343,0.1571
88,None_P,Wirtschaft,0.1808,0.1775,0.1841,0.3095


In [79]:
# Rows of desc_stats whose topic is 'Wissenschaft'
wissenschaft = desc_stats.loc[desc_stats['topic'].eq('Wissenschaft')]
wissenschaft

Unnamed: 0,variable,topic,mean,mean_ci_lower,mean_ci_upper,std
8,Anger_A,Wissenschaft,0.0397,0.0374,0.0421,0.0948
17,Fear_A,Wissenschaft,0.8072,0.7993,0.8153,0.3251
26,Disgust_A,Wissenschaft,0.0016,0.0013,0.0019,0.0112
35,Joy_A,Wissenschaft,0.0135,0.0115,0.0156,0.0843
44,None_A,Wissenschaft,0.1292,0.1232,0.1351,0.2418
53,Anger_P,Wissenschaft,0.4308,0.4206,0.4408,0.4206
62,Fear_P,Wissenschaft,0.1692,0.1606,0.1781,0.359
71,Disgust_P,Wissenschaft,0.0272,0.0238,0.0308,0.1422
80,Joy_P,Wissenschaft,0.0749,0.0691,0.081,0.2409
89,None_P,Wissenschaft,0.259,0.2504,0.2677,0.3595


Test Results by emotions of posts

In [80]:
# Rows of test_stats whose criterion is 'Anger_P'
anger = test_stats.loc[test_stats['criterion'].eq('Anger_P')]
anger

Unnamed: 0,criterion,predictor,type,signif_label,H_mean,epsilon2_mean,epsilon2_ci_lower,epsilon2_ci_upper,mean_p_value,signif_prob,mean_k
0,Anger_P,Anger_A,emotion,yes,128.2791,0.0052,0.0011,0.0112,0.0,0.9999,3.8889
1,Anger_P,Fear_A,emotion,yes,73.2392,0.0032,0.0,0.0105,0.0336,0.8699,4.0
2,Anger_P,Disgust_A,emotion,no,3.8746,0.0001,-0.0002,0.0009,0.2958,0.2434,2.8543
3,Anger_P,Joy_A,emotion,no,12.8323,0.0007,-0.0001,0.0035,0.1896,0.4716,3.65
4,Anger_P,None_A,emotion,no,46.0567,0.0027,-0.0,0.0125,0.0335,0.8917,4.0
5,Anger_P,NewsroomTopic,NewsroomTopic,yes,2940.7465,0.0117,0.0109,0.0126,0.0,1.0,9.0


In [81]:
# Rows of test_stats whose criterion is 'Fear_P'
fear = test_stats.loc[test_stats['criterion'].eq('Fear_P')]
fear

Unnamed: 0,criterion,predictor,type,signif_label,H_mean,epsilon2_mean,epsilon2_ci_lower,epsilon2_ci_upper,mean_p_value,signif_prob,mean_k
6,Fear_P,Anger_A,emotion,yes,137.5445,0.0043,0.0001,0.0118,0.0189,0.933,3.8889
7,Fear_P,Fear_A,emotion,yes,333.5935,0.0128,0.0048,0.0314,0.0,1.0,4.0
8,Fear_P,Disgust_A,emotion,no,15.5099,0.0005,-0.0001,0.0017,0.1446,0.5912,2.8543
9,Fear_P,Joy_A,emotion,no,48.8325,0.0018,-0.0002,0.0064,0.0512,0.8567,3.65
10,Fear_P,None_A,emotion,no,238.3377,0.009,-0.0,0.0227,0.0287,0.9159,4.0
11,Fear_P,NewsroomTopic,NewsroomTopic,yes,7557.7237,0.0302,0.0288,0.0315,0.0,1.0,9.0


In [82]:
# Rows of test_stats whose criterion is 'Disgust_P'
disgust = test_stats.loc[test_stats['criterion'].eq('Disgust_P')]
disgust

Unnamed: 0,criterion,predictor,type,signif_label,H_mean,epsilon2_mean,epsilon2_ci_lower,epsilon2_ci_upper,mean_p_value,signif_prob,mean_k
12,Disgust_P,Anger_A,emotion,yes,111.1554,0.0038,0.0001,0.0118,0.0194,0.9349,3.8889
13,Disgust_P,Fear_A,emotion,no,84.0943,0.0026,-0.0,0.0099,0.0414,0.847,4.0
14,Disgust_P,Disgust_A,emotion,no,64.0589,0.0017,-0.0002,0.006,0.0912,0.7574,2.8543
15,Disgust_P,Joy_A,emotion,no,16.3755,0.0005,-0.0002,0.0022,0.1668,0.5658,3.65
16,Disgust_P,None_A,emotion,yes,45.9388,0.0018,0.0001,0.0052,0.0179,0.9258,4.0
17,Disgust_P,NewsroomTopic,NewsroomTopic,yes,2039.8865,0.0081,0.0074,0.0088,0.0,1.0,9.0


In [83]:
# Rows of test_stats whose criterion is 'Joy_P'
joy = test_stats.loc[test_stats['criterion'].eq('Joy_P')]
joy

Unnamed: 0,criterion,predictor,type,signif_label,H_mean,epsilon2_mean,epsilon2_ci_lower,epsilon2_ci_upper,mean_p_value,signif_prob,mean_k
18,Joy_P,Anger_A,emotion,no,78.4694,0.0027,-0.0,0.0072,0.0381,0.8597,3.8889
19,Joy_P,Fear_A,emotion,yes,283.255,0.011,0.0023,0.03,0.0001,0.9999,4.0
20,Joy_P,Disgust_A,emotion,no,8.2472,0.0002,-0.0002,0.0012,0.267,0.3904,2.8543
21,Joy_P,Joy_A,emotion,no,59.131,0.0028,-0.0,0.0102,0.0289,0.9064,3.65
22,Joy_P,None_A,emotion,no,307.7823,0.0114,-0.0003,0.0277,0.054,0.8955,4.0
23,Joy_P,NewsroomTopic,NewsroomTopic,yes,10718.2934,0.0428,0.0412,0.0445,0.0,1.0,9.0


In [84]:
# Rows of test_stats whose criterion is 'None_P'
none = test_stats.loc[test_stats['criterion'].eq('None_P')]
none

Unnamed: 0,criterion,predictor,type,signif_label,H_mean,epsilon2_mean,epsilon2_ci_lower,epsilon2_ci_upper,mean_p_value,signif_prob,mean_k
24,None_P,Anger_A,emotion,no,30.1577,0.0011,-0.0001,0.0035,0.0812,0.7651,3.8889
25,None_P,Fear_A,emotion,yes,153.5839,0.0055,0.0004,0.0129,0.0116,0.9549,4.0
26,None_P,Disgust_A,emotion,no,7.4439,0.0003,-0.0002,0.0018,0.2848,0.3786,2.8543
27,None_P,Joy_A,emotion,no,16.5981,0.0008,-0.0001,0.0041,0.1529,0.5934,3.65
28,None_P,None_A,emotion,yes,191.9795,0.0071,0.002,0.016,0.0006,0.9973,4.0
29,None_P,NewsroomTopic,NewsroomTopic,yes,2645.915,0.0106,0.0097,0.0114,0.0,1.0,9.0


Visualize Bootstrap Distributions

In [85]:
def _stat_columns(stat_type):
    """Return (mean column, CI lower column, CI upper column, y-axis label) for stat_type.

    Raises:
        ValueError: if stat_type is neither 'epsilon' nor 'H'.
    """
    if stat_type == "epsilon":
        # \u00b2 is the superscript two; escaped so the label survives encoding round-trips
        return ("epsilon2_mean", "epsilon2_ci_lower", "epsilon2_ci_upper",
                "Kruskal-Wallis Epsilon\u00b2")
    if stat_type == "H":
        return "H_mean", "H_ci_lower", "H_ci_upper", "Kruskal-Wallis H"
    raise ValueError("stat_type must be one of 'epsilon' or 'H'")


def _hover_text(pred, crit, stat_type, y, ci, p_val):
    """Build the HTML hover label for one bar; ci is a (lower, upper) pair or None."""
    lines = [
        f"<b>Predictor:</b> {pred}",
        f"<b>Criterion:</b> {crit}",
        f"<b>{stat_type}:</b> {y:.3f}",
    ]
    if ci is not None:
        lines.append(f"<b>CI:</b> [{ci[0]:.3f}, {ci[1]:.3f}]")
    # Only the p-value line is dropped when the p-value is missing
    if not np.isnan(p_val):
        lines.append(f"<b>Mean p-value:</b> {p_val:.4f}")
    return "<br>".join(lines)


def plot_test_stats(stat_type="epsilon", group_by="dependent", data=None):
    """Show a grouped bar chart of a test statistic, with CI error bars when available.

    Args:
        stat_type: "epsilon" plots the `epsilon2_mean` column, "H" plots `H_mean`.
        group_by: "dependent" puts the criteria on the x-axis with one bar per
            predictor; "predictor" puts the predictors on the x-axis with one
            bar per criterion.
        data: DataFrame providing the `predictor`, `criterion` and
            `mean_p_value` columns plus the statistic columns. Defaults to the
            notebook-level `test_stats`.

    Raises:
        ValueError: if `stat_type` or `group_by` is not one of the accepted values.

    Returns:
        None. The figure is displayed via `fig.show()`.
    """
    mean_col, ci_lower_col, ci_upper_col, y_label = _stat_columns(stat_type)
    if group_by not in ("dependent", "predictor"):
        raise ValueError("group_by must be 'dependent' or 'predictor'")

    stats = test_stats if data is None else data

    # The CI columns may be absent (the displayed test_stats has no H_ci_* columns);
    # in that case the bars are drawn without error bars instead of raising KeyError.
    has_ci = ci_lower_col in stats.columns and ci_upper_col in stats.columns

    # Both axes are ordered by their average statistic, largest first
    predictor_order = list(
        stats.groupby('predictor')[mean_col].mean().sort_values(ascending=False).index
    )
    criteria = list(
        stats.groupby('criterion')[mean_col].mean().sort_values(ascending=False).index
    )

    if group_by == "dependent":
        axis_values, series_values = criteria, predictor_order
        width_scale = 0.95
        axis_title = "Dependent Variable"
        plot_title = f"Effects by Criterion with CI ({stat_type})"
        legend_title = "Predictor"
    else:
        axis_values, series_values = predictor_order, criteria
        width_scale = 0.9
        axis_title = "Predictor"
        plot_title = f"Effects by Predictor with CI ({stat_type})"
        legend_title = "Criterion"

    palette = ['#636EFA', '#EF553B', '#00CC96', '#AB63FA', '#FFA15A', '#19D3F3', '#FF6692', '#B6E880', '#FF97FF']
    total_width = 0.8
    # max(..., 1) avoids a ZeroDivisionError when `stats` has no rows
    bar_width = total_width / max(len(series_values), 1)
    fig = go.Figure()

    for series_idx, series_value in enumerate(series_values):
        x_positions, y_values, err_minus, err_plus, hover_texts = [], [], [], [], []

        for axis_idx, axis_value in enumerate(axis_values):
            if group_by == "dependent":
                pred, crit = series_value, axis_value
            else:
                pred, crit = axis_value, series_value

            match = stats[(stats['predictor'] == pred) & (stats['criterion'] == crit)]
            if match.empty:
                # No result for this combination: zero-height bar without hover text
                y, lower, upper, hover = 0, 0, 0, ""
            else:
                first = match.iloc[0]
                y = first[mean_col]
                if has_ci:
                    ci = (first[ci_lower_col], first[ci_upper_col])
                    # Plotly expects error bar lengths relative to the bar height
                    lower, upper = y - ci[0], ci[1] - y
                else:
                    ci, lower, upper = None, 0, 0
                hover = _hover_text(pred, crit, stat_type, y, ci, first['mean_p_value'])

            # Centre each group on its integer tick and offset the bars within it
            x_positions.append(axis_idx - total_width / 2 + series_idx * bar_width + bar_width / 2)
            y_values.append(y)
            err_minus.append(lower)
            err_plus.append(upper)
            hover_texts.append(hover)

        fig.add_trace(go.Bar(
            x=x_positions,
            y=y_values,
            name=series_value,
            marker_color=palette[series_idx % len(palette)],
            width=bar_width * width_scale,
            error_y=dict(
                type='data',
                symmetric=False,
                array=err_plus,
                arrayminus=err_minus,
                color='black',
                thickness=1.5,
                width=5,
                visible=has_ci
            ),
            hovertext=hover_texts,
            hoverinfo="text"
        ))

    fig.update_layout(
        xaxis=dict(
            tickmode='array',
            tickvals=list(range(len(axis_values))),
            ticktext=axis_values,
            title=axis_title
        ),
        yaxis=dict(title=y_label),
        barmode='group',
        title=plot_title,
        legend_title_text=legend_title,
        width=1000,
        height=600
    )

    fig.show()

In [86]:
# Epsilon statistic, bars grouped by criterion (dependent variable)
plot_test_stats(group_by="dependent", stat_type="epsilon")

In [87]:
# Epsilon statistic, bars grouped by predictor
plot_test_stats(group_by="predictor", stat_type="epsilon")