In [17]:
import polars as pl
from scipy import stats
from scipy.stats import mannwhitneyu
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from pathlib import Path
import matplotlib.dates as mdates

## Mann-Whitney U Test on Stress/Wellbeing Data

This notebook compares the emotion scores recorded for the treatment and control groups in the cleaned stress/wellbeing dataset using a two-sided Mann-Whitney U test, and formats the result as a Typst table.

In [18]:
# Read the cleaned stress measurements; date-like columns are parsed on load
df = pl.read_csv(
    source=Path("../data/processed/cleaned_stress_data.csv"),
    try_parse_dates=True,
)
df

date,time,wellbeing_level,scale_1_5,datetime,emotion_score,experiment_group,measurement_order
date,time,str,i64,datetime[μs],i64,str,i64
2025-06-09,03:00:00,"""Neutral""",3,2025-06-08 18:00:00,4,"""control""",2
2025-06-08,23:59:00,"""Pleasant""",4,2025-06-08 14:59:00,6,"""control""",1
2025-06-08,05:50:00,"""Pleasant""",4,2025-06-07 20:50:00,6,"""control""",2
2025-06-07,23:44:00,"""Very Pleasant""",5,2025-06-07 14:44:00,7,"""control""",1
2025-06-07,03:03:00,"""Slightly Pleasant""",4,2025-06-06 18:03:00,5,"""control""",2
…,…,…,…,…,…,…,…
2025-05-14,14:01:00,"""Neutral""",3,2025-05-14 14:01:00,4,"""treatment""",1
2025-05-13,18:00:00,"""Slightly Pleasant""",4,2025-05-13 18:00:00,5,"""treatment""",2
2025-05-13,14:00:00,"""Unpleasant""",2,2025-05-13 14:00:00,2,"""treatment""",1
2025-05-12,18:01:00,"""Pleasant""",4,2025-05-12 18:01:00,6,"""treatment""",2


In [25]:
# Overview of the data: dimensions, column names, and per-category row counts
print("Data shape:", df.shape)
print("\nColumns:", df.columns)
print("\nExperiment groups:")
# group_by does not guarantee the order of the returned groups, so sort the
# counts (as the two summaries below already do) to keep the output stable
# between runs.
print(df.group_by("experiment_group").len().sort("experiment_group"))
print("\nWellbeing levels:")
print(df.group_by("wellbeing_level").len().sort("wellbeing_level"))
print("\nEmotion score range:")
print(df.group_by("emotion_score").len().sort("emotion_score"))

Data shape: (50, 8)

Columns: ['date', 'time', 'wellbeing_level', 'scale_1_5', 'datetime', 'emotion_score', 'experiment_group', 'measurement_order']

Experiment groups:
shape: (2, 2)
┌──────────────────┬─────┐
│ experiment_group ┆ len │
│ ---              ┆ --- │
│ str              ┆ u32 │
╞══════════════════╪═════╡
│ treatment        ┆ 25  │
│ control          ┆ 25  │
└──────────────────┴─────┘

Wellbeing levels:
shape: (6, 2)
┌─────────────────────┬─────┐
│ wellbeing_level     ┆ len │
│ ---                 ┆ --- │
│ str                 ┆ u32 │
╞═════════════════════╪═════╡
│ Neutral             ┆ 11  │
│ Pleasant            ┆ 12  │
│ Slightly Pleasant   ┆ 17  │
│ Slightly Unpleasant ┆ 5   │
│ Unpleasant          ┆ 3   │
│ Very Pleasant       ┆ 2   │
└─────────────────────┴─────┘

Emotion score range:
shape: (6, 2)
┌───────────────┬─────┐
│ emotion_score ┆ len │
│ ---           ┆ --- │
│ i64           ┆ u32 │
╞═══════════════╪═════╡
│ 2             ┆ 3   │
│ 3             ┆ 5   │
│ 4 

In [27]:
# One frame per experiment arm, selected on the experiment_group label
treatment_data, control_data = (
    df.filter(pl.col("experiment_group") == arm)
    for arm in ("treatment", "control")
)

In [28]:
# Pull the emotion_score column of each arm out as a NumPy array for scipy
emotion_treatment, emotion_control = (
    arm_frame["emotion_score"].to_numpy()
    for arm_frame in (treatment_data, control_data)
)

In [29]:
# Two-sided Mann-Whitney U test on the emotion scores of the two groups
print("=== Mann-Whitney U Test Results ===\n")

# Run the test once, then emit the whole report in a single print call
emotion_stat, emotion_p = mannwhitneyu(
    emotion_treatment,
    emotion_control,
    alternative='two-sided',
)
print(
    "Emotion Score:",
    f"  Treatment median: {np.median(emotion_treatment):.2f}",
    f"  Control median: {np.median(emotion_control):.2f}",
    f"  U-statistic: {emotion_stat:.2f}",
    f"  p-value: {emotion_p:.6f}",
    f"  Significant (α=0.05): {'Yes' if emotion_p < 0.05 else 'No'}",
    sep="\n",
)

=== Mann-Whitney U Test Results ===

Emotion Score:
  Treatment median: 5.00
  Control median: 5.00
  U-statistic: 306.00
  p-value: 0.904186
  Significant (α=0.05): No


In [30]:
def create_stress_mannwhitney_typst_table(results_list):
    """Render Mann-Whitney U test results for the stress data as a Typst table.

    Args:
        results_list: Iterable of dicts, one per table row. Each dict must
            provide the keys 'variable', 'treatment_n', 'control_n',
            'treatment_median', 'control_median', 'u_statistic', 'p_value'
            and 'result'; a missing key raises KeyError.

    Returns:
        str: Typst ``#table(...)`` markup consisting of the fixed eight-column
        header followed by one row per entry of ``results_list``.
    """
    # Fields of each result dict, in the order of the table columns
    column_keys = (
        'variable',
        'treatment_n',
        'control_n',
        'treatment_median',
        'control_median',
        'u_statistic',
        'p_value',
        'result',
    )

    # Fixed table preamble and header row
    header = """#table(
  columns: (auto, auto, auto, auto, auto, auto, auto, auto),
  inset: 8pt,
  align: center + horizon,
  stroke: 0.5pt,
  table.header(
    [*Variable*], [*Treatment N*], [*Control N*], [*Treatment Median*], [*Control Median*], [*U-statistic*], [*P-value*], [*Result*]
  ),"""

    # Each row is a comma-separated list of bracketed cells with a trailing comma
    rows = [
        "\n  " + ", ".join(f"[{result[key]}]" for key in column_keys) + ","
        for result in results_list
    ]

    # Header, then the rows, then the closing parenthesis of #table(
    return header + "".join(rows) + "\n)"

def stress_mannwhitney_test_result(treatment_data, control_data, variable_name, alpha=0.05):
    """Run a two-sided Mann-Whitney U test and format it as a Typst table row.

    Args:
        treatment_data: Array-like of numeric treatment-group values (NumPy
            array, list, ...). NaN entries are dropped before testing.
        control_data: Array-like of numeric control-group values, handled the
            same way as ``treatment_data``.
        variable_name: Name of the measured variable; underscores become
            spaces and the result is title-cased for display.
        alpha: Significance level used to label the result. Defaults to 0.05.

    Returns:
        dict: Keys 'variable', 'treatment_n', 'control_n', 'treatment_median',
        'control_median', 'u_statistic', 'p_value' and 'result'. The counts
        are ints; all other values are strings. When either group is empty
        after NaN removal the statistics are 'N/A' and 'result' is
        'Insufficient data'.
    """
    # Coerce to float arrays first: boolean-mask indexing and np.isnan then
    # work for plain lists and other array-likes, not only float ndarrays.
    treatment_values = np.asarray(treatment_data, dtype=float)
    control_values = np.asarray(control_data, dtype=float)

    # Remove any NaN values
    treatment_clean = treatment_values[~np.isnan(treatment_values)]
    control_clean = control_values[~np.isnan(control_values)]

    # Fields shared by both outcomes, in the column order of the table
    row = {
        'variable': variable_name.replace('_', ' ').title(),
        'treatment_n': len(treatment_clean),
        'control_n': len(control_clean),
    }

    # The test needs at least one observation in each group
    if len(treatment_clean) == 0 or len(control_clean) == 0:
        row.update(
            treatment_median='N/A',
            control_median='N/A',
            u_statistic='N/A',
            p_value='N/A',
            result='Insufficient data',
        )
        return row

    # Perform Mann-Whitney U test
    u_stat, p_value = mannwhitneyu(treatment_clean, control_clean, alternative='two-sided')

    row.update(
        treatment_median=f"{np.median(treatment_clean):.2f}",
        control_median=f"{np.median(control_clean):.2f}",
        u_statistic=f"{u_stat:.2f}",
        # Written as "< threshold" first so that a NaN p-value falls through
        # to the numeric format instead of being reported as "< 0.000001".
        p_value="< 0.000001" if p_value < 0.000001 else f"{p_value:.6f}",
        result='Significant' if p_value < alpha else 'Not significant',
    )
    return row

In [32]:
# Collect one result row per analysed variable, then render and print the Typst table
stress_results = [
    stress_mannwhitney_test_result(
        treatment_data=emotion_treatment,
        control_data=emotion_control,
        variable_name="emotion_score",
    )
]

stress_typst_table = create_stress_mannwhitney_typst_table(results_list=stress_results)
print(stress_typst_table)

#table(
  columns: (auto, auto, auto, auto, auto, auto, auto, auto),
  inset: 8pt,
  align: center + horizon,
  stroke: 0.5pt,
  table.header(
    [*Variable*], [*Treatment N*], [*Control N*], [*Treatment Median*], [*Control Median*], [*U-statistic*], [*P-value*], [*Result*]
  ),
  [Emotion Score], [25], [25], [5.00], [5.00], [306.00], [0.904186], [Not significant],
)
