In [51]:

import pandas as pd
import os
from datetime import datetime
import matplotlib.pyplot as plt
from scipy.stats import ttest_ind, levene
import seaborn as sns
import scipy.stats as stats
from collections import Counter
import statsmodels.formula.api as smf
import statsmodels.api as sm
from statsmodels.stats.outliers_influence import variance_inflation_factor
from statsmodels.formula.api import ols
# from sklearn.utils import resample
import numpy as np

### Two Way Anova

In [52]:
output_dir = "Analysis/ANOVA"

def save(fn, anova):
    anova_out = anova.reset_index().rename(
        columns={"index": "Effect"}
    )

    descriptives_path = os.path.join(
        output_dir, f"{fn}_.csv"
    )
    anova_out.to_csv(descriptives_path, index=False)


In [53]:
import pandas as pd
import statsmodels.api as sm
from statsmodels.formula.api import ols


def run_two_way_anova(
    data_path: str,
    valid_experiments: list[str],
    dv: str,
    iv_style: str,
    iv_experiment: str,
    save_fn,
    save_name: str,
):
    # Load data
    df = pd.read_csv(data_path)

    # Filter experiments
    df = df[df[iv_experiment].isin(valid_experiments)].copy()

    # Check missing values
    relevant_columns = [dv, iv_style, iv_experiment]
    missing_summary = df[relevant_columns].isnull().sum()
    print("\nMissing values:")
    print(missing_summary)

    # Group means
    grouped_means = (
        df
        .groupby([iv_style, iv_experiment])[dv]
        .mean()
    )

    # Group counts
    group_counts = (
        df
        .groupby([iv_style, iv_experiment])
        .size()
    )

    print("\nGroup means:")
    print(grouped_means)

    print("\nGroup sizes:")
    print(group_counts)

    # Sanity check
    if grouped_means.empty or group_counts.empty:
        print("\nWARNING: No valid groups found.")
        print(f"Unique values in {iv_style}:", df[iv_style].unique())
        print(f"Unique values in {iv_experiment}:", df[iv_experiment].unique())
        raise ValueError("No valid groups for ANOVA.")

    # Two-way ANOVA with interaction
    formula = (
        f"{dv} ~ C({iv_style}) "
        f"+ C({iv_experiment}) "
        f"+ C({iv_style}):C({iv_experiment})"
    )

    model = ols(formula, data=df).fit()
    anova_table = sm.stats.anova_lm(model, typ=2)

    # Save results
    save_fn(save_name, anova_table)

    print("\nTwo-Way ANOVA Results:")
    print(anova_table)

    return anova_table


In [54]:

input_path = "Merging/Results"
experiment_path1 = os.path.join(input_path, "master_with_dv_means_recoded.csv")


df = pd.read_csv(experiment_path1)



df['Style_Glam0_Hippie1'] = pd.to_numeric(df['Style_Glam0_Hippie1'], errors='coerce')
df['DonationMoney'] = pd.to_numeric(df['DonationMoney'], errors='coerce')

df_filtered = df.dropna(subset=['DonationMoney']) 

#  Überprüfen, ob relevante Spalten fehlende Werte enthalten
relevant_columns = ['DonationMoney', 'Style_Glam0_Hippie1', 'Store']
missing_summary = df_filtered[relevant_columns].isnull().sum()
print(f"Fehlende Werte pro Spalte:\n{missing_summary}")

#  Gruppierte Mittelwerte und Gruppengrößen prüfen
grouped_means = df_filtered.groupby(['Style_Glam0_Hippie1', 'Store'])['DonationMoney'].mean()
group_counts = df_filtered.groupby(['Style_Glam0_Hippie1', 'Store']).size()
print("\nMittelwerte der Gruppen:")
print(grouped_means)
print("\nAnzahl der Beobachtungen pro Gruppe:")
print(group_counts)

#  Überprüfen auf leere Gruppen
if grouped_means.empty or group_counts.empty:
    print("\nWARNUNG: Keine gültigen Gruppen vorhanden. Überprüfen Sie die Filterbedingungen und Daten.")
    print("Einzigartige Werte in 'Style_Glam0_Hippie1':", df_filtered['Style_Glam0_Hippie1'].unique())
    print("Einzigartige Werte in 'Store':", df_filtered['Store'].unique())
    raise ValueError("Keine gültigen Gruppen für ANOVA gefunden.")

# Two-Way ANOVA mit Total_price als Zielvariable
model = ols('DonationMoney ~ C(Style_Glam0_Hippie1) + C(Store) + C(Style_Glam0_Hippie1):C(Store)', data=df_filtered).fit()
anova_table = sm.stats.anova_lm(model, typ=2)

print("\nTwo-Way ANOVA Results:")
print(anova_table)


Fehlende Werte pro Spalte:
DonationMoney          0
Style_Glam0_Hippie1    0
Store                  0
dtype: int64

Mittelwerte der Gruppen:
Style_Glam0_Hippie1  Store   
0                    BeachBar    56.333333
                     Mall        43.572308
1                    BeachBar    37.325301
                     Mall        27.080229
Name: DonationMoney, dtype: float64

Anzahl der Beobachtungen pro Gruppe:
Style_Glam0_Hippie1  Store   
0                    BeachBar     75
                     Mall        325
1                    BeachBar     83
                     Mall        349
dtype: int64

Two-Way ANOVA Results:
                                       sum_sq     df          F        PR(>F)
C(Style_Glam0_Hippie1)           5.980486e+04    1.0  34.865646  5.152357e-09
C(Store)                         1.675922e+04    1.0   9.770458  1.835271e-03
C(Style_Glam0_Hippie1):C(Store)  2.020836e+02    1.0   0.117813  7.315061e-01
Residual                         1.420264e+06  828.0    

In [55]:

input_path = "Merging/Results"
experiment_path1 = os.path.join(input_path, "master_with_dv_means_recoded.csv")


df = pd.read_csv(experiment_path1)
df1 = pd.read_csv(experiment_path1)

df['Style_Hippie0_Glam1'] = pd.to_numeric(df['Style_Hippie0_Glam1'], errors='coerce')
df['DonationTime'] = pd.to_numeric(df['DonationTime'], errors='coerce')

df_filtered = df.dropna(subset=['DonationTime']) 

#  Überprüfen, ob relevante Spalten fehlende Werte enthalten
relevant_columns = ['DonationTime', 'Style_Hippie0_Glam1', 'Store']
missing_summary = df_filtered[relevant_columns].isnull().sum()
print(f"Fehlende Werte pro Spalte:\n{missing_summary}")

#  Gruppierte Mittelwerte und Gruppengrößen prüfen
grouped_means = df_filtered.groupby(['Style_Hippie0_Glam1', 'Store'])['DonationTime'].mean()
group_counts = df_filtered.groupby(['Style_Hippie0_Glam1', 'Store']).size()
print("\nMittelwerte der Gruppen:")
print(grouped_means)
print("\nAnzahl der Beobachtungen pro Gruppe:")
print(group_counts)

#  Überprüfen auf leere Gruppen
if grouped_means.empty or group_counts.empty:
    print("\nWARNUNG: Keine gültigen Gruppen vorhanden. Überprüfen Sie die Filterbedingungen und Daten.")
    print("Einzigartige Werte in 'Style_Hippie0_Glam1':", df_filtered['Style_Hippie0_Glam1'].unique())
    print("Einzigartige Werte in 'Store':", df_filtered['Store'].unique())
    raise ValueError("Keine gültigen Gruppen für ANOVA gefunden.")

# Two-Way ANOVA mit Total_price als Zielvariable
model = ols('DonationTime ~ C(Style_Hippie0_Glam1) + C(Store) + C(Style_Hippie0_Glam1):C(Store)', data=df_filtered).fit()
anova_table = sm.stats.anova_lm(model, typ=2)

print("\nTwo-Way ANOVA Results:")
print(anova_table)


Fehlende Werte pro Spalte:
DonationTime           0
Style_Hippie0_Glam1    0
Store                  0
dtype: int64

Mittelwerte der Gruppen:
Style_Hippie0_Glam1  Store   
0                    BeachBar    51.234375
                     Mall        50.721910
1                    BeachBar    48.044118
                     Mall        46.209913
Name: DonationTime, dtype: float64

Anzahl der Beobachtungen pro Gruppe:
Style_Hippie0_Glam1  Store   
0                    BeachBar     64
                     Mall        356
1                    BeachBar     68
                     Mall        343
dtype: int64

Two-Way ANOVA Results:
                                       sum_sq     df         F    PR(>F)
C(Style_Hippie0_Glam1)           3.843455e+03    1.0  1.787820  0.181560
C(Store)                         1.567157e+02    1.0  0.072898  0.787231
C(Style_Hippie0_Glam1):C(Store)  4.845324e+01    1.0  0.022538  0.880700
Residual                         1.777885e+06  827.0       NaN       NaN


### Mall Donation Money Video Anova

In [56]:

valid_experiments = ["M_D_M", "M_D_A_M", "M_D_N_M"]

anova_results = run_two_way_anova(
    data_path=experiment_path1,
    valid_experiments=valid_experiments,
    dv="DonationMoney",
    iv_style="Style_Glam0_Hippie1",
    iv_experiment="Experiment_Type",
    save_fn=save,
    save_name="mall_donationMoney_style_x_video",
)


Missing values:
DonationMoney          0
Style_Glam0_Hippie1    0
Experiment_Type        0
dtype: int64

Group means:
Style_Glam0_Hippie1  Experiment_Type
0                    M_D_A_M            69.611111
                     M_D_M              38.593103
                     M_D_N_M            30.774194
1                    M_D_A_M            29.815789
                     M_D_M              26.038961
                     M_D_N_M            26.500000
Name: DonationMoney, dtype: float64

Group sizes:
Style_Glam0_Hippie1  Experiment_Type
0                    M_D_A_M             36
                     M_D_M              145
                     M_D_N_M             31
1                    M_D_A_M             38
                     M_D_M              154
                     M_D_N_M             38
dtype: int64

Two-Way ANOVA Results:
                                                  sum_sq     df          F  \
C(Style_Glam0_Hippie1)                      27660.064559    1.0  15.644509   


## Mall Donation Time Video

In [57]:

valid_experiments = ["M_D_T", "M_D_A_T", "M_D_N_T"]

anova_results = run_two_way_anova(
    data_path=experiment_path1,
    valid_experiments=valid_experiments,
    dv="DonationTime",
    iv_style="Style_Hippie0_Glam1",
    iv_experiment="Experiment_Type",
    save_fn=save,
    save_name="mall_donationTime_style_x_video",
)


Missing values:
DonationTime           1
Style_Hippie0_Glam1    0
Experiment_Type        0
dtype: int64

Group means:
Style_Hippie0_Glam1  Experiment_Type
0                    M_D_A_T            47.539474
                     M_D_N_T            39.986301
                     M_D_T              56.448980
1                    M_D_A_T            39.188406
                     M_D_N_T            40.736111
                     M_D_T              43.837209
Name: DonationTime, dtype: float64

Group sizes:
Style_Hippie0_Glam1  Experiment_Type
0                    M_D_A_T            76
                     M_D_N_T            73
                     M_D_T              99
1                    M_D_A_T            69
                     M_D_N_T            72
                     M_D_T              86
dtype: int64

Two-Way ANOVA Results:
                                                 sum_sq     df         F  \
C(Style_Hippie0_Glam1)                     6.147807e+03    1.0  2.810425   
C(Experimen

## Mall Trash Count Video

In [58]:
valid_experiments = ["M_T_A", "M_T_N", "M_T_NV"]

anova_results = run_two_way_anova(
    data_path=experiment_path1,
    valid_experiments=valid_experiments,
    dv="trashCount",
    iv_style="Style_Hippie0_Glam1",
    iv_experiment="Experiment_Type",
    save_fn=save,
    save_name="mall_trashcount_style_x_video",
)

anova_results = run_two_way_anova(
    data_path=experiment_path1,
    valid_experiments=valid_experiments,
    dv="TotalTimeTrash",
    iv_style="Style_Hippie0_Glam1",
    iv_experiment="Experiment_Type",
    save_fn=save,
    save_name="mall_TotalTrashTime_style_x_video",
)


Missing values:
trashCount             0
Style_Hippie0_Glam1    0
Experiment_Type        0
dtype: int64

Group means:
Style_Hippie0_Glam1  Experiment_Type
0                    M_T_A              4.400000
                     M_T_N              5.535714
                     M_T_NV             5.800000
1                    M_T_A              4.689655
                     M_T_N              7.833333
                     M_T_NV             6.300000
Name: trashCount, dtype: float64

Group sizes:
Style_Hippie0_Glam1  Experiment_Type
0                    M_T_A              30
                     M_T_N              28
                     M_T_NV             30
1                    M_T_A              29
                     M_T_N              30
                     M_T_NV             30
dtype: int64

Two-Way ANOVA Results:
                                                sum_sq     df         F  \
C(Style_Hippie0_Glam1)                       45.880724    1.0  1.034816   
C(Experiment_Type)   

## Donation Money Spendentafel BeachBar

In [59]:
valid_experiments = ["BB_T_D_M", "BB_T_D_SM"]

anova_results = run_two_way_anova(
    data_path=experiment_path1,
    valid_experiments=valid_experiments,
    dv="DonationMoney",
    iv_style="Style_Glam0_Hippie1",
    iv_experiment="Experiment_Type",
    save_fn=save,
    save_name="bb_donationMoney_style_x_spendentafel",
)


Missing values:
DonationMoney          0
Style_Glam0_Hippie1    0
Experiment_Type        0
dtype: int64

Group means:
Style_Glam0_Hippie1  Experiment_Type
0                    BB_T_D_M           48.717391
                     BB_T_D_SM          68.413793
1                    BB_T_D_M           34.060000
                     BB_T_D_SM          42.272727
Name: DonationMoney, dtype: float64

Group sizes:
Style_Glam0_Hippie1  Experiment_Type
0                    BB_T_D_M           46
                     BB_T_D_SM          29
1                    BB_T_D_M           50
                     BB_T_D_SM          33
dtype: int64

Two-Way ANOVA Results:
                                                  sum_sq     df         F  \
C(Style_Glam0_Hippie1)                      14457.136124    1.0  9.288671   
C(Experiment_Type)                           7003.186325    1.0  4.499528   
C(Style_Glam0_Hippie1):C(Experiment_Type)    1237.971185    1.0  0.795393   
Residual                                

## Donation Time Spendentafel BeachBar

In [60]:
valid_experiments = ["BB_T_D_T", "BB_T_D_ST"]
print("----------- ----------")


anova_results = run_two_way_anova(
    data_path=experiment_path1,
    valid_experiments=valid_experiments,
    dv="DonationTime",
    iv_style="Style_Hippie0_Glam1",
    iv_experiment="Experiment_Type",
    save_fn=save,
    save_name="bb_donationTime_style_x_spendentafel",
)

print("----------- TRASH COUNT ----------")

anova_results = run_two_way_anova(
    data_path=experiment_path1,
    valid_experiments=valid_experiments,
    dv="trashCount",
    iv_style="Style_Hippie0_Glam1",
    iv_experiment="Experiment_Type",
    save_fn=save,
    save_name="bb_trashcount_style_x_spendentafel",
)

print("----------- TOTAL TIME TRASH ----------")

anova_results = run_two_way_anova(
    data_path=experiment_path1,
    valid_experiments=valid_experiments,
    dv="TotalTimeTrash",
    iv_style="Style_Hippie0_Glam1",
    iv_experiment="Experiment_Type",
    save_fn=save,
    save_name="bb_trashtime_style_x_spendentafel",
)

----------- ----------

Missing values:
DonationTime           0
Style_Hippie0_Glam1    0
Experiment_Type        0
dtype: int64

Group means:
Style_Hippie0_Glam1  Experiment_Type
0                    BB_T_D_ST          57.903226
                     BB_T_D_T           44.969697
1                    BB_T_D_ST          57.375000
                     BB_T_D_T           39.750000
Name: DonationTime, dtype: float64

Group sizes:
Style_Hippie0_Glam1  Experiment_Type
0                    BB_T_D_ST          31
                     BB_T_D_T           33
1                    BB_T_D_ST          32
                     BB_T_D_T           36
dtype: int64

Two-Way ANOVA Results:
                                                  sum_sq     df         F  \
C(Style_Hippie0_Glam1)                        292.466349    1.0  0.193286   
C(Experiment_Type)                           7755.403602    1.0  5.125423   
C(Style_Hippie0_Glam1):C(Experiment_Type)     181.019046    1.0  0.119633   
Residual          

## Donation Money Spendentafel Mall

In [61]:
valid_experiments = ["M_D_S_M", "M_D_M"]

anova_results = run_two_way_anova(
    data_path=experiment_path1,
    valid_experiments=valid_experiments,
    dv="DonationMoney",
    iv_style="Style_Glam0_Hippie1",
    iv_experiment="Experiment_Type",
    save_fn=save,
    save_name="mall_donationMoney_style_x_spendentafel",
)


Missing values:
DonationMoney          0
Style_Glam0_Hippie1    0
Experiment_Type        0
dtype: int64

Group means:
Style_Glam0_Hippie1  Experiment_Type
0                    M_D_M              38.593103
                     M_D_S_M            45.176991
1                    M_D_M              26.038961
                     M_D_S_M            27.739496
Name: DonationMoney, dtype: float64

Group sizes:
Style_Glam0_Hippie1  Experiment_Type
0                    M_D_M              145
                     M_D_S_M            113
1                    M_D_M              154
                     M_D_S_M            119
dtype: int64

Two-Way ANOVA Results:
                                                  sum_sq     df          F  \
C(Style_Glam0_Hippie1)                      28616.232552    1.0  17.334835   
C(Experiment_Type)                           2168.804601    1.0   1.313795   
C(Style_Glam0_Hippie1):C(Experiment_Type)     778.225167    1.0   0.471425   
Residual                        

## Donation Time Spendentafel Mall

In [62]:
valid_experiments = ["M_D_T", "M_D_S_T"]

anova_results = run_two_way_anova(
    data_path=experiment_path1,
    valid_experiments=valid_experiments,
    dv="DonationTime",
    iv_style="Style_Hippie0_Glam1",
    iv_experiment="Experiment_Type",
    save_fn=save,
    save_name="mall_donationTime_style_x_spendentafel"
)


Missing values:
DonationTime           1
Style_Hippie0_Glam1    0
Experiment_Type        0
dtype: int64

Group means:
Style_Hippie0_Glam1  Experiment_Type
0                    M_D_S_T            54.981651
                     M_D_T              56.448980
1                    M_D_S_T            55.543103
                     M_D_T              43.837209
Name: DonationTime, dtype: float64

Group sizes:
Style_Hippie0_Glam1  Experiment_Type
0                    M_D_S_T            109
                     M_D_T               99
1                    M_D_S_T            116
                     M_D_T               86
dtype: int64

Two-Way ANOVA Results:
                                                  sum_sq     df         F  \
C(Style_Hippie0_Glam1)                       2924.023503    1.0  1.253062   
C(Experiment_Type)                           2499.205897    1.0  1.071010   
C(Style_Hippie0_Glam1):C(Experiment_Type)    4379.181627    1.0  1.876656   
Residual                             

## Follow Up T Tests for Donation Money Mall

In [63]:
import pandas as pd
from scipy import stats


def followup_ttests_donation_money(
    data_path: str,
    valid_videos: list[str],
    dv: str = "DonationMoney",
    style_col: str = "Style_Glam0_Hippie1",
    video_col: str = "Experiment_Type",
    glam_value: int = 0,
    hippie_value: int = 1,
):
    """
    Follow-up (simple effects) independent-samples t-tests:
    Glam vs. Hippie within each video condition.

    Uses Welch's t-test and Bonferroni correction.
    """

    df = pd.read_csv(data_path)
    df = df[df[video_col].isin(valid_videos)].copy()

    results = []
    k = len(valid_videos)
    alpha_bonf = 0.05 / k

    for video in valid_videos:
        sub = df[df[video_col] == video]

        glam = sub[sub[style_col] == glam_value][dv].dropna()
        hippie = sub[sub[style_col] == hippie_value][dv].dropna()

        t_stat, p_val = stats.ttest_ind(
            glam,
            hippie,
            equal_var=False  # Welch t-test
        )

        results.append({
            "Video": video,
            "n_glam": len(glam),
            "mean_glam": glam.mean(),
            "sd_glam": glam.std(ddof=1),
            "n_hippie": len(hippie),
            "mean_hippie": hippie.mean(),
            "sd_hippie": hippie.std(ddof=1),
            "mean_diff_glam_minus_hippie": glam.mean() - hippie.mean(),
            "t": float(t_stat),
            "p_uncorrected": float(p_val),
            "p_bonferroni": min(p_val * k, 1.0),
            "alpha_bonferroni": alpha_bonf,
            "significant_bonferroni": p_val < alpha_bonf
        })

    return pd.DataFrame(results)


valid_videos = ["M_D_A_M", "M_D_M", "M_D_N_M"]

ttest_results = followup_ttests_donation_money(
    data_path=experiment_path1,
    valid_videos=valid_videos
)

save("t_test_DonationMoney", ttest_results)

print(ttest_results)


     Video  n_glam  mean_glam    sd_glam  n_hippie  mean_hippie  sd_hippie  \
0  M_D_A_M      36  69.611111  57.646845        38    29.815789  41.300386   
1    M_D_M     145  38.593103  47.636572       154    26.038961  33.879425   
2  M_D_N_M      31  30.774194  37.992727        38    26.500000  35.164286   

   mean_diff_glam_minus_hippie         t  p_uncorrected  p_bonferroni  \
0                    39.795322  3.397497       0.001181      0.003544   
1                    12.554142  2.611861       0.009532      0.028595   
2                     4.274194  0.480571       0.632512      1.000000   

   alpha_bonferroni  significant_bonferroni  
0          0.016667                    True  
1          0.016667                    True  
2          0.016667                   False  
