In [None]:
# Enable IPython's autoreload extension. Mode 2 reloads all modules (except those
# excluded via %aimport) before each cell runs, so edits to local modules such as
# `utils` are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib as mpl
import statsmodels.api as sm

from utils import (
    process_qualtrics,
    calculate_scores,
)

In [None]:
# Show every column when a DataFrame is displayed (None removes the column limit).
pd.options.display.max_columns = None


In [None]:
# Location of the input data file, given relative to the current working directory.
data_folder = "data"
data_file = "SHS-cues_April+9,+2025_09.56.tsv"  # TODO: change to your data file
data_path = os.path.join(data_folder, data_file)

Load the data

In [None]:
# Read the tab-separated, UTF-16 encoded file at `data_path` into a DataFrame.
df_qualtrics = pd.read_csv(
    data_path,
    encoding="utf-16",
    sep="\t",
)
# Preview the first rows as a quick sanity check of the import.
df_qualtrics.head()

In [None]:
# Threshold passed to process_qualtrics as `min_duration`.
# NOTE(review): 2 * 60 = 120, presumably seconds (i.e. a 2-minute minimum) — confirm the
# unit against process_qualtrics, and use 5 * 60 if a 5-minute cutoff is what is intended.
min_duration = 2 * 60
df = process_qualtrics(df_qualtrics, min_duration=min_duration)
print(f"After filtering, we have {len(df)} participants.")
# Preview the first three processed rows.
df.head(3)

In [None]:
# Run the project's scoring helper on the processed responses
# (presumably adds group1_score, group2_score and score_diff, which later cells read —
# verify in utils).
# NOTE: this rebinds `df` to the helper's output, so re-running this cell on its own feeds
# already-scored data back into calculate_scores; re-run from the filtering cell instead.
df = calculate_scores(df)
df.head(3)

In [None]:
# Split the participants by cue condition.
good_cues = df[df["cue_group"] == "GoodCues"]
bad_cues = df[df["cue_group"] == "BadCues"]

# Print the mean and standard deviation of both scores for each condition.
for cue_label, cue_subset in (("GOOD CUES", good_cues), ("BAD CUES", bad_cues)):
    summary_lines = [cue_label]
    for score_name in ("group1", "group2"):
        score_values = cue_subset[f"{score_name}_score"]
        summary_lines.append(
            f" {score_name}: mean={score_values.mean():.3f}, std={score_values.std():.3f}"
        )
    print("\n".join(summary_lines))

# Regression analysis

We fit a linear regression to estimate the effect of `cue_group` on `group2_score`.

In [None]:
# Regress the group 2 score on the cue condition; C(...) marks cue_group as categorical.
formula = "group2_score ~ C(cue_group)"
ols_model = sm.formula.ols(formula=formula, data=df)
results = ols_model.fit()
print(results.summary())

Group 1 should not be affected by `cue_group`, so we expect the `cue_group` coefficient to be close to 0 (i.e. not significantly different from zero).

In [None]:
# Regress the group 1 score on the cue condition; C(...) marks cue_group as categorical.
formula = "group1_score ~ C(cue_group)"
ols_model = sm.formula.ols(formula=formula, data=df)
results = ols_model.fit()
print(results.summary())

## Change in scores

We want to know how exposure to the cues affects the score.
To do so, we look at the difference between the scores before and after exposure to the cues (`score_diff`).

In [None]:
# Regress the score difference on the cue condition; C(...) marks cue_group as categorical.
formula = "score_diff ~ C(cue_group)"
ols_model = sm.formula.ols(formula=formula, data=df)
results = ols_model.fit()
print(results.summary())