<a href="https://colab.research.google.com/github/dannynacker/strobe_depression/blob/main/WP2_monitoring.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

WP2 Sign-Up Data Monitoring

In [10]:
import pandas as pd
from IPython.display import display

#####
# Load
#####
# Load the WP2 Sign-Up Data CSV
df = pd.read_csv(
    '/content/wp2signuptest.csv', ### add the path to your Qualtrics Sign-Up CSV here!!!
    dtype={'excluded': str, 'incl_dem_sex': str}
)

#####
# Exclusions Summary
#####
excluded_counts = df['excluded'].value_counts()
summary_excluded = pd.DataFrame({
    'Category': [
        '# Passed',
        '# Excluded for Safety Reasons',
        '# Excluded for Demographic Reasons'
    ],
    'Count': [
        excluded_counts.get('FALSE', 0),
        excluded_counts.get('safety_screening', 0),
        excluded_counts.get('demographics', 0)
    ]
})
print("=== Exclusion Summary ===")
display(summary_excluded)

print("\n=== ======== ===")
print("\n=== ======== ===")
print("\n=== ======== ===")

#####
# Prepare Columns
#####
# Age → numeric
df['incl_dem_age'] = pd.to_numeric(df['incl_dem_age'], errors='coerce')

# PHQ-9 sum → numeric
df['phq9_sum'] = pd.to_numeric(df['phq9_sum'], errors='coerce')

# Sex → clean to 'male', 'female', 'other' only
df['incl_dem_sex'] = (
    df['incl_dem_sex']
      .str.strip()
      .str.lower()
)
valid_sex = ['male', 'female', 'other']
df.loc[~df['incl_dem_sex'].isin(valid_sex), 'incl_dem_sex'] = pd.NA

#####
# Split into Passed vs. Failed
#####
passed_df = df[df['excluded'] == 'FALSE']
failed_df = df[df['excluded'] != 'FALSE']

def summarize_group(group_df, title):
    print(f"\n=== {title} ===")

    # Age summary
    mean_age = group_df['incl_dem_age'].mean()
    sd_age   = group_df['incl_dem_age'].std()
    print(f"Age (years)  Mean ± SD: {mean_age:.2f} ± {sd_age:.2f}")

    # PHQ-9 summary
    mean_phq = group_df['phq9_sum'].mean()
    sd_phq   = group_df['phq9_sum'].std()
    print(f"PHQ-9 Sum    Mean ± SD: {mean_phq:.2f} ± {sd_phq:.2f}")

    # Sex breakdown
    sex_counts = (
        group_df['incl_dem_sex']
          .value_counts(dropna=True)
          .rename_axis('Sex')
          .reset_index(name='Count')
    )
    print("\nSex Breakdown:")
    display(sex_counts)

# Output for Passed
summarize_group(passed_df, "Passed Participants")

print("\n=== ======== ===")
print("\n=== ======== ===")
print("\n=== ======== ===")

# Output for Failed
summarize_group(failed_df, "Failed Participants")

print("\n=== ======== ===")
print("\n=== ======== ===")
print("\n=== ======== ===")

=== Exclusion Summary ===


Unnamed: 0,Category,Count
0,# Passed,0
1,# Excluded for Safety Reasons,535
2,# Excluded for Demographic Reasons,6






=== Passed Participants ===
Age (years)  Mean ± SD: nan ± nan
PHQ-9 Sum    Mean ± SD: nan ± nan

Sex Breakdown:


Unnamed: 0,Sex,Count






=== Failed Participants ===
Age (years)  Mean ± SD: 64.00 ± 24.02
PHQ-9 Sum    Mean ± SD: nan ± nan

Sex Breakdown:


Unnamed: 0,Sex,Count
0,other,4
1,female,1
2,male,1







Participant Wellbeing Live Monitoring

requires:
*   pre_session1 full_battery
*   sessions1-4 +1, 3, 5 day SMS
*   pre_sessions2-4
*   post_sessions1-4
*   post_session4final








In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.ticker import MaxNLocator

#####
# LOAD
#####
# Load each individual Qualtrics CSV
df_s1_pre  = pd.read_csv('/content/session1_pre.csv', dtype=str) ### add the path to your Qualtrics pre-session 1 CSV here!!!
df_s2to4_pre  = pd.read_csv('/content/sessions2through4_pre.csv',  dtype=str) ### add the path to your Qualtrics pre-sessions2through4 CSV here!!!
df_s2to4_post = pd.read_csv('/content/sessions1through4_post.csv', dtype=str) ### add the path to your Qualtrics post-sessions1through4 CSV here!!!
df_s4_post = pd.read_csv('/content/session4_post.csv', dtype=str) ### add the path to your Qualtrics post-session 4 CSV here!!!
df_sms     = pd.read_csv('/content/SMS.csv',               dtype=str) ### add the path to your Qualtrics SMS CSV here!!!

#####
# 2) COERCE TYPES
#####
# session_n everywhere, sms_n only in SMS
for df in (df_s1_pre, df_s2to4_pre, df_s2to4_post, df_s4_post):
    df['session_n'] = df['session_n'].astype(int)
df_sms['session_n'] = df_sms['session_n'].astype(int)
df_sms['sms_n']     = df_sms['sms_n'].astype(int)

# numeric columns
numeric_cols = [
    'phq9_sum', 'spane_p', 'spane_n',
    'm3vas_mood', 'm3vas_pleasure', 'm3vas_suicidal',
    'fisber_1', 'fisber_2', 'fisber_3'
]
for df in (df_s1_pre, df_s2to4_pre, df_s2to4_post, df_s4_post, df_sms):
    for col in numeric_cols:
        if col in df.columns:
            df[col] = pd.to_numeric(df[col], errors='coerce')

#####
# 3) TAG TIMEPOINTS
#####
df_s1_pre    ['timepoint'] = 'session1_pre'
df_s2to4_pre ['timepoint'] = df_s2to4_pre.apply(
    lambda r: f"session{r.session_n}_pre", axis=1
)
df_s2to4_post['timepoint'] = df_s2to4_post.apply(
    lambda r: f"session{r.session_n}_post", axis=1
)
df_s4_post   ['timepoint'] = 'session4_post'
df_sms       ['timepoint'] = df_sms.apply(
    lambda r: f"SMS_{r.session_n}_{r.sms_n}", axis=1
)

#####
# 4) CONCATENATE
#####
df_long = pd.concat([
    df_s1_pre,
    df_s2to4_pre, df_s2to4_post,
    df_s4_post,
    df_sms
], ignore_index=True)

# ensure participant ID is a string (replace 'part_id' if your column is named differently)
df_long['part_id'] = df_long['part_id'].astype(str)

#####
# 5) ORDER THE TIMEPOINT FACTOR
#####
ordered_timepoints = [
    'session1_pre','session1_post',
    'SMS_1_1','SMS_1_3','SMS_1_5',
    'session2_pre','session2_post',
    'SMS_2_1','SMS_2_3','SMS_2_5',
    'session3_pre','session3_post',
    'SMS_3_1','SMS_3_3','SMS_3_5',
    'session4_pre','session4_post',
    'SMS_4_1','SMS_4_3','SMS_4_5'
]
df_long['timepoint'] = pd.Categorical(
    df_long['timepoint'],
    categories=ordered_timepoints,
    ordered=True
)

#####
# 6) PLOTTING FUNCTION
#####
def plot_measure(measure):
    plt.figure(figsize=(10,5))
    ax = plt.gca()
    for pid, grp in df_long.groupby('part_id'):
        series = (
            grp.sort_values('timepoint')
               [['timepoint', measure]]
               .dropna()
        )
        if series.empty:
            continue
        ax.plot(
            series['timepoint'].cat.codes,
            series[measure],
            linewidth=0.8,
            alpha=0.6
        )
    ax.set_title(measure, fontsize=14)
    ax.xaxis.set_major_locator(MaxNLocator(integer=True))
    ax.set_xticks(range(len(ordered_timepoints)))
    ax.set_xticklabels(ordered_timepoints, rotation=45, ha='right')
    ax.set_ylabel(measure)
    plt.tight_layout()
    plt.show()

#####
# 7) LOOP THROUGH THE 9 MEASURES
#####
measures = [
    'phq9_sum','spane_p','spane_n',
    'm3vas_mood','m3vas_pleasure','m3vas_suicidal',
    'fisber_1','fisber_2','fisber_3'
]
for m in measures:
    plot_measure(m)