In [1]:
library(dplyr)
library(lme4)
library(car) # for VIF calculation
library(tidyverse)
library(boot)
library(modelsummary)
library(lmerTest)
library(ggeffects)
library(magrittr)
library(broom)
library(broom.mixed)
library(sjPlot)
library(sjmisc)
library(sjlabelled)
library(jtools)
library(stargazer)
set.seed(12696921)



Attaching package: ‘dplyr’


The following objects are masked from ‘package:stats’:

    filter, lag


The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union


Loading required package: Matrix

Loading required package: carData


Attaching package: ‘car’


The following object is masked from ‘package:dplyr’:

    recode


── [1mAttaching core tidyverse packages[22m ──────────────────────── tidyverse 2.0.0 ──
[32m✔[39m [34mforcats  [39m 1.0.0     [32m✔[39m [34mreadr    [39m 2.1.5
[32m✔[39m [34mggplot2  [39m 3.5.0     [32m✔[39m [34mstringr  [39m 1.5.1
[32m✔[39m [34mlubridate[39m 1.9.3     [32m✔[39m [34mtibble   [39m 3.2.1
[32m✔[39m [34mpurrr    [39m 1.0.2     [32m✔[39m [34mtidyr    [39m 1.3.1
── [1mConflicts[22m ────────────────────────────────────────── tidyverse_conflicts() ──
[31m✖[39m [34mtidyr[39m::[32mexpand()[39m masks [34mMatrix[39m::expand()
[31m✖[39m [34mdplyr[39m::[32mfilter()[39m masks 

# CORONA

In [2]:

CORONA_INTERIM_PATH <- "/m/cs/work/luongn1/digirhythm/data/interim/corona/"
CORONA_PROCESSED_PATH <- "/m/cs/work/luongn1/digirhythm/data/processed/corona/"


SIMILARITY_PATH <- "/m/cs/work/luongn1/digirhythm/data/processed/corona/similarity_matrix/"

# Read survey data
survey <- read.csv(paste0(CORONA_INTERIM_PATH, "survey_all.csv"))

# Filter out 'non-binary' gender
survey <- survey %>% filter(gender != 'non-binary')

# Read similarity data
sim_baseline <- read.csv(paste0(SIMILARITY_PATH, "si/similarity_baseline_4epochs.csv"), row.names = 1)

# Keep only necessary columns
IVs <- c("subject_id", "age", "gender", "occupation", "origin", "children_at_home", "BIG5_Extraversion", "BIG5_Agreeableness", "BIG5_Conscientiousness", "BIG5_Neuroticism", "BIG5_Openness", "MEQ")
demographics_df <- survey %>% select(all_of(IVs)) %>% drop_na()

# Calculate average similarity
avg_sim_baseline <- rowMeans(sim_baseline, na.rm = TRUE)
avg_sim_baseline <- data.frame(subject_id = rownames(sim_baseline), DV = avg_sim_baseline)

# Merge datasets
dataset <- merge(avg_sim_baseline, demographics_df, by = 'subject_id', all.x = TRUE)

# Define a function to extract the coefficients
boot_fn <- function(data, indices) {
  d <- data[indices, ]  # Extract the bootstrapped sample
  fit <- lm(DV ~ age + origin + occupation + children_at_home + MEQ, data = d)
  return(coef(fit))
}

# Regression analysis with bootstrapping
regression_analysis <- function(df, y, X) {
  df <- df %>% drop_na()
  model <- lm(as.formula(paste(y, "~", paste(X, collapse = "+"))), data = df)
  vif_values <- vif(model)
summ(model, scale=TRUE, vifs=TRUE, confint = TRUE, digits = 3)
}

# Run the analysis
regression_analysis(dataset, "DV", c("age", "origin", "gender", "occupation", "children_at_home", "MEQ"))


[4mMODEL INFO:[24m
[3mObservations:[23m 115
[3mDependent Variable:[23m DV
[3mType:[23m OLS linear regression 

[4mMODEL FIT:[24m
[3mF[23m(6,108) = 3.198, [3mp[23m = 0.006
[3mR² = [23m0.151
[3mAdj. R² = [23m0.104 

[3mStandard errors: OLS[23m
---------------------------------------------------------------------------
                           Est.     2.5%    97.5%    t val.       p     VIF
---------------------- -------- -------- -------- --------- ------- -------
(Intercept)               0.680    0.669    0.690   125.246   0.000        
age                       0.005    0.000    0.010     2.018   0.046   1.059
origin                   -0.017   -0.030   -0.004    -2.509   0.014   1.415
gender1                   0.005   -0.006    0.016     0.960   0.339   1.073
occupation                0.014    0.002    0.027     2.362   0.020   1.482
children_at_home          0.002   -0.003    0.007     0.934   0.352   1.016
MEQ                       0.007    0.002    0.012     

# MOMO

In [3]:
MOMO_INTERIM_PATH <- "/m/cs/work/luongn1/digirhythm/data/interim/momo/"
MOMO_PROCESSED_PATH <- "/m/cs/work/luongn1/digirhythm/data/processed/momo/"

MOMO_SIMILARITY_PATH <- "/m/cs/work/luongn1/digirhythm/data/processed/momo/similarity_matrix/"

# Read survey data
survey <- read.csv(paste0(MOMO_INTERIM_PATH, "survey_all.csv"))
# Relevel
survey$group <- as.factor(survey$group)
survey <- within(survey, group <- relevel(group, ref = 'mmm-control'))

# Read similarity data
sim_baseline <- read.csv(paste0(MOMO_SIMILARITY_PATH, "si/similarity_baseline_4epochs.csv"), row.names = 1)

# Keep only necessary columns
IVs <- c("user", "bg_age", "bg_sex", "children", "work", "group")
demographics_df <- survey %>% select(all_of(IVs)) %>% drop_na()

# Calculate average similarity
avg_sim_baseline <- rowMeans(sim_baseline, na.rm = TRUE)
avg_sim_baseline <- data.frame(user = rownames(sim_baseline), DV = avg_sim_baseline)

# Merge datasets
dataset <- merge(avg_sim_baseline, demographics_df, by = 'user', all.x = TRUE)

# Define a function to extract the coefficients
boot_fn <- function(data, indices) {
  d <- data[indices, ]  # Extract the bootstrapped sample
  fit <- lm(DV ~ age + origin + occupation + children_at_home + MEQ, data = d)
  return(coef(fit))
}

# Regression analysis with bootstrapping
regression_analysis <- function(df, y, X) {
  df <- df %>% drop_na()
  model <- lm(as.formula(paste(y, "~", paste(X, collapse = "+"))), data = df)
  vif_values <- vif(model)
summ(model, scale=TRUE, vifs=TRUE, confint = TRUE, digits = 3)
}

# Run the analysis
regression_analysis(dataset, "DV", c("bg_age", "bg_sex", "children", "work", "group"))


[4mMODEL INFO:[24m
[3mObservations:[23m 50
[3mDependent Variable:[23m DV
[3mType:[23m OLS linear regression 

[4mMODEL FIT:[24m
[3mF[23m(7,42) = 1.253, [3mp[23m = 0.296
[3mR² = [23m0.173
[3mAdj. R² = [23m0.035 

[3mStandard errors: OLS[23m
---------------------------------------------------------------------
                       Est.     2.5%   97.5%   t val.       p     VIF
------------------ -------- -------- ------- -------- ------- -------
(Intercept)           0.647    0.602   0.691   29.373   0.000        
bg_age                0.010   -0.004   0.024    1.442   0.157   1.101
bg_sex                0.017   -0.017   0.050    0.989   0.329   1.098
children             -0.012   -0.044   0.019   -0.789   0.434   1.338
work                 -0.002   -0.036   0.031   -0.148   0.883   1.591
groupmmm-bd          -0.018   -0.072   0.036   -0.666   0.509   2.187
groupmmm-bpd          0.027   -0.023   0.078    1.084   0.285   2.187
groupmmm-mdd          0.017   -0.022   0

In [4]:
table(survey$work_regular)
#colnames(survey)


 0  1 
51 27 

# Wellbeing ~ Regularity

In [5]:
create_formula <- function(dv, frequency) {
  formula_str <- sprintf(
    '%s ~ 1 + 
    baseline_similarity +
    steps.night.%s.sum.norm + steps.morning.%s.sum.norm + steps.afternoon.%s.sum.norm + steps.evening.%s.sum.norm +
    steps.total.norm +
    tst.norm.mean + 
    midsleep.norm.mean + 
    heart_rate_variability_avg.mean.norm + 
    age.norm + gender + occupation + origin +
    (1|subject_id)',
    dv, frequency, frequency, frequency, frequency
  )
  
  as.formula(formula_str)
}

frequency <- '7ds'

# Read survey data
data <- read.csv(paste0(CORONA_PROCESSED_PATH, sprintf("%s_regularity_wellbeing.csv", frequency) ))

# Fitting the model for y1 with a random intercept for 'subject'
# Define the formula
formula1 <- create_formula('PHQ', frequency)

formula2 <- create_formula('PSS', frequency)

formula3 <- create_formula('PSQI', frequency)


# Fit the linear mixed-effects model
fit1 <- lmer(formula1, data = data)
fit2 <- lmer(formula2, data = data)
fit3 <- lmer(formula3, data = data)

# Display the summary of the model fit
tab_model(fit1, fit2, fit3,
         show.r2 = TRUE,
    show.icc = FALSE,
    show.re.var = FALSE,
    emph.p = TRUE,
    file = sprintf("%s_wellbeing_reg.html", frequency))

class(fit1) <- "lmerMod"
stargazer(fit1, out='4.tex')

fixed-effect model matrix is rank deficient so dropping 1 column / coefficient

fixed-effect model matrix is rank deficient so dropping 1 column / coefficient

fixed-effect model matrix is rank deficient so dropping 1 column / coefficient




% Table created by stargazer v.5.2.3 by Marek Hlavac, Social Policy Institute. E-mail: marek.hlavac at gmail.com
% Date and time: Tue, Apr 16, 2024 - 22:09:05
\begin{table}[!htbp] \centering 
  \caption{} 
  \label{} 
\begin{tabular}{@{\extracolsep{5pt}}lc} 
\\[-1.8ex]\hline 
\hline \\[-1.8ex] 
 & \multicolumn{1}{c}{\textit{Dependent variable:}} \\ 
\cline{2-2} 
\\[-1.8ex] & PHQ \\ 
\hline \\[-1.8ex] 
 baseline\_similarity & $-$0.656 \\ 
  & (0.735) \\ 
  & \\ 
 steps.night.7ds.sum.norm & $-$1.005 \\ 
  & (1.382) \\ 
  & \\ 
 steps.morning.7ds.sum.norm & $-$1.330$^{***}$ \\ 
  & (0.513) \\ 
  & \\ 
 steps.afternoon.7ds.sum.norm & $-$0.495 \\ 
  & (0.443) \\ 
  & \\ 
 steps.total.norm & $-$0.957$^{***}$ \\ 
  & (0.249) \\ 
  & \\ 
 tst.norm.mean & $-$0.558 \\ 
  & (0.473) \\ 
  & \\ 
 midsleep.norm.mean & $-$0.886$^{**}$ \\ 
  & (0.396) \\ 
  & \\ 
 heart\_rate\_variability\_avg.mean.norm & $-$0.361$^{**}$ \\ 
  & (0.174) \\ 
  & \\ 
 age.norm & $-$0.504 \\ 
  & (0.786) \\ 
  & \\ 
 ge

# Personality ~ Regularity

In [17]:

CORONA_INTERIM_PATH <- "/m/cs/work/luongn1/digirhythm/data/interim/corona/"
CORONA_PROCESSED_PATH <- "/m/cs/work/luongn1/digirhythm/data/processed/corona/"


SIMILARITY_PATH <- "/m/cs/work/luongn1/digirhythm/data/processed/corona/similarity_matrix/"

# Read survey data
survey <- read.csv(paste0(CORONA_INTERIM_PATH, "survey_all.csv"))

# Filter out 'non-binary' gender
survey <- survey %>% filter(gender != 'non-binary')

# Read similarity data
sim_baseline <- read.csv(paste0(SIMILARITY_PATH, "si/similarity_baseline_4epochs.csv"), row.names = 1)

# Keep only necessary columns
IVs <- c("subject_id", "age", "gender", "occupation", "origin", "children_at_home", "BIG5_Extraversion", "BIG5_Agreeableness", "BIG5_Conscientiousness", "BIG5_Neuroticism", "BIG5_Openness", "MEQ")
demographics_df <- survey %>% select(all_of(IVs)) %>% drop_na()

# Calculate average similarity
avg_sim_baseline <- rowMeans(sim_baseline, na.rm = TRUE)
avg_sim_baseline <- data.frame(subject_id = rownames(sim_baseline), baseline_similarity = avg_sim_baseline)

# Merge datasets
dataset <- merge(avg_sim_baseline, demographics_df, by = 'subject_id', all.x = TRUE)

dataset

subject_id,baseline_similarity,age,gender,occupation,origin,children_at_home,BIG5_Extraversion,BIG5_Agreeableness,BIG5_Conscientiousness,BIG5_Neuroticism,BIG5_Openness,MEQ
<chr>,<dbl>,<dbl>,<chr>,<int>,<int>,<dbl>,<int>,<int>,<int>,<int>,<int>,<int>
1015720192,0.6403047,47,1,0,0,0,12,13,13,10,15,14
1058094106,0.7071775,44,0,0,0,1,12,8,7,11,12,16
1076660686,0.7033965,46,1,0,0,1,13,10,13,4,12,19
1079748170,0.7203042,43,1,0,0,1,8,15,14,7,13,16
109662472,0.6677296,30,1,1,1,0,12,14,8,6,14,15
1103368722,0.7150992,41,1,0,0,1,12,12,9,6,10,10
1132690122,0.6442289,39,1,0,0,1,8,12,12,8,7,11
1203035370,0.7224922,31,1,1,1,0,7,11,4,15,14,17
1299734522,0.7152356,38,1,0,0,1,12,11,13,12,12,19
1348392268,0.6758627,27,1,0,0,0,10,15,12,11,10,10


In [22]:
create_formula <- function(dv, frequency) {
  formula_str <- sprintf(
    '%s ~ 1 + 
    baseline_similarity +
    steps.total.norm +
    tst.norm.mean + 
    midsleep.norm.mean + 
    heart_rate_variability_avg.mean.norm + 
    age.norm + gender + occupation + origin +
    (1|subject_id)',
    dv, frequency, frequency, frequency, frequency
  )
  
  as.formula(formula_str)
}

frequency <- '7ds'

# Read survey data
data <- read.csv(paste0(CORONA_PROCESSED_PATH, sprintf("%s_regularity_wellbeing.csv", frequency) ))

# Fitting the model for y1 with a random intercept for 'subject'
# Define the formula
formula1 <- create_formula('BIG5_Agreeableness', frequency)

formula2 <- create_formula('BIG5_Conscientiousness', frequency)

formula3 <- create_formula('BIG5_Neuroticism', frequency)

formula4 <- create_formula('BIG5_Openness', frequency)

formula5 <- create_formula('BIG5_Extraversion', frequency)


# Fit the linear mixed-effects model
fit1 <- lmer(formula1, data = data)
fit2 <- lmer(formula2, data = data)
fit3 <- lmer(formula3, data = data)
fit3 <- lmer(formula4, data = data)
fit3 <- lmer(formula5, data = data)

# Display the summary of the model fit
tab_model(fit1, fit2, fit3,fit4, fit5,
         show.r2 = TRUE,
    show.icc = FALSE,
    show.re.var = FALSE,
    emph.p = TRUE,
    file = sprintf("%s_personality_reg.html", frequency))

class(fit1) <- "lmerMod"
stargazer(fit1, out='4.tex')

“4 arguments not used by format '%s ~ 1 + 
    baseline_similarity +
    steps.total.norm +
    tst.norm.mean + 
    midsleep.norm.mean + 
    heart_rate_variability_avg.mean.norm + 
    age.norm + gender + occupation + origin +
    (1|subject_id)'”
“4 arguments not used by format '%s ~ 1 + 
    baseline_similarity +
    steps.total.norm +
    tst.norm.mean + 
    midsleep.norm.mean + 
    heart_rate_variability_avg.mean.norm + 
    age.norm + gender + occupation + origin +
    (1|subject_id)'”
“4 arguments not used by format '%s ~ 1 + 
    baseline_similarity +
    steps.total.norm +
    tst.norm.mean + 
    midsleep.norm.mean + 
    heart_rate_variability_avg.mean.norm + 
    age.norm + gender + occupation + origin +
    (1|subject_id)'”
“4 arguments not used by format '%s ~ 1 + 
    baseline_similarity +
    steps.total.norm +
    tst.norm.mean + 
    midsleep.norm.mean + 
    heart_rate_variability_avg.mean.norm + 
    age.norm + gender + occupation + origin +
    (1|subject_id)'”


ERROR: Error in eval(expr, envir, enclos): object 'fit4' not found


In [19]:
data

subject_id,steps.night.7ds.sum.norm,steps.morning.7ds.sum.norm,steps.afternoon.7ds.sum.norm,steps.evening.7ds.sum.norm,steps.total.norm,tst.norm.mean,midsleep.norm.mean,heart_rate_variability_avg.mean.norm,age,⋯,PSS,PSQI,PHQ,BIG5_Extraversion,BIG5_Agreeableness,BIG5_Conscientiousness,BIG5_Neuroticism,BIG5_Openness,baseline_similarity,age.norm
<int>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,⋯,<int>,<int>,<int>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<lgl>,<lgl>
2123461352,0,0,0,0,0,0.6062652,0.3030109,0.1924399,44,⋯,10,2,1,6,13,11,6,14,,


In [21]:
data <- read.csv(paste0(CORONA_PROCESSED_PATH, sprintf("%s_regularity_wellbeing.csv", frequency) ))
data

subject_id,steps.night.7ds.sum.norm,steps.morning.7ds.sum.norm,steps.afternoon.7ds.sum.norm,steps.evening.7ds.sum.norm,steps.total.norm,tst.norm.mean,midsleep.norm.mean,heart_rate_variability_avg.mean.norm,age,⋯,PSS,PSQI,PHQ,BIG5_Extraversion,BIG5_Agreeableness,BIG5_Conscientiousness,BIG5_Neuroticism,BIG5_Openness,baseline_similarity,age.norm
<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,⋯,<int>,<int>,<int>,<int>,<int>,<int>,<int>,<int>,<dbl>,<dbl>
1015720192,8.638502e-04,0.09391453,0.4536498,0.4515719,0.6933412,0.5744872,0.5921755,0.4071429,47,⋯,13,8,1,12,13,13,10,15,0.6178263,0.8503937
1015720192,2.039089e-04,0.10621616,0.2596780,0.6339019,0.7938665,0.4899244,0.4754001,0.4571429,47,⋯,17,8,2,12,13,13,10,15,0.7519990,0.8503937
1015720192,4.951312e-04,0.20294190,0.4526737,0.3438893,0.7846476,0.5179921,0.5148192,0.3214286,47,⋯,15,5,1,12,13,13,10,15,0.5994990,0.8503937
1015720192,6.643257e-04,0.20298947,0.3428585,0.4534877,0.8528462,0.5034185,0.5471251,0.2785714,47,⋯,14,7,1,12,13,13,10,15,0.6651933,0.8503937
1015720192,8.071432e-04,0.18389413,0.5774269,0.2378718,0.4813316,0.6484347,0.5426793,0.3142857,47,⋯,12,8,0,12,13,13,10,15,0.5193260,0.8503937
1015720192,7.633709e-04,0.25308927,0.2695176,0.4766297,0.5089315,0.5982368,0.5216360,0.2285714,47,⋯,16,5,0,12,13,13,10,15,0.4951396,0.8503937
1015720192,4.580327e-05,0.21443948,0.4599336,0.3255811,0.7068336,0.5300468,0.4226437,0.3357143,47,⋯,14,4,2,12,13,13,10,15,0.5415536,0.8503937
1015720192,1.478604e-03,0.29563260,0.2665681,0.4363207,0.7335109,0.4613170,0.4510966,0.3285714,47,⋯,12,3,1,12,13,13,10,15,0.6369246,0.8503937
1015720192,3.476554e-04,0.11825395,0.3639338,0.5174646,0.7915598,0.4870457,0.4342027,0.4428571,47,⋯,11,4,1,12,13,13,10,15,0.6838577,0.8503937
1015720192,1.016406e-03,0.18335083,0.3308402,0.4847926,0.7326124,0.4661749,0.4582098,0.2928571,47,⋯,12,5,1,12,13,13,10,15,0.7038447,0.8503937
