In [1]:
library(readr)
library(dplyr)
library(lme4)

# Read the input table, declaring the type of every column up front.
patient_gmv_demographics <- read_csv(
    "patient_gmv_demographics.csv",
    col_types = list(
        MGV = col_double(),
        subj_id = col_character(),
        roi = col_factor(),
        diagnosis = col_double(),
        age = col_double(),
        sex = col_factor(),
        site = col_factor()
    )
)

# Per-ROI tables built from every site, before any site is dropped.
rois_all_sites <- split(
    x = patient_gmv_demographics,
    f = patient_gmv_demographics$roi
)

# Gather the site label of every row that belongs to a site whose
# diagnosis values are all 0; those sites are excluded below.
rows_by_site <- group_by(patient_gmv_demographics, site)
zero_only_rows <- filter(rows_by_site, all(diagnosis == 0))
sites_to_remove <- pull(zero_only_rows, site)

patient_gmv_demographics <- filter(
    patient_gmv_demographics,
    !(site %in% sites_to_remove)
)

# Per-ROI tables restricted to the sites that were kept.
rois <- split(
    x = patient_gmv_demographics,
    f = patient_gmv_demographics$roi
)


Attaching package: 'dplyr'


The following objects are masked from 'package:stats':

    filter, lag


The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union


Loading required package: Matrix



In [7]:
# Fit one model per ROI and return the standardized diagnosis effects.
#
# roi_frames:    named list of data frames, one element per ROI.
# estimate_beta: function taking one data frame and returning the fitted
#                coefficient of `diagnosis` as a single number (NA when the
#                coefficient is not available).
# Returns an unnamed numeric vector, in the order of `roi_frames`, holding
# the z-scored values of `1 - beta`.
standardized_betas <- function(roi_frames, estimate_beta) {
    flipped <- vapply(
        roi_frames,
        function(df) 1 - estimate_beta(df),
        numeric(1)
    )
    as.numeric(scale(flipped))
}

# Both extractors look the coefficient up by name in the coefficient vector.
# Single-bracket indexing yields NA when `diagnosis` is aliased or absent,
# whereas indexing `summary(model)$coefficients` stops with
# "subscript out of bounds" because summary() drops such rows.

# Diagnosis effect from the mixed model with a random intercept per site.
diagnosis_beta_mixed <- function(df) {
    model <- lmer(MGV ~ diagnosis + age + sex + (1 | site), data = df)
    unname(fixef(model)["diagnosis"])
}

# Diagnosis effect from an ordinary linear model (used within one site).
diagnosis_beta_site <- function(df) {
    model <- lm(MGV ~ diagnosis + age + sex, data = df)
    unname(coef(model)["diagnosis"])
}

# Create the initial beta_df from the data of all sites
beta_df <- data.frame(
    roi = names(rois_all_sites),
    diagnosis_beta = standardized_betas(rois_all_sites, diagnosis_beta_mixed)
)

# Add one column per retained site, fitted on that site's rows only.
# NOTE(review): the printed result contains both a `Dallas` and a `dallas`
# column - possibly one site with inconsistent casing; verify the input.
sites <- unique(patient_gmv_demographics$site)
for (site_name in as.character(sites)) {
    site_frames <- lapply(
        rois,
        function(df) filter(df, .data$site == site_name)
    )
    beta_df[[site_name]] <- standardized_betas(site_frames, diagnosis_beta_site)
}

print(head(beta_df))

  roi diagnosis_beta   Advan_inno BrainGluSchi   Baltimore      Dallas
1   1     0.25718131  0.291043131   0.36593423  0.41318446  0.31016700
2   2     0.03878835  0.001396665   0.09215471  0.08325069  0.07453358
3   3    -0.18154175 -0.096074636  -0.11125400 -0.08597366 -0.11085499
4   4    -0.46446309  0.090304378   0.21140249  0.17095151  0.10573363
5   5    -0.27003157 -0.537644416  -0.44313945 -0.48378234 -0.59825113
6   6     0.71584981  0.921162307   0.84525545  0.86881065  0.76137790
       dallas      boston    hartford     georgia      chicago      COBRE
1  0.37369550  0.38198607  0.33908434  0.29161783  0.395849146  0.3108886
2  0.02864569  0.04090069  0.07182185  0.03254091  0.124575309  0.1456402
3 -0.10196926 -0.16127865 -0.11129687 -0.08553870 -0.008409951 -0.1104580
4  0.16818608  0.18699687  0.17003789  0.21110846  0.166374599  0.2459180
5 -0.51740330 -0.52963310 -0.56869680 -0.44142242 -0.514078734 -0.5673075
6  0.81731753  0.77026488  0.84899713  0.91706078  0.795128

In [15]:
# Save the table of standardized betas (one row per ROI).
# write.csv() does not create missing directories, so make sure the target
# folder exists first; this is a no-op when it is already there.
dir.create("./results", showWarnings = FALSE, recursive = TRUE)
write.csv(beta_df, "./results/betas.csv", row.names = FALSE)
# write.table(beta_df$diagnosis_beta, "betas_diag.csv", col.names = FALSE, row.names = FALSE, sep = ",")

In [None]:
# Correlate every column of beta_df except the first (`roi`) with the
# `diagnosis_beta` column, using only rows where both values are present.
# `diagnosis_beta` is itself among those columns, so the result includes its
# self-correlation.
# vapply() guarantees a plain numeric vector (one value per column), unlike
# sapply(), whose return type depends on its input.
correlations <- vapply(
    beta_df[-1],
    function(column) cor(beta_df$diagnosis_beta, column, use = "complete.obs"),
    numeric(1)
)

# Convert the named vector to a two-column data frame
correlation_df <- data.frame(
    site = names(correlations),
    correlation = correlations
)

# Show the table and save it
print(correlation_df, row.names = FALSE)
write.csv(correlation_df, "correlations.csv", row.names = FALSE)


In [7]:


# sites <- unique(patient_gmv_demographics$site)
# # Calculate beta values for each site separately and add as new columns
# for (site in sites) {
#     beta_values <- list()
#     for (roi in names(rois)) {
#         df <- rois[[roi]] %>% filter(site == !!site)
#         model <- lm(MGV ~ diagnosis + age + sex, data = df)
#         if ("diagnosis" %in% rownames(summary(model)$coefficients)) {
#             beta_diagnosis <- summary(model)$coefficients["diagnosis", "Estimate"]
#         } else {
#             print(summary(model)$coefficients)
#             beta_diagnosis <- NA
#         }
#         beta_values[[roi]] <- 1-beta_diagnosis
#     }
#     beta_df[[site]] <- as.numeric(scale(unlist(beta_values)))
# }

# print(beta_df)

                Estimate   Std. Error   t value     Pr(>|t|)
(Intercept)  0.583689309 0.0261329285 22.335396 6.087370e-19
age         -0.002272903 0.0007248293 -3.135777 4.108314e-03
sexF        -0.035693241 0.0185127324 -1.928037 6.442907e-02
                Estimate   Std. Error   t value     Pr(>|t|)
(Intercept)  0.600263391 0.0287162842 20.903240 3.314699e-18
age         -0.002823825 0.0007964819 -3.545372 1.453335e-03
sexF        -0.038260386 0.0203427980 -1.880783 7.082836e-02
               Estimate   Std. Error   t value     Pr(>|t|)
(Intercept)  0.60271856 0.0273149994 22.065480 8.314707e-19
age         -0.00273064 0.0007576155 -3.604256 1.248131e-03
sexF        -0.03794457 0.0193501189 -1.960948 6.027547e-02
               Estimate   Std. Error   t value     Pr(>|t|)
(Intercept)  0.42023942 0.0250879429 16.750653 8.642740e-16
age         -0.00171619 0.0006958453 -2.466339 2.028874e-02
sexF        -0.01454441 0.0177724580 -0.818368 4.203114e-01
                Estimate   Std. 