# Protocol Validation for PD25

This notebook contains results validating the PD25 template.

In [1]:
# initialize libraries
library(plyr)
library(digest)
library(reshape2)
library(ggplot2)
library("plot3D")

In [2]:
# useful functions

# calculate the distance between two sets of coordinates
dist3D <- function(coord1, coord2) { # vector X,Y,Z
        xdist <- coord1[1] - coord2[1] # could also write as coord1$X, etc.
        ydist <- coord1[2] - coord2[2]
        zdist <- coord1[3] - coord2[3]
        return(as.numeric(sqrt(xdist^2+ydist^2+zdist^2)))
}

# calculate the pairwise distance between an array of 3D coordinates
pairwise_dist3D <- function(temp_coords) { # labeled X,Y,Z
        N <- length(temp_coords$X)
        dist_vec <- rep(0,N) # create vector
        sum_dist <- 0 # initialize to zero
        count <- 0
        for (i in 1:(N-1)) {
                for (j in (i+1):N) {
                        if (i != j) {
                                count <- count + 1
                                first_coord <- temp_coords[i,]
                                second_coord <- temp_coords[j,]
                                curr_dist <- dist3D(first_coord, second_coord)
                                sum_dist <- sum_dist + curr_dist
                                dist_vec[count] <- curr_dist
                        }
                }
        }
        return(c(as.numeric(mean(dist_vec)),as.numeric(sd(dist_vec))))
}

In [22]:
# initialize variables and load in raw fcsv data into df_raters
setwd('~/GitHub/afids-examples/template_validation/sub-PD25/data/input_afid/')

df_afid <- read.table('~/GitHub/afids-analysis/etc/afids.csv', sep=",", header=TRUE) # TODO: change path to local

df_raters <- data.frame(uid=integer(), afid=integer(),X=double(),Y=double(),Z=double(),rater=factor(),
                        subject=factor(),mri_type=factor(),session=integer(),date=integer(),
                        name=character(),description=character(),stringsAsFactors = FALSE)

csv_files <- list.files(".", "*.fcsv")

for (i in 1:length(csv_files)) {
        curr_split <- unlist(strsplit(csv_files[i],"_"))
    
        if (length(curr_split)>1) { # extract name and session data
                rater_subject <- curr_split[1]
                rater_mri_type <- curr_split[2]
                rater_name <- curr_split[3]
                rater_session <- as.numeric(curr_split[4])
                rater_date <- as.numeric(unlist(strsplit(curr_split[5],"[.]"))[1])
                rater_filename <- csv_files[i]
        }
        i
        curr_rater <- read.table(csv_files[i], header=FALSE, sep=",")
        df_rater <- data.frame(afid = 1:length(curr_rater$V1))
        
        df_rater <- cbind(df_rater,X=curr_rater[2],Y=curr_rater[3],Z=curr_rater[4],rater=rater_name,
                          subject=rater_subject,mri_type=rater_mri_type,
                          filename=rater_filename,session=rater_session,date=rater_date,name=curr_rater[12],
                          description=curr_rater[13])
        
        df_rater <- rename(df_rater, c("V2"="X","V3"="Y","V4"="Z","V12"="name","V13"="description"))
        df_raters <- rbind(df_raters,df_rater)
}

afid,X,Y,Z,rater,subject,mri_type,filename,session,date,name,description
1,0.167388,2.7819,-4.31386,AT,sub-PD25,T1w,sub-PD25_T1w_AT_1_20181018.fcsv,1,20181018,1,AC
2,-0.283672,-24.7725,-1.03844,AT,sub-PD25,T1w,sub-PD25_T1w_AT_1_20181018.fcsv,1,20181018,2,PC
3,0.127445,-36.4691,-10.4572,AT,sub-PD25,T1w,sub-PD25_T1w_AT_1_20181018.fcsv,1,20181018,3,infracollicular sulcus
4,-0.320911,-23.2079,-19.526,AT,sub-PD25,T1w,sub-PD25_T1w_AT_1_20181018.fcsv,1,20181018,4,PMJ
5,-0.240057,-15.949,-9.53159,AT,sub-PD25,T1w,sub-PD25_T1w_AT_1_20181018.fcsv,1,20181018,5,superior interpeduncular fossa
6,14.938,-25.2252,-8.64039,AT,sub-PD25,T1w,sub-PD25_T1w_AT_1_20181018.fcsv,1,20181018,6,R superior LMS


In [23]:
# mean coordinates for each landmark placement for each subject/session
df_mean <- ddply(df_raters, .(subject,afid), summarize, X=mean(X), Y=mean(Y), Z=mean(Z))

# initialize
df_raters$mean_AFLE <- NA
df_raters$outlier <- NA

df_raters$xdist <- NA
df_raters$ydist <- NA
df_raters$zdist <- NA

for (i in 1:dim(df_raters)[1]) {
        curr_rater <- df_raters[i,]
        
        # set current mean based on subject, session, fid
        mean_raters <- subset(df_mean, subject == curr_rater$subject & afid == curr_rater$afid)
        
        df_raters[i,]$xdist <- curr_rater$X - mean_raters$X
        df_raters[i,]$ydist <- curr_rater$Y - mean_raters$Y
        df_raters[i,]$zdist <- curr_rater$Z - mean_raters$Z
        curr_coords <- curr_rater[,2:4]
        mean_coords <- mean_raters[,3:5]
        df_raters[i,]$mean_AFLE <- dist3D(curr_coords, mean_coords)
        df_raters[i,]$outlier <- (df_raters[i,]$mean_AFLE > 10) # focus on true outliers (1cm+) first
}

In [26]:
# summary of findings
all_subjects <- subset(df_raters, session > 0) # ignore session 0 which was from the group tutorial
num_outliers <- sum(subset(all_subjects, outlier == TRUE)$outlier)
num_total <- length(all_subjects$outlier)

sprintf( "Total: %.2f +/- %.2f mm; Outliers: %d/%d (%.2f%%)",
        mean(all_subjects$mean_AFLE), sd(all_subjects$mean_AFLE),
        num_outliers, num_total, (num_outliers/num_total)*100 )

# summary of the outliers
summary_outliers <- subset(df_raters,outlier==TRUE)[,c("afid","subject","rater","name","description","mean_AFLE")]

# summary of results for each OASIS-1 scan that was annotated
summary_subjects_df <- ddply(df_raters, .(subject), summarize, mean=mean(mean_AFLE), sd=sd(mean_AFLE), max=max(mean_AFLE))
summary_afids_df <- ddply(df_raters, .(afid), summarize, mean=mean(mean_AFLE), sd=sd(mean_AFLE), max=max(mean_AFLE))

ddply(df_raters, .(rater), summarize, mean=mean(mean_AFLE), sd=sd(mean_AFLE), max=max(mean_AFLE))
#ddply(subset(df_raters, rater == 'Rater05'), .(rater,subject), summarize, mean=mean(mean_AFLE), sd=sd(mean_AFLE), max=max(mean_AFLE))

subset(df_raters, mean_AFLE > 5)

rater,mean,sd,max
AT,1.815665,2.001837,9.184762
GG,1.703725,2.384824,10.81677
HS,3.155089,8.572311,36.311558
MI,1.682358,1.933092,7.580204
PI,2.204844,2.195877,9.322041


Unnamed: 0,afid,X,Y,Z,rater,subject,mri_type,filename,session,date,name,description,mean_AFLE,outlier,xdist,ydist,zdist
25,25,22.2227,-4.87128,-29.2926,AT,sub-PD25,T1w,sub-PD25_T1w_AT_1_20181018.fcsv,1,20181018,25,R inferior AM temporal horn,9.184762,False,9.14754,0.17903,-0.80642
26,26,-23.7174,-5.62707,-29.1149,AT,sub-PD25,T1w,sub-PD25_T1w_AT_1_20181018.fcsv,1,20181018,26,L inferior AM temporal horn,8.676563,False,-8.66142,-0.04612,-0.51032
57,25,22.6928,-4.03584,-28.5992,GG,sub-PD25,T1w,sub-PD25_T1w_GG_1_20181018.fcsv,1,20181018,25,R inferior AM temporal horn,9.671656,False,9.61764,1.01447,-0.11302
58,26,-25.8637,-5.69201,-29.0328,GG,sub-PD25,T1w,sub-PD25_T1w_GG_1_20181018.fcsv,1,20181018,26,L inferior AM temporal horn,10.81677,True,-10.80772,-0.11106,-0.42822
89,25,-21.935,-7.17123,-26.8398,HS,sub-PD25,T1w,sub-PD25_T1w_HS_1_20181018.fcsv,1,20181018,26,,35.112963,True,-35.01016,-2.12092,1.64638
90,26,21.2409,-6.15072,-27.7437,HS,sub-PD25,T1w,sub-PD25_T1w_HS_1_20181018.fcsv,1,20181018,25,,36.311558,True,36.29688,-0.56977,0.86088
114,18,-17.0657,-20.4703,30.1993,MI,sub-PD25,T1w,sub-PD25_T1w_MI_1_20181018.fcsv,1,20181018,18,L LV at PC,5.189977,False,1.68942,4.78004,1.11038
121,25,20.2371,-4.93405,-27.9121,MI,sub-PD25,T1w,sub-PD25_T1w_MI_1_20181018.fcsv,1,20181018,25,R inferior AM temporal horn,7.185852,False,7.16194,0.11626,0.57408
122,26,-22.5765,-4.84776,-28.0015,MI,sub-PD25,T1w,sub-PD25_T1w_MI_1_20181018.fcsv,1,20181018,26,L inferior AM temporal horn,7.580204,False,-7.52052,0.73319,0.60308
153,25,22.1582,-4.23915,-29.7872,PI,sub-PD25,T1w,sub-PD25_T1w_PI_1_20181120.fcsv,1,20181120,25,R inferior AM temporal horn,9.211528,False,9.08304,0.81116,-1.30102


# Post-QC

Re-analysis after quality control and filtering of outliers.

In [27]:
# now QC all the output
# some fiducials were initially mislabeled and corrected in postQC directory by consensus among raters

# initialize variables and load in raw fcsv data into df_raters
setwd('~/GitHub/afids-examples/template_validation/sub-PD25/data/input_afid/')

df_afid <- read.table('~/GitHub/afids-analysis/etc/afids.csv', sep=",", header=TRUE)

df_raters <- data.frame(uid=integer(), fid=integer(),X=double(),Y=double(),Z=double(),rater=factor(),
                        subject=factor(),mri_type=factor(),session=integer(),date=integer(),
                        name=character(),description=character(),stringsAsFactors = FALSE)

csv_files <- list.files(".", "*.fcsv")

for (i in 1:length(csv_files)) {
        curr_split <- unlist(strsplit(csv_files[i],"_"))
    
        if (length(curr_split)>1) { # extract name and session data
                rater_subject <- curr_split[1]
                rater_mri_type <- curr_split[2]
                rater_name <- curr_split[3]
                rater_session <- as.numeric(curr_split[4])
                rater_date <- as.numeric(unlist(strsplit(curr_split[5],"[.]"))[1])
                rater_filename <- csv_files[i]
        }
        i
        curr_rater <- read.table(csv_files[i], header=FALSE, sep=",")
        df_rater <- data.frame(afid = 1:length(curr_rater$V1))
        
        df_rater <- cbind(df_rater,X=curr_rater[2],Y=curr_rater[3],Z=curr_rater[4],rater=rater_name,
                          subject=rater_subject,mri_type=rater_mri_type,
                          filename=rater_filename,session=rater_session,date=rater_date,name=curr_rater[12],
                          description=curr_rater[13])
        
        df_rater <- rename(df_rater, c("V2"="X","V3"="Y","V4"="Z","V12"="name","V13"="description"))
        df_raters <- rbind(df_raters,df_rater)
}

#df_raters$uid <- seq.int(nrow(df_raters)) # add a unique identifier
#levels(df_raters$rater) <- as.numeric( substr(levels(df_raters$rater), 6, 7 ) ) # rename raters based on rater number

# mean coordinates for each landmark placement for each subject/session
df_mean <- ddply(df_raters, .(subject, afid), summarize, X=mean(X), Y=mean(Y), Z=mean(Z))

# initialize
df_raters$mean_AFLE <- NA
df_raters$outlier <- NA

df_raters$xdist <- NA
df_raters$ydist <- NA
df_raters$zdist <- NA
for (i in 1:dim(df_raters)[1]) {
        curr_rater <- df_raters[i,]
        
        # set current mean based on subject, session, fid
        mean_raters <- subset(df_mean, subject == curr_rater$subject & afid == curr_rater$afid)
        
        df_raters[i,]$xdist <- curr_rater$X - mean_raters$X
        df_raters[i,]$ydist <- curr_rater$Y - mean_raters$Y
        df_raters[i,]$zdist <- curr_rater$Z - mean_raters$Z
        curr_coords <- curr_rater[,2:4]
        mean_coords <- mean_raters[,3:5]
        df_raters[i,]$mean_AFLE <- dist3D(curr_coords, mean_coords)
        df_raters[i,]$outlier <- (df_raters[i,]$mean_AFLE > 10) # focus on true outliers (1cm+) first
}

df_raters_QC <- subset(df_raters, outlier == FALSE)

# mean coordinates for each landmark placement for each subject/session
df_mean_QC <- ddply(df_raters_QC, .(subject, afid), summarize, X=mean(X), Y=mean(Y), Z=mean(Z))

# initialize
df_raters_QC$mean_AFLE <- NA
df_raters_QC$outlier <- NA

df_raters_QC$xdist <- NA
df_raters_QC$ydist <- NA
df_raters_QC$zdist <- NA

for (i in 1:dim(df_raters_QC)[1]) {
        curr_rater <- df_raters_QC[i,]
        
        # set current mean based on subject, session, fid
        mean_raters <- subset(df_mean_QC, subject == curr_rater$subject & afid == curr_rater$afid)
        
        df_raters_QC[i,]$xdist <- curr_rater$X - mean_raters$X
        df_raters_QC[i,]$ydist <- curr_rater$Y - mean_raters$Y
        df_raters_QC[i,]$zdist <- curr_rater$Z - mean_raters$Z
        curr_coords <- curr_rater[,2:4]
        mean_coords <- mean_raters[,3:5]
        df_raters_QC[i,]$mean_AFLE <- dist3D(curr_coords, mean_coords)
        df_raters_QC[i,]$outlier <- (df_raters_QC[i,]$mean_AFLE > 10) # focus on true outliers (1cm+) first
}

# include only MR1s
#df_raters_QC <- subset(df_raters_QC, mri_session == 'MR1')

In [28]:
# summary of findings
all_subjects <- subset(df_raters_QC, session > 0) # ignore session 0 which was from the group tutorial
num_outliers <- sum(subset(all_subjects, outlier == TRUE)$outlier)
num_total <- length(all_subjects$outlier)
sprintf( "Total: %.2f +/- %.2f mm; Outliers: %d/%d (%.2f%%)",
        mean(all_subjects$mean_AFLE), sd(all_subjects$mean_AFLE),
        num_outliers, num_total, (num_outliers/num_total)*100 )

# summary of results for each OASIS-1 scan that was annotated
summary_subjects_QC_df <- ddply(df_raters_QC, .(subject), summarize, mean=mean(mean_AFLE), sd=sd(mean_AFLE), max=max(mean_AFLE))
summary_afids_QC_df <- ddply(df_raters_QC, .(afid), summarize, mean=mean(mean_AFLE), sd=sd(mean_AFLE), max=max(mean_AFLE))


summary_AFLE_QC_df <- ddply(df_raters_QC, .(afid,subject), summarize, mean_AFLE=mean(mean_AFLE), sd_AFLE=sd(mean_AFLE), max_AFLE=max(mean_AFLE))


ddply(df_raters_QC, .(rater,subject), summarize, mean=mean(mean_AFLE), sd=sd(mean_AFLE), max=max(mean_AFLE))
ddply(df_raters_QC, .(afid), summarize, mean=mean(mean_AFLE), sd=sd(mean_AFLE), max=max(mean_AFLE))
#ddply(subset(df_raters, rater == 'Rater05'), .(rater,subject), summarize, mean=mean(mean_AFLE), sd=sd(mean_AFLE), max=max(mean_AFLE))


#summary_subjects_QC_df
#summary_afids_QC_df
#summary_AFLE_QC_df
#write.table(summary_AFLE_QC_df, file = "~/GitHub/afids-analysis/data/PHASE2_output_afid_postQC/PHASE2_subject_validation_AFLE.csv", row.names = FALSE, quote = FALSE, sep = ",")


rater,subject,mean,sd,max
AT,sub-PD25,1.2932915,0.7454276,3.375223
GG,sub-PD25,1.1311671,0.817671,3.825945
HS,sub-PD25,0.9846109,0.7435596,3.97617
MI,sub-PD25,1.3223496,1.2287051,5.189977
PI,sub-PD25,1.6855252,1.1942423,5.979177


afid,mean,sd,max
1,0.4197741,0.1452147,0.5577817
2,0.3210352,0.2414886,0.6542382
3,1.1363619,0.7946768,2.4456188
4,1.0034903,0.7114925,1.8079931
5,1.2949493,0.5604761,2.0276467
6,0.8123827,0.2721745,1.1897799
7,1.4112563,0.6898384,2.4342523
8,1.2801166,0.4571629,1.6881883
9,1.4902211,0.4738042,2.1063821
10,1.0609864,0.6196376,2.1097725


In [33]:
####################################################################
# EXPORT MEAN FIDUCIAL LOCATIONS AS FCSV FILE (with outliers filtered out)
####################################################################
setwd('~/GitHub/afids-examples/template_validation/sub-PD25/data/output_afid_postQC/')

for (curr_filename in levels(df_mean_QC$subject)) { # looping on each subject level
        curr_mean <- subset(df_mean_QC, subject==curr_filename)
        curr_fcsv <- data.frame(id=paste('vtkMRMLMarkupsFiducialNode',curr_mean$fid,sep="_"),x=curr_mean$X,y=curr_mean$Y,z=curr_mean$Z,
                                       ow=0,ox=0,oy=0,oz=1,
                                       vis=1,sel=1,lock=1,label=curr_mean$fid,desc=df_afid$description,
                                       associatedNodeID='vtkMRMLScalarVolumeNode1',stringsAsFactors = FALSE)
        
        # write out table (need to use file connection approach because of header information)
        curr_fcsv_name <- paste0(curr_filename,'_MEAN.fcsv')
        fio <- file(curr_fcsv_name, open="wt")
        writeLines(paste('# Markups fiducial file version = 4.6'),fio)
        writeLines(paste('# CoordinateSystem = 0'),fio)
        writeLines(paste('# columns = id,x,y,z,ow,ox,oy,oz,vis,sel,lock,label,desc,associatedNodeID'),fio)
        write.table(curr_fcsv,fio,sep=',',quote=FALSE,col.names=FALSE,row.names=FALSE)
        close(fio)
}

subject,afid,X,Y,Z
sub-PD25,1,-0.0759772,3.274982,-4.279742
sub-PD25,2,-0.1371087,-25.3942,-0.8968918
sub-PD25,3,-0.1020599,-36.29418,-9.305202
sub-PD25,4,-0.2937276,-22.89716,-19.37336
sub-PD25,5,-0.1039431,-14.26772,-9.410538
sub-PD25,6,13.80924,-25.55956,-8.46811


ERROR: Error in data.frame(id = paste("vtkMRMLMarkupsFiducialNode", curr_mean$fid, : arguments imply differing number of rows: 1, 32, 0


# Inter-Rater AFLE

In [None]:
# inter-rater AFLE
#   defined here as the mean pairwise distance between mean intra-rater AFID coordinates

df_meanrater <- ddply(subset(df_raters_QC, session > 0 & outlier==FALSE & mri_session=="MR1"), .(subject,rater,fid), summarize, X=mean(X), Y=mean(Y), Z=mean(Z))

df_interrater <- data.frame(fid=integer(),
                            subject=factor(),
                            interrater_mean=double(),
                            interrater_sd=double(),
                            stringsAsFactors=FALSE)

for (curr_subject in levels(df_raters_QC$subject)) {
        for (curr_fid in 1:32) {
                curr_coords <- subset(df_meanrater, fid == curr_fid & subject == curr_subject)
                if (length(curr_coords$fid) > 0) {
                        curr_output <- pairwise_dist3D(curr_coords[,4:6]) # careful here as index can shift.
                        curr_df <- data.frame(fid = curr_fid, subject = curr_subject, interrater_mean = curr_output[1], interrater_sd = curr_output[2])
                        df_interrater <- rbind(df_interrater, curr_df)
                        
                }
        }
}

In [None]:
# exploration of inter-rater AFLE data
# summary of findings
summary_interrater_subjects_df <- ddply(df_interrater, .(subject), summarize, mean=mean(interrater_mean), sd=sd(interrater_mean), max=max(interrater_mean))

sprintf( "Total: %.2f +/- %.2f mm",
        mean(df_interrater$interrater_mean), sd(df_interrater$interrater_mean) )

summary_interrater_afids_df <- ddply(df_interrater, .(fid), summarize, mean=mean(interrater_mean), sd=sd(interrater_mean), max=max(interrater_mean))

In [None]:
# summary of findings
# combine all the fid metrics into one table; similarly across subjects pre- and post-QC
names(df_afid) <- c('fid','description','side')
summary_all_afids_df <- merge(df_afid, summary_afids_df, by = "fid")
summary_all_afids_df <- merge(summary_all_afids_df, summary_afids_QC_df, by = "fid")
summary_all_afids_df <- merge(summary_all_afids_df, summary_interrater_afids_df, by = "fid")
names(summary_all_afids_df) <- c('fid','description','side','mean_AFLE_mean','mean_AFLE_sd','mean_AFLE_max','mean_AFLE_mean_QC','mean_AFLE_sd_QC','mean_AFLE_max_QC','interrater_AFLE_mean_QC','interrater_AFLE_sd_QC','interrater_AFLE_max_QC')

summary_all_afids_df[,-3:-1] <- round(summary_all_afids_df[,-3:-1], 2)
#summary_all_afids_df

names(df_afid) <- c('fid','description','side')
summary_all_subjects_df <- merge(summary_subjects_df, summary_subjects_QC_df, by = "subject")
summary_all_subjects_df <- merge(summary_all_subjects_df, summary_interrater_subjects_df, by = "subject")
names(summary_all_subjects_df) <- c('subject','mean_AFLE_mean','mean_AFLE_sd','mean_AFLE_max','mean_AFLE_mean_QC','mean_AFLE_sd_QC','mean_AFLE_max_QC','interrater_AFLE_mean_QC','interrater_AFLE_sd_QC','interrater_AFLE_max_QC')

summary_all_subjects_df[,-3:-1] <- round(summary_all_subjects_df[,-3:-1], 2)
#summary_all_subjects_df

In [None]:
# combining both pre-QC and post-QC tables and formatting
df_afids <- read.table('~/GitHub/afids-analysis/etc/afids.csv', sep=",", header=TRUE)

combined_afids_pre_post_QC <- df_afids[,1:2]

combined_afids_pre_post_QC$mean_AFLE <- paste0( sprintf( "%.2f", round(summary_all_afids_df$mean_AFLE_mean,2)), '±', sprintf( "%.2f", round(summary_all_afids_df$mean_AFLE_sd,2)), ' (', sprintf( "%.2f", round(summary_all_afids_df$mean_AFLE_max,2)), ')')
combined_afids_pre_post_QC$mean_AFLE_postQC <- paste0( sprintf( "%.2f", round(summary_all_afids_df$mean_AFLE_mean_QC,2)), '±', sprintf( "%.2f", round(summary_all_afids_df$mean_AFLE_sd_QC,2)), ' (', sprintf( "%.2f", round(summary_all_afids_df$mean_AFLE_max_QC,2)), ')')
combined_afids_pre_post_QC$interrater_AFLE <- paste0( sprintf( "%.2f", round(summary_all_afids_df$interrater_AFLE_mean_QC,2)), '±', sprintf( "%.2f", round(summary_all_afids_df$interrater_AFLE_sd_QC,2)), ' (', sprintf( "%.2f", round(summary_all_afids_df$interrater_AFLE_max_QC,2)), ')')

names(combined_afids_pre_post_QC) <- c('AFID', 'Description', #'Side',
                                 'Mean AFLE Pre-QC', 'Mean AFLE Post-QC', 'Inter-Rater AFLE Post-QC'
)
combined_afids_pre_post_QC$AFID <- sprintf( "%02d", combined_afids_pre_post_QC$AFID )
combined_afids_pre_post_QC

#write.table(combined_afids_pre_post_QC, file = "~/GitHub/afids-analysis/data/output_tables/PHASE2_subject_validation_afid_AFLE_prepostQC.csv", row.names = FALSE, quote = FALSE, sep = ",")


In [None]:
combined_subjects_pre_post_QC <- df_OAS1[,1:2]

combined_subjects_pre_post_QC$mean_AFLE <- paste0( sprintf( "%.2f", round(summary_all_subjects_df$mean_AFLE_mean,2)), '±', sprintf( "%.2f", round(summary_all_subjects_df$mean_AFLE_sd,2)), ' (', sprintf( "%.2f", round(summary_all_subjects_df$mean_AFLE_max,2)), ')')
combined_subjects_pre_post_QC$mean_AFLE_postQC <- paste0( sprintf( "%.2f", round(summary_all_subjects_df$mean_AFLE_mean_QC,2)), '±', sprintf( "%.2f", round(summary_all_subjects_df$mean_AFLE_sd_QC,2)), ' (', sprintf( "%.2f", round(summary_all_subjects_df$mean_AFLE_max_QC,2)), ')')
combined_subjects_pre_post_QC$interrater_AFLE <- paste0( sprintf( "%.2f", round(summary_all_subjects_df$interrater_AFLE_mean_QC,2)), '±', sprintf( "%.2f", round(summary_all_subjects_df$interrater_AFLE_sd_QC,2)), ' (', sprintf( "%.2f", round(summary_all_subjects_df$interrater_AFLE_max_QC,2)), ')')

names(combined_subjects_pre_post_QC) <- c('Subject', 'Session', #'Side',
                                       'Mean AFLE Pre-QC', 'Mean AFLE Post-QC', 'Inter-Rater AFLE Post-QC'
)
combined_subjects_pre_post_QC <- combined_subjects_pre_post_QC[,c(1,3:5)]

#write.table(combined_subjects_pre_post_QC, file = "~/GitHub/afids-analysis/data/output_tables/PHASE2_subject_validation_subjects_AFLE_prepostQC.csv", row.names = FALSE, quote = FALSE, sep = ",")


# Secondary Analyses

We evaluated whether there was any evidence of an effect of demographics on AFLE.

In [None]:
# subset for age calculations (linear regression)
df_demographics <- merge(df_raters_QC, df_OAS1, by = 'subject')
l <- lm(mean_AFLE ~ age, data = df_demographics)
s <- summary(l) # for session, the estimate was positive: 0.0023
round(cbind(l$coeff,s$coefficients[,4]),4) # first column is the effect, second column is the pval

#cor.test(df_demographics$mean_AFLE, df_demographics$age, method = 'kendall')
#plot(df_demographics$age, df_demographics$mean_AFLE, xlim = c(20,100), ylim = c(0,10))


## Did AFLE worsen with the age of the subject for specific AFIDs?

We wanted to see if specific AFIDs tended to worsen with age of the OAS1 participant scan. Worsened for AFID17-18: bilateral LV at PC.

In [None]:
# Did raters improve placing specific AFIDs?
# create dataframe for linear model for each rater and p-values
models = dlply(df_demographics, .(fid), lm, formula = mean_AFLE ~ age)
# also extract p-values for intercept and session
qual <- laply(models, function(mod) summary(mod)$coefficients[,4])

coefs = ldply(models, coef)
summary_aging_afids <- cbind(coefs,qual)
summary_aging_afids <- summary_aging_afids[,c(1,2,4,3,5)]
names(summary_aging_afids)[c(3,5)] <- c('pval_(Intercept)','pval_session')

# FDR correction
summary_aging_afids$pval_session_adjusted <- p.adjust(summary_aging_afids$pval_session, "fdr")
summary_aging_afids$pval_session_significant <- (summary_aging_afids$pval_session_adjusted < 0.05)

# Round and display the table
summary_aging_afids[,c(2,4)] <- round( summary_aging_afids[,c(2,4)], 2)
summary_aging_afids[,c(3,5,6)] <- round( summary_aging_afids[,c(3,5,6)], 4)
summary_aging_afids

In [None]:
sessionInfo()