# Phase 2: Protocol Validation for Individual Subjects

This notebook contains results validating the AFID protocol on individual subject scans from several openly shared datasets (Newcastle, ION, ECNU (Kwok), and AMU). The protocol and specific instructions for placing AFIDs were finalized on a consensus basis among raters and participants of the afids-macaca project on BrainWeb (Phase 1).

To be more sensitive to discrepancies in rater placements, we defined outliers as placements more than 1.5 mm (3 voxels) from the mean; analyses were performed both with and without outlier removal.

The first step is to initialize the variables, define useful functions, and load all the raw fcsv data into df_raters.


In [1]:
# initialize libraries
library(plyr)
library(digest)
library(reshape2)
library(ggplot2)
#library("plot3D")

Registered S3 methods overwritten by 'ggplot2':
  method         from 
  [.quosures     rlang
  c.quosures     rlang
  print.quosures rlang


In [2]:
# useful functions

# Euclidean distance between two 3D points.
#   coord1, coord2: X, Y, Z held in positions 1-3 (a numeric vector or a
#                   one-row data frame); only those three positions are read.
# Returns the distance as a plain numeric value.
dist3D <- function(coord1, coord2) {
        delta_x <- coord1[1] - coord2[1]
        delta_y <- coord1[2] - coord2[2]
        delta_z <- coord1[3] - coord2[3]
        squared_norm <- delta_x^2 + delta_y^2 + delta_z^2
        as.numeric(sqrt(squared_norm))
}

# Mean and standard deviation of all pairwise 3D distances between points.
#   temp_coords: data frame with one point per row, a column named X, and
#                the X, Y, Z coordinates in its first three columns.
# Returns c(mean, sd) over the N * (N - 1) / 2 unordered pairs of rows.
# With fewer than two points there are no pairs and c(NA, NA) is returned;
# with exactly two points the sd is NA (a single distance has no spread).
pairwise_dist3D <- function(temp_coords) { # labeled X,Y,Z
        N <- length(temp_coords$X)
        if (N < 2) {
                return(c(NA_real_, NA_real_))
        }
        xyz <- as.matrix(temp_coords[, 1:3])
        # each column of pair_idx is one (i, j) pair with i < j
        pair_idx <- utils::combn(N, 2)
        deltas <- xyz[pair_idx[1, ], , drop = FALSE] - xyz[pair_idx[2, ], , drop = FALSE]
        # exactly one entry per pair; the vector used to be sized to N, which
        # left a spurious 0 in it (and halved the mean) when N == 2
        dist_vec <- sqrt(rowSums(deltas^2))
        c(as.numeric(mean(dist_vec)), as.numeric(sd(dist_vec)))
}

In [3]:
# initialize variables and load in raw fcsv data into df_raters
# (one row per placed AFID, tagged with rater / subject / mri_type / session)
# NOTE(review): the previous comment here said PHASE2_input_afid was *not*
# used, but this cell does read from it (the pre-QC placements); the
# QC-corrected copies in PHASE2_input_afid_postQC are loaded in later cells.
setwd('~/Documents/GitHub/afids-macaca/data/PHASE2_input_afid/')

df_afids <- read.table('~/Documents/GitHub/afids-macaca/etc/afids.csv', sep=",", header=TRUE)

# list.files() takes a regular expression, not a shell glob
csv_files <- list.files('.', pattern = "\\.fcsv$")

# Read one rater file. File names are expected to look like
# Fid32_<subject>_<mri_type>_<rater>_<session>.fcsv; anything else is skipped
# with a warning instead of silently inheriting the previous file's metadata.
read_rater_file <- function(fcsv_file) {
    curr_split <- unlist(strsplit(fcsv_file, "_"))
    if (length(curr_split) < 5) {
        warning("Skipping file with unexpected name: ", fcsv_file, call. = FALSE)
        return(NULL)
    }
    curr_rater <- read.table(fcsv_file, header=FALSE, sep=",")
    data.frame(fid = seq_len(nrow(curr_rater)),
               X = curr_rater[[2]], Y = curr_rater[[3]], Z = curr_rater[[4]],
               rater = curr_split[4],
               subject = curr_split[2],
               mri_type = curr_split[3],
               session = as.numeric(unlist(strsplit(curr_split[5], "[.]"))[1]),
               name = curr_rater[[12]],
               description = curr_rater[[13]],
               stringsAsFactors = FALSE)
}

# bind once at the end rather than growing df_raters inside a loop
df_raters <- do.call(rbind, lapply(csv_files, read_rater_file))
if (is.null(df_raters)) {
    stop("No usable .fcsv files found in ", getwd(), call. = FALSE)
}

# later cells call levels() on these columns, so make them factors explicitly
# (levels in order of first appearance) instead of relying on the pre-4.0
# stringsAsFactors default
for (factor_col in c("rater", "subject", "mri_type")) {
    df_raters[[factor_col]] <- factor(df_raters[[factor_col]],
                                      levels = unique(df_raters[[factor_col]]))
}

# modification due to different raters contributing to PHASE2:
# relabel e.g. "Rater06" as 6 by parsing the digits out of each level
# (replaces the hard-coded, order-dependent list of rater numbers)
levels(df_raters$rater) <- as.integer(gsub("[^0-9]", "", levels(df_raters$rater)))

head(df_raters)

fid,X,Y,Z,rater,subject,mri_type,session,name,description
1,0.608,-1.578,0.431,1,sub-032104,T1,1,1,AC
2,1.538,-14.292,-4.865,1,sub-032104,T1,1,2,PC
3,1.398,-20.426,-10.521,1,sub-032104,T1,1,3,infracollicular sulcus
4,1.125,-9.544,-13.637,1,sub-032104,T1,1,4,PMJ
5,0.994,-7.869,-8.325,1,sub-032104,T1,1,5,superior interpeduncular fossa
6,8.551,-12.781,-10.23,1,sub-032104,T1,1,6,R superior LMS


In [4]:
# show the levels of df_raters$subject (the subject IDs that were loaded)
levels(df_raters$subject)

# Subject Averages

For each subject, we calculate the mean value for each afid point and store it in a separate .fcsv file so that it can be loaded back into 3D Slicer.

Deviation of the values by > 1.5 mm will be classified as an outlier.

In [5]:
# start by calculating mean coordinates
# Builds df_subject_mean and df_subject_sd: one row per subject x AFID with
# the mean (resp. standard deviation) of X, Y, Z over the matching rows of
# df_raters, plus the AFID name/description looked up in df_afids.
df_subject_mean <- data.frame(fid=integer(),X=double(),Y=double(),Z=double(),
                        subject=factor(), name=factor(),description=character(),stringsAsFactors = FALSE)
df_subject_sd <- data.frame(fid=integer(),X=double(),Y=double(),Z=double(),
                        subject=factor(), name=factor(),description=character(),stringsAsFactors = FALSE)

# subjects to iterate over: the factor levels when subject is a factor,
# otherwise the distinct values (levels() would be NULL and nothing would run)
subject_ids <- if (is.factor(df_raters$subject)) {
    levels(df_raters$subject)
} else {
    unique(as.character(df_raters$subject))
}
num_afids <- 32 # AFID32 protocol

# collect rows in preallocated lists and bind once after the loop
mean_rows <- vector("list", length(subject_ids) * num_afids)
sd_rows <- vector("list", length(subject_ids) * num_afids)
row_idx <- 0

# iterate over each subject and compute the mean and standard deviation
for (curr_subject in subject_ids) {
    for (i in seq_len(num_afids)) { # for each AFID32 point, calculate the mean
        row_idx <- row_idx + 1
        df_subset <- subset(df_raters, fid == i & subject == curr_subject)
        curr_fid_name <- df_afids$name[i]
        curr_fid_desc <- df_afids$description[i]
        mean_rows[[row_idx]] <- data.frame(fid = i, X = mean(df_subset$X), Y = mean(df_subset$Y), Z = mean(df_subset$Z),
                        subject=curr_subject, name=curr_fid_name, description=curr_fid_desc,
                        stringsAsFactors = FALSE)
        sd_rows[[row_idx]] <- data.frame(fid = i, X = sd(df_subset$X), Y = sd(df_subset$Y), Z = sd(df_subset$Z),
                        subject=curr_subject, name=curr_fid_name, description=curr_fid_desc,
                        stringsAsFactors = FALSE)
    }
}

# rbind() drops the zero-row templates once real rows are present
df_subject_mean <- rbind(df_subject_mean, do.call(rbind, mean_rows))
df_subject_sd <- rbind(df_subject_sd, do.call(rbind, sd_rows))

# keep subject as a factor: later cells loop over levels(df_subject_mean$subject)
df_subject_mean$subject <- factor(as.character(df_subject_mean$subject), levels = subject_ids)
df_subject_sd$subject <- factor(as.character(df_subject_sd$subject), levels = subject_ids)

In [6]:
# preview the first rows of the per-subject mean coordinates
head(df_subject_mean)

fid,X,Y,Z,subject,name,description
1,0.4517778,-1.664889,0.2321111,sub-032104,1,AC
2,1.2526667,-14.216,-4.6942222,sub-032104,2,PC
3,1.4067778,-20.268556,-10.7152222,sub-032104,3,infracollicular sulcus
4,0.9527778,-9.504889,-13.4632222,sub-032104,4,PMJ
5,0.9194444,-7.663444,-8.3386667,sub-032104,5,superior interpeduncular fossa
6,8.2953333,-13.433,-10.2394444,sub-032104,6,R superior LMS


In [7]:
# Create output fcsv file for each included subject
# Writes <subject>_MEAN.fcsv (3-line markups header followed by one row per
# AFID) into PHASE2_output_afid so the means can be loaded into 3D Slicer.

####################################################################
# EXPORT MEAN FIDUCIAL LOCATIONS AS FCSV FILE
# NOTE(review): this banner used to say "with outliers filtered out", but the
# df_subject_mean built earlier in this notebook uses every raw placement.
####################################################################
setwd('~/Documents/GitHub/afids-macaca/data/PHASE2_output_afid/')

fcsv_header <- c('# Markups fiducial file version = 4.6',
                 '# CoordinateSystem = 0',
                 '# columns = id,x,y,z,ow,ox,oy,oz,vis,sel,lock,label,desc,associatedNodeID')

# factor levels when available, otherwise the distinct subject values
export_subjects <- if (is.factor(df_subject_mean$subject)) {
        levels(df_subject_mean$subject)
} else {
        unique(as.character(df_subject_mean$subject))
}

for (curr_filename in export_subjects) { # looping on each subject level
        curr_mean <- subset(df_subject_mean, subject==curr_filename)
        if (nrow(curr_mean) == 0) {
                warning("No mean coordinates for ", curr_filename, "; no file written", call. = FALSE)
                next
        }
        # desc comes from the rows being written, so it always lines up with
        # the coordinates even if df_afids has a different number of rows
        curr_fcsv <- data.frame(id=paste('vtkMRMLMarkupsFiducialNode',curr_mean$fid,sep="_"),x=curr_mean$X,y=curr_mean$Y,z=curr_mean$Z,
                                       ow=0,ox=0,oy=0,oz=1,
                                       vis=1,sel=1,lock=1,label=curr_mean$fid,desc=curr_mean$description,
                                       associatedNodeID='vtkMRMLScalarVolumeNode1',stringsAsFactors = FALSE)

        # write out table (need to use file connection approach because of header information)
        curr_fcsv_name <- paste0(curr_filename,'_MEAN.fcsv')
        fio <- file(curr_fcsv_name, open="wt")
        # close the connection even if a write fails
        tryCatch({
                writeLines(fcsv_header, fio)
                write.table(curr_fcsv,fio,sep=',',quote=FALSE,col.names=FALSE,row.names=FALSE)
        }, finally = close(fio))
}

In [8]:
# sanity check: preview the per-subject means, list the subject levels,
# and preview the raw rater table
head(df_subject_mean)
levels(df_subject_mean$subject)

head(df_raters)

fid,X,Y,Z,subject,name,description
1,0.4517778,-1.664889,0.2321111,sub-032104,1,AC
2,1.2526667,-14.216,-4.6942222,sub-032104,2,PC
3,1.4067778,-20.268556,-10.7152222,sub-032104,3,infracollicular sulcus
4,0.9527778,-9.504889,-13.4632222,sub-032104,4,PMJ
5,0.9194444,-7.663444,-8.3386667,sub-032104,5,superior interpeduncular fossa
6,8.2953333,-13.433,-10.2394444,sub-032104,6,R superior LMS


fid,X,Y,Z,rater,subject,mri_type,session,name,description
1,0.608,-1.578,0.431,1,sub-032104,T1,1,1,AC
2,1.538,-14.292,-4.865,1,sub-032104,T1,1,2,PC
3,1.398,-20.426,-10.521,1,sub-032104,T1,1,3,infracollicular sulcus
4,1.125,-9.544,-13.637,1,sub-032104,T1,1,4,PMJ
5,0.994,-7.869,-8.325,1,sub-032104,T1,1,5,superior interpeduncular fossa
6,8.551,-12.781,-10.23,1,sub-032104,T1,1,6,R superior LMS


# Phase 2: Raw Data Analysis

Also classify outliers, defined as placements more than 1.5 mm from the per-subject mean position of that AFID.

In [9]:
# Per-placement error relative to the subject mean of the same AFID.
# Adds to df_raters, in this column order: mean_AFLE (3D distance to the
# mean), outlier (mean_AFLE > 1.5) and the signed offsets xdist/ydist/zdist.

# row of df_subject_mean holding the mean for each placement's
# (subject, fid) pair; NA when no such mean exists
mean_idx <- match(paste(df_raters$subject, df_raters$fid),
                  paste(df_subject_mean$subject, df_subject_mean$fid))

# whole-column arithmetic replaces the row-by-row loop, which copied the
# data frame and re-scanned df_subject_mean on every iteration
offset_x <- df_raters$X - df_subject_mean$X[mean_idx]
offset_y <- df_raters$Y - df_subject_mean$Y[mean_idx]
offset_z <- df_raters$Z - df_subject_mean$Z[mean_idx]

df_raters$mean_AFLE <- sqrt(offset_x^2 + offset_y^2 + offset_z^2) # mean AFID localization error
df_raters$outlier <- df_raters$mean_AFLE > 1.5 # outliers > 1.5mm
df_raters$xdist <- offset_x
df_raters$ydist <- offset_y
df_raters$zdist <- offset_z

In [10]:
# spot check: first row of df_raters, now including the mean_AFLE, outlier,
# xdist, ydist and zdist columns
df_raters[1,]

fid,X,Y,Z,rater,subject,mri_type,session,name,description,mean_AFLE,outlier,xdist,ydist,zdist
1,0.608,-1.578,0.431,1,sub-032104,T1,1,1,AC,0.267417,False,0.1562222,0.08688889,0.1988889


In [11]:
# Headline figures for the raw data: overall AFLE and outlier rate.
# Session 0 (the group tutorial) is left out of these headline figures.
# NOTE(review): the outlier listing and the ddply summaries further down are
# computed on df_raters, i.e. session 0 is included there - confirm intended.
all_subjects <- df_raters[which(df_raters$session > 0), ]
num_outliers <- sum(all_subjects$outlier, na.rm = TRUE)
num_total <- nrow(all_subjects)

sprintf(
        "Total: %.2f +/- %.2f mm; Outliers: %d/%d (%.2f%%)",
        mean(all_subjects$mean_AFLE),
        sd(all_subjects$mean_AFLE),
        num_outliers,
        num_total,
        (num_outliers/num_total)*100
)

# every placement flagged as an outlier, with the identifying columns only
outlier_cols <- c("rater","fid","subject","session","name","description","mean_AFLE")
summary_outliers <- df_raters[which(df_raters$outlier == TRUE), outlier_cols]
#summary_outliers

# mean / sd / max AFLE per annotated scan, then per AFID
summary_subjects_df <- ddply(df_raters, .(subject), summarize,
                             mean=mean(mean_AFLE), sd=sd(mean_AFLE), max=max(mean_AFLE))
summary_afids_df <- ddply(df_raters, .(fid), summarize,
                          mean=mean(mean_AFLE), sd=sd(mean_AFLE), max=max(mean_AFLE))

# Template Averages: Post-QC

Template averages were recreated after filtering out extreme outliers (>7mm) and correcting any misannotations (out of order entries). This is an attempt at estimating the best case values without any need for changing the actual coordinates of the labels.

The following files were found to have AFIDs out of order on manual QC and were modified accordingly:
* Fid32_sub-032105_T1_Rater02_02.fcsv
* Fid32_sub-032209_T1_Rater06_02.fcsv
* Fid32_sub-032209_T1_Rater06_01.fcsv
* Fid32_sub-032209_T1_Rater03_01.fcsv
* Fid32_sub-032199_T1_Rater02_01.fcsv
* Fid32_sub-032107_T1_Rater10_01.fcsv


In [12]:
# initialize variables and load in the QC-corrected fcsv data into df_raters,
# then drop extreme outliers (> 7 mm) to obtain df_raters_QC
# not using PHASE2_input_afid as some mislabeling had to be fixed
setwd('~/Documents/GitHub/afids-macaca/data/PHASE2_input_afid_postQC/')

df_afids <- read.table('~/Documents/GitHub/afids-macaca/etc/afids.csv', sep=",", header=TRUE)

# list.files() takes a regular expression, not a shell glob
csv_files <- list.files('.', pattern = "\\.fcsv$")

# Read one rater file. File names are expected to look like
# Fid32_<subject>_<mri_type>_<rater>_<session>.fcsv; anything else is skipped
# with a warning instead of silently inheriting the previous file's metadata.
read_rater_file <- function(fcsv_file) {
    curr_split <- unlist(strsplit(fcsv_file, "_"))
    if (length(curr_split) < 5) {
        warning("Skipping file with unexpected name: ", fcsv_file, call. = FALSE)
        return(NULL)
    }
    curr_rater <- read.table(fcsv_file, header=FALSE, sep=",")
    data.frame(fid = seq_len(nrow(curr_rater)),
               X = curr_rater[[2]], Y = curr_rater[[3]], Z = curr_rater[[4]],
               rater = curr_split[4],
               subject = curr_split[2],
               mri_type = curr_split[3],
               session = as.numeric(unlist(strsplit(curr_split[5], "[.]"))[1]),
               name = curr_rater[[12]],
               description = curr_rater[[13]],
               stringsAsFactors = FALSE)
}

# bind once at the end rather than growing df_raters inside a loop
df_raters <- do.call(rbind, lapply(csv_files, read_rater_file))
if (is.null(df_raters)) {
    stop("No usable .fcsv files found in ", getwd(), call. = FALSE)
}

# later cells call levels() on these columns, so make them factors explicitly
# (levels in order of first appearance) instead of relying on the pre-4.0
# stringsAsFactors default
for (factor_col in c("rater", "subject", "mri_type")) {
    df_raters[[factor_col]] <- factor(df_raters[[factor_col]],
                                      levels = unique(df_raters[[factor_col]]))
}

# modification due to different raters contributing to PHASE2:
# relabel e.g. "Rater06" as 6 by parsing the digits out of each level
# (replaces the hard-coded, order-dependent list of rater numbers)
levels(df_raters$rater) <- as.integer(gsub("[^0-9]", "", levels(df_raters$rater)))

# Distance of every placement from the reference mean of its (subject, fid).
# NOTE(review): df_subject_mean is not recomputed in this cell, so the
# reference is whatever an earlier cell built (the pre-QC means when the
# notebook is run top to bottom) - confirm this is the intended reference
# for the 7 mm screen.
mean_idx <- match(paste(df_raters$subject, df_raters$fid),
                  paste(df_subject_mean$subject, df_subject_mean$fid))

# whole-column arithmetic replaces the former row-by-row loop
offset_x <- df_raters$X - df_subject_mean$X[mean_idx]
offset_y <- df_raters$Y - df_subject_mean$Y[mean_idx]
offset_z <- df_raters$Z - df_subject_mean$Z[mean_idx]

df_raters$mean_AFLE <- sqrt(offset_x^2 + offset_y^2 + offset_z^2) # mean AFID localization error
df_raters$outlier <- df_raters$mean_AFLE > 7 # outliers > 7mm
df_raters$xdist <- offset_x
df_raters$ydist <- offset_y
df_raters$zdist <- offset_z

# keep only placements within 7 mm (rows whose outlier flag is NA are dropped too)
df_raters_QC <- subset(df_raters, outlier == FALSE)


# Subject Averages Post-QC

For each subject, we calculate the mean value for each afid point and store it in a separate .fcsv file so that it can be loaded back into 3D Slicer. This is after an initial round of QC to identify any mislabeled AFIDs.

Deviation of the values by > 1.5 mm will be classified as an outlier.

In [13]:
# start by calculating mean coordinates
# Builds df_subject_mean_QC and df_subject_sd_QC: one row per subject x AFID
# with the mean (resp. standard deviation) of X, Y, Z over the matching rows
# of df_raters_QC, plus the AFID name/description looked up in df_afids.
df_subject_mean_QC <- data.frame(fid=integer(),X=double(),Y=double(),Z=double(),
                              subject=factor(), name=factor(),description=character(),stringsAsFactors = FALSE)
df_subject_sd_QC <- data.frame(fid=integer(),X=double(),Y=double(),Z=double(),
                            subject=factor(), name=factor(),description=character(),stringsAsFactors = FALSE)

# subjects to iterate over: the factor levels when subject is a factor,
# otherwise the distinct values (levels() would be NULL and nothing would run)
subject_ids <- if (is.factor(df_raters_QC$subject)) {
  levels(df_raters_QC$subject)
} else {
  unique(as.character(df_raters_QC$subject))
}
num_afids <- 32 # AFID32 protocol

# collect rows in preallocated lists and bind once after the loop
mean_rows <- vector("list", length(subject_ids) * num_afids)
sd_rows <- vector("list", length(subject_ids) * num_afids)
row_idx <- 0

# iterate over each subject and compute the mean and standard deviation
for (curr_subject in subject_ids) {
  for (i in seq_len(num_afids)) { # for each AFID32 point, calculate the mean
    row_idx <- row_idx + 1
    df_subset <- subset(df_raters_QC, fid == i & subject == curr_subject)
    curr_fid_name <- df_afids$name[i]
    curr_fid_desc <- df_afids$description[i]
    mean_rows[[row_idx]] <- data.frame(fid = i, X = mean(df_subset$X), Y = mean(df_subset$Y), Z = mean(df_subset$Z),
                                       subject=curr_subject, name=curr_fid_name, description=curr_fid_desc,
                                       stringsAsFactors = FALSE)
    sd_rows[[row_idx]] <- data.frame(fid = i, X = sd(df_subset$X), Y = sd(df_subset$Y), Z = sd(df_subset$Z),
                                     subject=curr_subject, name=curr_fid_name, description=curr_fid_desc,
                                     stringsAsFactors = FALSE)
  }
}

# rbind() drops the zero-row templates once real rows are present
df_subject_mean_QC <- rbind(df_subject_mean_QC, do.call(rbind, mean_rows))
df_subject_sd_QC <- rbind(df_subject_sd_QC, do.call(rbind, sd_rows))

# keep subject as a factor: later cells loop over levels(df_subject_mean_QC$subject)
df_subject_mean_QC$subject <- factor(as.character(df_subject_mean_QC$subject), levels = subject_ids)
df_subject_sd_QC$subject <- factor(as.character(df_subject_sd_QC$subject), levels = subject_ids)

In [14]:
# Create output fcsv file for each included subject
# Writes <subject>_MEAN.fcsv (3-line markups header followed by one row per
# AFID) into PHASE2_output_afid_postQC so the means can be loaded into 3D Slicer.

####################################################################
# EXPORT MEAN FIDUCIAL LOCATIONS AS FCSV FILE (with outliers filtered out)
####################################################################
setwd('~/Documents/GitHub/afids-macaca/data/PHASE2_output_afid_postQC/')

fcsv_header <- c('# Markups fiducial file version = 4.6',
                 '# CoordinateSystem = 0',
                 '# columns = id,x,y,z,ow,ox,oy,oz,vis,sel,lock,label,desc,associatedNodeID')

# factor levels when available, otherwise the distinct subject values
export_subjects <- if (is.factor(df_subject_mean_QC$subject)) {
  levels(df_subject_mean_QC$subject)
} else {
  unique(as.character(df_subject_mean_QC$subject))
}

for (curr_filename in export_subjects) { # looping on each subject level
  curr_mean <- subset(df_subject_mean_QC, subject==curr_filename)
  if (nrow(curr_mean) == 0) {
    warning("No mean coordinates for ", curr_filename, "; no file written", call. = FALSE)
    next
  }
  # desc comes from the rows being written, so it always lines up with the
  # coordinates even if df_afids has a different number of rows
  curr_fcsv <- data.frame(id=paste('vtkMRMLMarkupsFiducialNode',curr_mean$fid,sep="_"),x=curr_mean$X,y=curr_mean$Y,z=curr_mean$Z,
                          ow=0,ox=0,oy=0,oz=1,
                          vis=1,sel=1,lock=1,label=curr_mean$fid,desc=curr_mean$description,
                          associatedNodeID='vtkMRMLScalarVolumeNode1',stringsAsFactors = FALSE)

  # write out table (need to use file connection approach because of header information)
  curr_fcsv_name <- paste0(curr_filename,'_MEAN.fcsv')
  fio <- file(curr_fcsv_name, open="wt")
  # close the connection even if a write fails
  tryCatch({
    writeLines(fcsv_header, fio)
    write.table(curr_fcsv,fio,sep=',',quote=FALSE,col.names=FALSE,row.names=FALSE)
  }, finally = close(fio))
}

# Phase 2: Post-QC Analysis

Also classify outliers, defined as placements more than 1.5 mm from the per-subject post-QC mean position of that AFID.

In [15]:
# Per-placement error relative to the post-QC subject mean of the same AFID.
# (Re)fills on df_raters_QC: mean_AFLE (3D distance to the mean), outlier
# (mean_AFLE > 1.5) and the signed offsets xdist/ydist/zdist.

# row of df_subject_mean_QC holding the mean for each placement's
# (subject, fid) pair; NA when no such mean exists
mean_idx <- match(paste(df_raters_QC$subject, df_raters_QC$fid),
                  paste(df_subject_mean_QC$subject, df_subject_mean_QC$fid))

# whole-column arithmetic replaces the row-by-row loop, which copied the
# data frame and re-scanned df_subject_mean_QC on every iteration
offset_x <- df_raters_QC$X - df_subject_mean_QC$X[mean_idx]
offset_y <- df_raters_QC$Y - df_subject_mean_QC$Y[mean_idx]
offset_z <- df_raters_QC$Z - df_subject_mean_QC$Z[mean_idx]

df_raters_QC$mean_AFLE <- sqrt(offset_x^2 + offset_y^2 + offset_z^2) # mean AFID localization error
df_raters_QC$outlier <- df_raters_QC$mean_AFLE > 1.5 # outliers > 1.5mm
df_raters_QC$xdist <- offset_x
df_raters_QC$ydist <- offset_y
df_raters_QC$zdist <- offset_z

In [16]:
# Headline figures for the post-QC data: overall AFLE and outlier rate.
# Session 0 (the group tutorial) is left out of these headline figures.
# NOTE(review): the outlier listing and the ddply summaries further down are
# computed on df_raters_QC, i.e. session 0 is included there - confirm intended.
all_subjects <- df_raters_QC[which(df_raters_QC$session > 0), ]
num_outliers <- sum(all_subjects$outlier, na.rm = TRUE)
num_total <- nrow(all_subjects)

sprintf(
  "Total: %.2f +/- %.2f mm; Outliers: %d/%d (%.2f%%)",
  mean(all_subjects$mean_AFLE),
  sd(all_subjects$mean_AFLE),
  num_outliers,
  num_total,
  (num_outliers/num_total)*100
)

# every placement flagged as an outlier, with the identifying columns only
outlier_cols <- c("rater","fid","subject","session","name","description","mean_AFLE")
summary_outliers <- df_raters_QC[which(df_raters_QC$outlier == TRUE), outlier_cols]
#summary_outliers

# mean / sd / max AFLE per annotated scan, then per AFID
summary_subjects_df <- ddply(df_raters_QC, .(subject), summarize,
                             mean=mean(mean_AFLE), sd=sd(mean_AFLE), max=max(mean_AFLE))
summary_afids_df <- ddply(df_raters_QC, .(fid), summarize,
                          mean=mean(mean_AFLE), sd=sd(mean_AFLE), max=max(mean_AFLE))

In [17]:
# Screen for outliers: list post-QC placements more than 3 mm from the mean
df_raters_QC[which(df_raters_QC$mean_AFLE > 3), ]

Unnamed: 0,fid,X,Y,Z,rater,subject,mri_type,session,name,description,mean_AFLE,outlier,xdist,ydist,zdist
183,23,10.872,-4.373,-13.107,6,sub-032104,T1,1,23,R superior AM temporal horn,3.271502,True,1.462,-1.84466667,-2.2721111
375,23,13.306,-3.557,-11.074,4,sub-032105,T1,1,23,R superior AM temporal horn,3.425267,True,2.789875,-1.98075,-0.16025
407,23,13.178,-3.237,-11.31,4,sub-032105,T1,2,23,R superior AM temporal horn,3.162386,True,2.661875,-1.66075,-0.39625
701,29,17.206,-27.608,-8.697,1,sub-032108,T1,1,29,R ventral occipital horn,3.355694,True,1.3123116,2.99297123,0.762
1692,28,89.639,13.844,102.432,6,sub-032209,T1,1,28,L indusium griseum origin,3.321899,True,2.421875,-1.007,2.0385
1816,24,51.906,37.368,55.885,2,sub-032210,T1,1,24,L superior AM temporal horn,3.075567,True,-2.65788955,1.4292178,-0.5933543
1827,3,64.54818,22.78545,62.67241,2,sub-032210,T1,2,3,ICS,3.059039,True,0.02378198,2.000515,2.3141075
1832,8,71.03928,22.83601,54.78426,2,sub-032210,T1,2,8,RILMS,3.082205,True,-0.18350176,-3.07361817,-0.1385205
1833,9,57.91323,22.63398,55.22793,2,sub-032210,T1,2,9,LILMS,3.393196,True,0.15282287,-3.35564057,0.4796915
1947,27,77.684,19.784,62.501,7,sub-032210,T1,1,27,R indusium griseum origin,3.490885,True,3.02592424,-0.04016403,-1.7402437


# Post-QC for PHASE3



# Template Averages: Post-QC for PHASE3

Single subject averages were recreated after filtering out outliers (>1.5mm).

The goal here is to get the best ground truth estimates for PHASE3 (not to estimate the AFLE), so the outlier threshold applied is more stringent.

In [18]:
# initialize variables and load in the QC-corrected fcsv data into df_raters,
# then drop outliers (> 1.5 mm) to obtain df_raters_QC_P3
# not using PHASE2_input_afid as some mislabeling had to be fixed
setwd('~/Documents/GitHub/afids-macaca/data/PHASE2_input_afid_postQC/')

df_afids <- read.table('~/Documents/GitHub/afids-macaca/etc/afids.csv', sep=",", header=TRUE)

# list.files() takes a regular expression, not a shell glob
csv_files <- list.files('.', pattern = "\\.fcsv$")

# Read one rater file. File names are expected to look like
# Fid32_<subject>_<mri_type>_<rater>_<session>.fcsv; anything else is skipped
# with a warning instead of silently inheriting the previous file's metadata.
read_rater_file <- function(fcsv_file) {
    curr_split <- unlist(strsplit(fcsv_file, "_"))
    if (length(curr_split) < 5) {
        warning("Skipping file with unexpected name: ", fcsv_file, call. = FALSE)
        return(NULL)
    }
    curr_rater <- read.table(fcsv_file, header=FALSE, sep=",")
    data.frame(fid = seq_len(nrow(curr_rater)),
               X = curr_rater[[2]], Y = curr_rater[[3]], Z = curr_rater[[4]],
               rater = curr_split[4],
               subject = curr_split[2],
               mri_type = curr_split[3],
               session = as.numeric(unlist(strsplit(curr_split[5], "[.]"))[1]),
               name = curr_rater[[12]],
               description = curr_rater[[13]],
               stringsAsFactors = FALSE)
}

# bind once at the end rather than growing df_raters inside a loop
df_raters <- do.call(rbind, lapply(csv_files, read_rater_file))
if (is.null(df_raters)) {
    stop("No usable .fcsv files found in ", getwd(), call. = FALSE)
}

# later cells call levels() on these columns, so make them factors explicitly
# (levels in order of first appearance) instead of relying on the pre-4.0
# stringsAsFactors default
for (factor_col in c("rater", "subject", "mri_type")) {
    df_raters[[factor_col]] <- factor(df_raters[[factor_col]],
                                      levels = unique(df_raters[[factor_col]]))
}

# modification due to different raters contributing to PHASE2:
# relabel e.g. "Rater06" as 6 by parsing the digits out of each level
# (replaces the hard-coded, order-dependent list of rater numbers)
levels(df_raters$rater) <- as.integer(gsub("[^0-9]", "", levels(df_raters$rater)))

# Distance of every placement from the reference mean of its (subject, fid).
# NOTE(review): the reference here is df_subject_mean, which this cell does
# not recompute, rather than df_subject_mean_QC - confirm which set of means
# the 1.5 mm screen for PHASE3 is meant to use.
mean_idx <- match(paste(df_raters$subject, df_raters$fid),
                  paste(df_subject_mean$subject, df_subject_mean$fid))

# whole-column arithmetic replaces the former row-by-row loop
offset_x <- df_raters$X - df_subject_mean$X[mean_idx]
offset_y <- df_raters$Y - df_subject_mean$Y[mean_idx]
offset_z <- df_raters$Z - df_subject_mean$Z[mean_idx]

df_raters$mean_AFLE <- sqrt(offset_x^2 + offset_y^2 + offset_z^2) # mean AFID localization error
df_raters$outlier <- df_raters$mean_AFLE > 1.5 # outliers > 1.5mm
df_raters$xdist <- offset_x
df_raters$ydist <- offset_y
df_raters$zdist <- offset_z

# keep only placements within 1.5 mm (rows whose outlier flag is NA are dropped too)
df_raters_QC_P3 <- subset(df_raters, outlier == FALSE)


# Subject Averages Post-QC for PHASE3

For each subject, we calculate the mean value for each afid point and store it in a separate .fcsv file so that it can be loaded back into 3D Slicer. This is after an initial round of QC to identify any mislabeled AFIDs.

Deviation of the values by > 1.5 mm will be classified as an outlier (should be pretty close to zero POST-QC for PHASE3 since by definition thresholding at this level).

In [19]:
# start by calculating mean coordinates
# Builds df_subject_mean_QC_P3 and df_subject_sd_QC_P3: one row per subject x
# AFID with the mean (resp. standard deviation) of X, Y, Z over the matching
# rows of df_raters_QC_P3, plus the AFID name/description from df_afids.
df_subject_mean_QC_P3 <- data.frame(fid=integer(),X=double(),Y=double(),Z=double(),
                                 subject=factor(), name=factor(),description=character(),stringsAsFactors = FALSE)
df_subject_sd_QC_P3 <- data.frame(fid=integer(),X=double(),Y=double(),Z=double(),
                               subject=factor(), name=factor(),description=character(),stringsAsFactors = FALSE)

# subjects to iterate over: the factor levels when subject is a factor,
# otherwise the distinct values (levels() would be NULL and nothing would run)
subject_ids <- if (is.factor(df_raters_QC_P3$subject)) {
  levels(df_raters_QC_P3$subject)
} else {
  unique(as.character(df_raters_QC_P3$subject))
}
num_afids <- 32 # AFID32 protocol

# collect rows in preallocated lists and bind once after the loop
mean_rows <- vector("list", length(subject_ids) * num_afids)
sd_rows <- vector("list", length(subject_ids) * num_afids)
row_idx <- 0

# iterate over each subject and compute the mean and standard deviation
for (curr_subject in subject_ids) {
  for (i in seq_len(num_afids)) { # for each AFID32 point, calculate the mean
    row_idx <- row_idx + 1
    df_subset <- subset(df_raters_QC_P3, fid == i & subject == curr_subject)
    curr_fid_name <- df_afids$name[i]
    curr_fid_desc <- df_afids$description[i]
    mean_rows[[row_idx]] <- data.frame(fid = i, X = mean(df_subset$X), Y = mean(df_subset$Y), Z = mean(df_subset$Z),
                                       subject=curr_subject, name=curr_fid_name, description=curr_fid_desc,
                                       stringsAsFactors = FALSE)
    sd_rows[[row_idx]] <- data.frame(fid = i, X = sd(df_subset$X), Y = sd(df_subset$Y), Z = sd(df_subset$Z),
                                     subject=curr_subject, name=curr_fid_name, description=curr_fid_desc,
                                     stringsAsFactors = FALSE)
  }
}

# rbind() drops the zero-row templates once real rows are present
df_subject_mean_QC_P3 <- rbind(df_subject_mean_QC_P3, do.call(rbind, mean_rows))
df_subject_sd_QC_P3 <- rbind(df_subject_sd_QC_P3, do.call(rbind, sd_rows))

# keep subject as a factor: later cells loop over levels(df_subject_mean_QC_P3$subject)
df_subject_mean_QC_P3$subject <- factor(as.character(df_subject_mean_QC_P3$subject), levels = subject_ids)
df_subject_sd_QC_P3$subject <- factor(as.character(df_subject_sd_QC_P3$subject), levels = subject_ids)

In [20]:
# Create output fcsv file for each included subject
# Writes <subject>_MEAN.fcsv (3-line markups header followed by one row per
# AFID) into PHASE2_output_afid_postQC_for_PHASE3 for loading into 3D Slicer.

####################################################################
# EXPORT MEAN FIDUCIAL LOCATIONS AS FCSV FILE (with outliers filtered out)
####################################################################
setwd('~/Documents/GitHub/afids-macaca/data/PHASE2_output_afid_postQC_for_PHASE3//')

fcsv_header <- c('# Markups fiducial file version = 4.6',
                 '# CoordinateSystem = 0',
                 '# columns = id,x,y,z,ow,ox,oy,oz,vis,sel,lock,label,desc,associatedNodeID')

# factor levels when available, otherwise the distinct subject values
export_subjects <- if (is.factor(df_subject_mean_QC_P3$subject)) {
  levels(df_subject_mean_QC_P3$subject)
} else {
  unique(as.character(df_subject_mean_QC_P3$subject))
}

for (curr_filename in export_subjects) { # looping on each subject level
  curr_mean <- subset(df_subject_mean_QC_P3, subject==curr_filename)
  if (nrow(curr_mean) == 0) {
    warning("No mean coordinates for ", curr_filename, "; no file written", call. = FALSE)
    next
  }
  # desc comes from the rows being written, so it always lines up with the
  # coordinates even if df_afids has a different number of rows
  curr_fcsv <- data.frame(id=paste('vtkMRMLMarkupsFiducialNode',curr_mean$fid,sep="_"),x=curr_mean$X,y=curr_mean$Y,z=curr_mean$Z,
                          ow=0,ox=0,oy=0,oz=1,
                          vis=1,sel=1,lock=1,label=curr_mean$fid,desc=curr_mean$description,
                          associatedNodeID='vtkMRMLScalarVolumeNode1',stringsAsFactors = FALSE)

  # write out table (need to use file connection approach because of header information)
  curr_fcsv_name <- paste0(curr_filename,'_MEAN.fcsv')
  fio <- file(curr_fcsv_name, open="wt")
  # close the connection even if a write fails
  tryCatch({
    writeLines(fcsv_header, fio)
    write.table(curr_fcsv,fio,sep=',',quote=FALSE,col.names=FALSE,row.names=FALSE)
  }, finally = close(fio))
}

# Phase 2: Post-QC Analysis for PHASE3

Also classify outliers, defined as placements more than 1.5 mm from the per-subject post-QC (PHASE3) mean position of that AFID.

In [21]:
# Per-placement error relative to the PHASE3 subject mean of the same AFID.
# (Re)fills on df_raters_QC_P3: mean_AFLE (3D distance to the mean), outlier
# (mean_AFLE > 1.5) and the signed offsets xdist/ydist/zdist.

# row of df_subject_mean_QC_P3 holding the mean for each placement's
# (subject, fid) pair; NA when no such mean exists
mean_idx <- match(paste(df_raters_QC_P3$subject, df_raters_QC_P3$fid),
                  paste(df_subject_mean_QC_P3$subject, df_subject_mean_QC_P3$fid))

# whole-column arithmetic replaces the row-by-row loop, which copied the
# data frame and re-scanned df_subject_mean_QC_P3 on every iteration
offset_x <- df_raters_QC_P3$X - df_subject_mean_QC_P3$X[mean_idx]
offset_y <- df_raters_QC_P3$Y - df_subject_mean_QC_P3$Y[mean_idx]
offset_z <- df_raters_QC_P3$Z - df_subject_mean_QC_P3$Z[mean_idx]

df_raters_QC_P3$mean_AFLE <- sqrt(offset_x^2 + offset_y^2 + offset_z^2) # mean AFID localization error
df_raters_QC_P3$outlier <- df_raters_QC_P3$mean_AFLE > 1.5 # outliers > 1.5mm
df_raters_QC_P3$xdist <- offset_x
df_raters_QC_P3$ydist <- offset_y
df_raters_QC_P3$zdist <- offset_z

In [22]:
# Headline figures for the PHASE3 post-QC data: overall AFLE and outlier rate.
# Session 0 (the group tutorial) is left out of these headline figures.
# NOTE(review): the outlier listing and the ddply summaries further down are
# computed on df_raters_QC_P3, i.e. session 0 is included there - confirm intended.
all_subjects <- df_raters_QC_P3[which(df_raters_QC_P3$session > 0), ]
num_outliers <- sum(all_subjects$outlier, na.rm = TRUE)
num_total <- nrow(all_subjects)

sprintf(
  "Total: %.2f +/- %.2f mm; Outliers: %d/%d (%.2f%%)",
  mean(all_subjects$mean_AFLE),
  sd(all_subjects$mean_AFLE),
  num_outliers,
  num_total,
  (num_outliers/num_total)*100
)

# every placement flagged as an outlier, with the identifying columns only
outlier_cols <- c("rater","fid","subject","session","name","description","mean_AFLE")
summary_outliers <- df_raters_QC_P3[which(df_raters_QC_P3$outlier == TRUE), outlier_cols]
#summary_outliers

# mean / sd / max AFLE per annotated scan, then per AFID
summary_subjects_df <- ddply(df_raters_QC_P3, .(subject), summarize,
                             mean=mean(mean_AFLE), sd=sd(mean_AFLE), max=max(mean_AFLE))
summary_afids_df <- ddply(df_raters_QC_P3, .(fid), summarize,
                          mean=mean(mean_AFLE), sd=sd(mean_AFLE), max=max(mean_AFLE))

In [23]:
# Screen for outliers: list remaining placements more than 1.5 mm from the
# recomputed PHASE3 mean
df_raters_QC_P3[which(df_raters_QC_P3$mean_AFLE > 1.5), ]

Unnamed: 0,fid,X,Y,Z,rater,subject,mri_type,session,name,description,mean_AFLE,outlier,xdist,ydist,zdist
346,26,-10.421,1.232,-14.811,2,sub-032105,T1,2,26,LIAMTH,1.700312,True,-1.1645,-1.1113333,-0.5476667
441,25,10.646,2.0,-14.157,7,sub-032105,T1,1,25,R inferior AM temporal horn,1.583045,True,-0.58985714,-1.4655714,0.101
503,23,9.973,-2.256,-9.915,9,sub-032105,T1,1,23,R superior AM temporal horn,1.520745,True,0.256,-1.2086,0.8868
1066,10,-0.52,-25.738,49.056,2,sub-032199,T1,2,10,CUL,1.614427,True,0.05114286,1.5841429,-0.307
1931,11,64.212,35.887,59.265,7,sub-032210,T1,1,11,intermammillary sulcus,1.533772,True,-0.12313644,1.482621,-0.3730003
2041,25,75.546,39.542,52.087,9,sub-032210,T1,2,25,R inferior AM temporal horn,1.691614,True,1.48327776,0.7072618,-0.4015297
2042,26,54.007,38.08,52.023,9,sub-032210,T1,2,26,L inferior AM temporal horn,1.602095,True,-1.32922309,-0.4526755,-0.7713361
2417,17,85.339,53.689,117.538,3,sub-032213,T1,2,17,RLVPC,1.713421,True,-0.17885714,1.5922857,0.607
2482,18,69.699,53.358,118.114,6,sub-032213,T1,2,18,L LV at PC,1.805791,True,-0.11266667,1.5021667,0.9958333


In [24]:
# versioning info
# print the R version, platform and attached/loaded package versions
sessionInfo()

R version 3.6.1 (2019-07-05)
Platform: x86_64-apple-darwin13.4.0 (64-bit)
Running under: macOS Catalina 10.15.7

Matrix products: default
BLAS/LAPACK: /Users/jclau/anaconda3/envs/r-tutorial/lib/R/lib/libRblas.dylib

locale:
[1] en_CA.UTF-8/en_CA.UTF-8/en_CA.UTF-8/C/en_CA.UTF-8/en_CA.UTF-8

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
[1] ggplot2_3.1.1  reshape2_1.4.3 digest_0.6.18  plyr_1.8.4    

loaded via a namespace (and not attached):
 [1] Rcpp_1.0.1       magrittr_1.5     tidyselect_0.2.5 munsell_0.5.0   
 [5] uuid_0.1-2       colorspace_1.4-1 R6_2.4.0         rlang_0.3.4     
 [9] dplyr_0.8.0.1    stringr_1.4.0    tools_3.6.1      grid_3.6.1      
[13] gtable_0.3.0     withr_2.1.2      htmltools_0.3.6  assertthat_0.2.1
[17] lazyeval_0.2.2   tibble_2.1.1     crayon_1.3.4     IRdisplay_0.7.0 
[21] purrr_0.3.2      repr_0.19.2      base64enc_0.1-3  IRkernel_0.8.15 
[25] glue_1.3.1       evaluate_0.13    