In [None]:
library("MetaCyto")
library(flowCore)
library(dplyr)
library(ggplot2)
library(tidyr)

In [None]:
##### get fcs markers #####
# Load the table of CyTOF files and prefix every file name with the local
# "CyTOF_data" directory so that it can be opened directly.
cytof_files = read.csv("result_05_cytof_files.csv")
cytof_files$name = file.path("CyTOF_data",cytof_files$name)

# Build a single string describing the panel of one FCS file: read only the
# header, keep the keywords whose name matches the $P...N / $P...S pattern and
# paste their values together with "_".
# NOTE(review): the pattern is unanchored, so any keyword containing "$P"
# followed later by an "N" or "S" is kept as well - confirm this is intended
# before tightening it, because it changes how files are grouped into panels.
get_marker_signature = function(fcs_path){
    header = read.FCSheader(fcs_path)[[1]]
    header = header[grepl("\\$P.*[NS]",names(header))]
    paste0(unlist(header),collapse = "_")
}

# One row per file: path, study accession and the marker signature.
file_info = cytof_files%>%
    select(fcs_files = name, SDY = study_accession)

# vapply() instead of `for(i in 1:nrow(...))`: an empty file table yields an
# empty character column rather than iterating over the indices 1 and 0.
file_info$marker = vapply(as.character(file_info$fcs_files),
                          get_marker_signature,
                          character(1),
                          USE.NAMES = FALSE)


In [None]:
# Quick sanity check: table dimensions and number of distinct subjects.
dim(cytof_files)
n_distinct(cytof_files$subject_accession)

In [None]:
##### define batch #####
# A batch is one distinct marker signature within one study. Signatures are
# numbered per study in order of first appearance, giving ids of the form
# "<SDY>_panel_<k>".
batch_df = file_info%>%
    select(SDY,marker)%>%
    distinct()%>%
    group_by(SDY)%>%
    mutate(study_id = paste(SDY,"panel", row_number(),sep="_"))%>%
    ungroup()   # do not leave a grouped table behind for later steps

# Attach the batch id to every file and keep only the two columns that are
# passed on to preprocessing.
file_info = inner_join(file_info, batch_df, by = c("SDY","marker"))
file_info = file_info%>%select(fcs_files,study_id)

# head() instead of [1:10,]: with fewer than 10 files the latter pads the
# output with all-NA rows.
print(head(file_info, 10))

In [None]:
##### get sample data #####
# Per-file CMV flag: TRUE when CMV_Ab is greater than 2.
sample_info = cytof_files %>%
    transmute(fcs_files = name,
              CMV = CMV_Ab > 2)

In [None]:
##### preprocessing #####
# Run the MetaCyto batch preprocessing on every file listed in file_info,
# treating the data as CyTOF; output goes to the "preprocess_output" folder.
# NOTE(review): `b` is presumably the arcsinh transformation factor and the
# excluded parameters are presumably left untransformed - confirm against the
# MetaCyto documentation.
preprocessing.batch(
    inputMeta = file_info,
    assay = "CyTOF",
    b = 1/5,
    outpath = "preprocess_output",
    excludeTransformParameters = c("FSC-A","FSC-W","FSC-H","Time","Cell_length")
)

In [None]:
##### organize marker #####
# Collect the per-study panel descriptions written by the preprocessing step
# and summarise which marker names occur in which panel.
files=list.files("preprocess_output",pattern="processed_sample",recursive=TRUE,full.names=TRUE)
panel_info=collectData(files,longform=FALSE)
PS=panelSummary(panel_info,cluster=FALSE,folder = ".") 

# Harmonise marker names across panels. `newname` is matched to `ab_names`
# purely by position, i.e. entry k replaces the k-th name of the alphabetically
# sorted panel summary.
ab_names=sort(rownames(PS))
newname=c('(BA138)DD','BEAD','CCR6','CCR7','CCR7','CD11B','CD11C','CD123',
          'CD127','CD127','CD14','CD14','CD16','CD16','CD161','CD161','CD19',
          'CD19','CD20','CD20','CD24','CD24','CD25','CD25','CD27','CD27','CD28',
          'CD28','CD3','CD3','CD33','CD33','CD38','CD38','CD4','CD4','CD45RA','CD45RA','CD56',
          'CD56','CD57','CD8','CD8','CD85J','CD85J','CD86','CD94','CD94','CELL_LENGTH','CXCR3','CXCR5',
          'DEAD','DEAD','DNA1','DNA1','DNA2','DNA2','HLADR','HLADR','ICOS','IGD','IGD','PD-1',
          'SAMPLE_ID','TCRGD','TCRGD','TIME')

# Because the mapping is positional, a different set of input files - or
# re-running this cell after the names were already updated - would silently
# pair the wrong names. Fail loudly instead.
if(length(ab_names) != length(newname)){
    stop("Panel summary contains ", length(ab_names), " marker names but ",
         length(newname), " replacement names are defined; ",
         "review `newname` against sort(rownames(PS)).")
}
nameUpdator(ab_names,newname,files)

# Re-read the panels so that panel_info / PS reflect the harmonised names.
panel_info=collectData(files,longform=FALSE)
PS=panelSummary(panel_info,cluster=FALSE,folder = ".") 


In [None]:
# organize cell definitions
# Named vector: name = human-readable population, value = the marker
# definition string passed to MetaCyto. The first five entries are the
# CD8+ CD3+ CD94+ CD27- population and its CCR7 / CD45RA subsets; the last four
# are the same CCR7 / CD45RA subsets of all CD8+ CD3+ cells.
# The CD94+ CD27- CCR7- CD45RA- subset is named "EM CD94" (it was previously
# also called "EM", which collided with the plain CD8+ CD3+ "EM" entry and made
# the two populations indistinguishable by name downstream).
cluster_label=c("overall_cell"="CD8+|CD3+|CD94+|CD27-",
                "N CD94"="CD8+|CD3+|CD94+|CD27-|CCR7+|CD45RA+",
                "E CD94"="CD8+|CD3+|CD94+|CD27-|CCR7-|CD45RA+",
                "CM CD94"="CD8+|CD3+|CD94+|CD27-|CCR7+|CD45RA-",
                "EM CD94"="CD8+|CD3+|CD94+|CD27-|CCR7-|CD45RA-",
                "N"="CD8+|CD3+|CCR7+|CD45RA+",
                "E"="CD8+|CD3+|CCR7-|CD45RA+",
                "CM"="CD8+|CD3+|CCR7+|CD45RA-",
                "EM"="CD8+|CD3+|CCR7-|CD45RA-")

# Search every preprocessed study for the populations defined above; results
# are written to the "search_output" folder.
searchCluster.batch(preprocessOutputFolder="preprocess_output",
              outpath="search_output",
              clusterLabel=cluster_label)

In [None]:
##### statistical analysis #####

# Collect Summary statistics generated 
files=list.files("search_output",pattern="cluster_stats_in_each_sample",recursive=TRUE,full.names=TRUE)
fcs_stats=collectData(files,longform=TRUE)

# Attach the per-file CMV flag to the cluster statistics.
all_data=inner_join(fcs_stats,sample_info,by="fcs_files")

# Lookup table from marker definition string to population name.
# stringsAsFactors = FALSE keeps both columns character on every R version, so
# the join below compares like with like.
t1 = data.frame(label = unname(cluster_label),
                cell_name = names(cluster_label),
                stringsAsFactors = FALSE)

# Join on the definition string explicitly instead of on whatever columns the
# two tables happen to share.
all_data = inner_join(all_data,t1,by="label")

# Per-population model of the "fraction" parameter against CMV, with study_id
# passed as the study identifier; results are sorted by effect size.
plot_df = all_data
GA=glmAnalysis(value="value",variableOfInterst="CMV",parameter="fraction",
               otherVariables=NULL,studyID="study_id",label="cell_name",
               data=plot_df,CILevel=0.95,ifScale=c(TRUE,FALSE))
GA=GA[order(GA$Effect_size),]

print(GA)

# plot the results
plotGA(GA)



In [None]:
# Analyze each cluster in detail
# Collapse the per-panel batch ids ("<SDY>_panel_<k>") to the part before the
# first underscore, so that panels of the same study are analysed together.
all_data = all_data %>% mutate(study_id = gsub("_.*","",study_id))

# One meta-analysis per population, all written into a single PDF.
pdf("result_12_meta_analysis.pdf", width = 5, height = 5)
# tryCatch(..., finally = dev.off()) closes the PDF device even when one of the
# analyses fails, so no half-open device is left behind in the kernel.
invisible(tryCatch({
    # seq_along() so that an empty cluster_label runs zero iterations.
    for(i in seq_along(cluster_label)){
        L=cluster_label[i]
        # Rows holding the "fraction" statistic of the current population.
        dat=subset(all_data,all_data$parameter_name=="fraction"&
                 all_data$label==L)
        MA=metaAnalysis(value="value",variableOfInterst="CMV",main=names(cluster_label)[i],
                      otherVariables=NULL,studyID="study_id",
                      data=dat,CILevel=0.95,ifScale=c(TRUE,FALSE))
    }
}, finally = dev.off()))



In [None]:
##### plot CD8+ CD3+ CD94+ CD27- cell in all study #####
# Collapse the batch ids to the part before the first underscore (no-op when
# this was already done).
all_data = all_data %>% mutate(study_id = gsub("_.*","",study_id))

# Keep the "fraction" statistic of the CD8+ CD3+ CD94+ CD27- population for
# files with a known CMV flag. The filter refers to the piped table's own
# `label` column rather than to the global `all_data`, so it stays correct if
# the upstream table is ever subset or reordered.
plot_df = all_data %>%
    filter(parameter_name=="fraction" & label=="CD8+|CD3+|CD94+|CD27-")%>%
    filter(!is.na(CMV))

# Box plot of the fraction per study, split by CMV flag.
p = ggplot(plot_df, aes(x = study_id, y = value, fill = CMV))+
    geom_boxplot()+theme_bw()+ theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Write the figure to PDF; the device is closed even if drawing fails.
pdf("result_12_box_plot.pdf",width= 5, height = 3)
invisible(tryCatch(plot(p), finally = dev.off()))