In [None]:
library("MetaCyto")
library(flowCore)
library(dplyr)
library(ggplot2)
library(tidyr)

In [None]:
##### get fcs markers #####
# Load the CyTOF file manifest and point every file name at the local
# "CyTOF_data" directory.
cytof_files = read.csv("result_05_cytof_files.csv")
cytof_files$name = file.path("CyTOF_data", cytof_files$name)

# One row per FCS file: its path, the study it belongs to, and a signature
# string assembled from its header keywords (filled in by the loop below).
file_info = cytof_files %>%
    select(fcs_files = name, SDY = study_accession) %>%
    mutate(marker = NA)

# seq_len() gives an empty sequence for an empty manifest, whereas
# 1:nrow(x) would iterate over c(1, 0) and try to read files that do not exist.
for (i in seq_len(nrow(file_info))) {
    header = read.FCSheader(file_info$fcs_files[i])[[1]]
    # Keep the header keywords whose name matches "$P...N" / "$P...S".
    # NOTE(review): the pattern is unanchored, so any "$P" keyword that
    # contains an N or S anywhere after the prefix is kept as well -- confirm
    # that this is intended before tightening it, since the resulting string
    # defines the batches downstream.
    header = header[grepl("\\$P.*[NS]", names(header))]
    # Collapse the retained keyword values into a single signature string.
    file_info$marker[i] = paste0(unlist(header), collapse = "_")
}


In [None]:
# Size of the file manifest (rows, columns) and the number of distinct
# subjects it covers.
c(nrow(cytof_files), ncol(cytof_files))
n_distinct(cytof_files$subject_accession)

In [None]:
##### define batch #####
# Files of the same study that share an identical marker signature form one
# batch; batches are numbered within each study as "<SDY>_panel_<k>" in order
# of first appearance.
batch_df = file_info %>%
    distinct(SDY, marker) %>%
    group_by(SDY) %>%
    mutate(study_id = paste(SDY, "panel", seq_len(n()), sep = "_")) %>%
    ungroup()  # do not leave a grouped table lying around in the session

# Attach the batch id to every file and keep only the two columns that are
# passed on to preprocessing.
# Note: this overwrites file_info, so the cell cannot be re-run on its own
# without first re-running the cell that builds file_info.
file_info = inner_join(file_info, batch_df, by = c("SDY", "marker"))
file_info = file_info %>% select(fcs_files, study_id)

# head() shows at most 10 rows; indexing with [1:10, ] would pad the output
# with all-NA rows when fewer than 10 files are present.
print(head(file_info, 10))

In [None]:
##### get sample data #####
# Per-file CMV status: TRUE when the CMV_Ab value is above 2.
sample_info = cytof_files %>%
    transmute(fcs_files = name, CMV = CMV_Ab > 2)

In [None]:
##### preprocessing #####
# Channels passed as excludeTransformParameters (scatter, time and
# cell-length channels, going by their names).
untransformed_channels = c("FSC-A", "FSC-W", "FSC-H", "Time", "Cell_length")

# Run MetaCyto's batch preprocessing on every file listed in file_info and
# write the results under "preprocess_output".
preprocessing.batch(
    inputMeta = file_info,
    assay = "CyTOF",
    b = 1 / 5,
    outpath = "preprocess_output",
    excludeTransformParameters = untransformed_channels
)

In [None]:
##### organize marker #####
# Summarise which marker names occur in which preprocessed panel.
files = list.files("preprocess_output", pattern = "processed_sample",
                   recursive = TRUE, full.names = TRUE)
panel_info = collectData(files, longform = FALSE)
PS = panelSummary(panel_info, cluster = FALSE, folder = ".")

# Harmonise marker names across panels. `newname` is paired BY POSITION with
# the sorted marker names found in the panels, so the two vectors must line up
# exactly (note that sort() order depends on the session's collation locale).
ab_names = sort(rownames(PS))
newname = c('(BA138)DD','BEAD','CCR6','CCR7','CCR7','CD11B','CD11C','CD123',
            'CD127','CD127','CD14','CD14','CD16','CD16','CD161','CD161','CD19',
            'CD19','CD20','CD20','CD24','CD24','CD25','CD25','CD27','CD27','CD28',
            'CD28','CD3','CD3','CD33','CD33','CD38','CD38','CD4','CD4','CD45RA','CD45RA','CD56',
            'CD56','CD57','CD8','CD8','CD85J','CD85J','CD86','CD94','CD94','CELL_LENGTH','CXCR3','CXCR5',
            'DEAD','DEAD','DNA1','DNA1','DNA2','DNA2','HLADR','HLADR','ICOS','IGD','IGD','PD-1',
            'SAMPLE_ID','TCRGD','TCRGD','TIME')

if (setequal(ab_names, newname)) {
    # The panels already carry exactly the target names (e.g. this cell was
    # run before); pairing them positionally again would mis-map markers.
    message("Marker names are already harmonised; skipping nameUpdator().")
} else {
    if (length(ab_names) != length(newname)) {
        stop(sprintf(paste0("Marker name mapping is out of date: found %d marker names ",
                            "in the panels but %d replacement names are hard-coded."),
                     length(ab_names), length(newname)))
    }
    nameUpdator(ab_names, newname, files)
}

# Re-collect the panel summary so that it reflects the harmonised names.
panel_info = collectData(files, longform = FALSE)
PS = panelSummary(panel_info, cluster = FALSE, folder = ".")


In [None]:
# organize cell definitions
# Named character vector: each name is the display name of a cell population,
# each value is its marker definition written as "|"-separated terms of the
# form <MARKER>+ or <MARKER>-. The value strings are matched verbatim against
# the `label` column in later cells, so they must not be reformatted.
# NOTE(review): the "ConvN Fk_max/min" entries presumably come from an external
# model and the numbers in their trailing comments are carried over as-is --
# confirm their origin and meaning.
cluster_label=c("Memory B cells"="CD14-|CD33-|CD3-|CD19+|CD20+|CD24+|CD38-", #memory B cells
                "CD8+ T-EM cells"="CD14-|CD33-|CD3+|CD8+|CCR7-|CD45RA-", #effector memory CD8+ T cells
                "CD4+ T-CM cells"="CD14-|CD33-|CD3+|CD4+|CCR7+|CD45RA-", #central memory CD4+ T cells
                "Effector CD8+ T cells"="CD14-|CD33-|CD3+|CD8+|CCR7-|CD45RA+", #effector CD8+ T Cells
                "NKT cells"="CD14-|CD33-|CD3+|CD56+", #NKT cells
                "CD8+ CD94+ NKT cells"="CD14-|CD33-|CD3+|CD56+|CD8+|CD94+", #CD8+ CD94+ subset of the NKT definition above
                "Plasmablasts"="CD14-|CD33-|CD3-|CD20-|CD27+|CD38+", #plasmablasts
                "Conv1 F1_max"="CD45RA+|CD28-|CD33-", #Conv1 F1_max_0.0093
                "Conv1 F1_min"="CD45RA-|CD28+|CD33+",
                "Conv1 F2_max"="CD3+|CD45RA-|CD8+",
                "Conv1 F2_min"="CD3-|HLADR+|CD56-",
                "Conv1 F3_max"="CD20-|HLADR-|CD45RA-",
                "Conv1 F3_min"="CD20+|CD27-|CCR7+", #Conv1 F3_min_0.0425
                "Conv2 F1_max"="CD8+|CD94+|CD3+", #Conv2 F1_max_0.0
                "Conv2 F1_min"="CD8-|CD20-|CD27+", #Conv2 F1_min_0.0139
                "Conv2 F2_max"="CD8-|CD20-|CD56-", #Conv2 F2_max_0.0
                "Conv2 F2_min"="CD8+|CD94+|CD28-", #Conv2 F2_min_0.0
                "Conv2 F3_max"="CD3+|CD45RA-|CD94+",
                "Conv2 F3_min"="CD3-|CD94-|CD33+")
# Search every preprocessed panel for the populations defined above; results
# are written under "search_output".
searchCluster.batch(preprocessOutputFolder="preprocess_output",
              outpath="search_output",
              clusterLabel=cluster_label)

In [None]:
##### statistical analysis #####

# Collect the per-sample cluster statistics found under "search_output".
files = list.files("search_output", pattern = "cluster_stats_in_each_sample",
                   recursive = TRUE, full.names = TRUE)
fcs_stats = collectData(files, longform = TRUE)

# Attach the CMV status of each file.
all_data = inner_join(fcs_stats, sample_info, by = "fcs_files")

# Map every marker definition back to its display name. The join key is
# spelled out so the join cannot silently pick up other shared columns, and
# stringsAsFactors = FALSE keeps the key a character vector on R < 4.0.
label_names = data.frame(label = unname(cluster_label),
                         cell_name = names(cluster_label),
                         stringsAsFactors = FALSE)
all_data = inner_join(all_data, label_names, by = "label")

# Restrict the GLM to populations whose display name contains "max", "cells"
# or "blast" (this leaves out the "..._min" entries).
plot_df = all_data %>% filter(grepl("max|cells|blast", cell_name))
# `variableOfInterst` is spelled exactly as the call expects it.
GA = glmAnalysis(value = "value", variableOfInterst = "CMV", parameter = "fraction",
                 otherVariables = NULL, studyID = "study_id", label = "cell_name",
                 data = plot_df, CILevel = 0.95, ifScale = c(TRUE, FALSE))
GA = GA[order(GA$Effect_size), ]

print(GA)

# plot the results
plotGA(GA)



In [None]:
# Analyze each cluster in detail
# Collapse "<study>_panel_<k>" to "<study>" so that all panels of one study
# are pooled (the substitution is a no-op when it has already been applied).
all_data = all_data %>% mutate(study_id = gsub("_.*", "", study_id))

# One meta-analysis page per cell definition, all written into one PDF.
# tryCatch(finally = ) closes the PDF device even when a call fails part-way,
# so a failed run does not leave a dangling graphics device behind.
pdf("result_09_meta_analysis.pdf", 5, 5)
invisible(tryCatch({
    # seq_along() is safe for an empty vector, unlike 1:length(x).
    for (i in seq_along(cluster_label)) {
        L = cluster_label[i]
        dat = subset(all_data, all_data$parameter_name == "fraction" &
                               all_data$label == L)
        if (nrow(dat) == 0) {
            # Nothing to analyse for this definition; report it and move on
            # instead of aborting the remaining populations.
            warning("No 'fraction' rows for cluster '", names(cluster_label)[i],
                    "'; skipping.")
            next
        }
        MA = metaAnalysis(value = "value", variableOfInterst = "CMV",
                          main = names(cluster_label)[i],
                          otherVariables = NULL, studyID = "study_id",
                          data = dat, CILevel = 0.95, ifScale = c(TRUE, FALSE))
    }
}, finally = dev.off()))



In [None]:
##### box plot of one cell population per study, split by CMV status #####
# NOTE(review): this section was originally titled "plot CD8+ CD3+ CD94+ cell
# in all study", but the label filtered on below is the plasmablast definition
# (CD14-|CD33-|CD3-|CD20-|CD27+|CD38+). Confirm which population is intended.
all_data = all_data %>% mutate(study_id = gsub("_.*", "", study_id))

# Refer to the columns of the piped data directly instead of reaching back to
# the global `all_data` from inside filter().
plot_df = all_data %>%
    filter(parameter_name == "fraction",
           label == "CD14-|CD33-|CD3-|CD20-|CD27+|CD38+",
           !is.na(CMV))

p = ggplot(plot_df, aes(x = study_id, y = value, fill = CMV)) +
    geom_boxplot() +
    theme_bw() +
    theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Close the PDF device even if rendering fails.
pdf("result_09_box_plot.pdf", width = 5, height = 3)
invisible(tryCatch(plot(p), finally = dev.off()))

In [None]:
##### plot 2D #####
# Build one hex-binned 2D density plot of column `x` against column `y`.
#   data     : data frame holding both columns
#   x, y     : column names, given as strings
#   fill_max : upper limit of the fill scale (the lower limit is 0)
plot_hex_density = function(data, x, y, fill_max) {
    ggplot(data, aes(x = .data[[x]], y = .data[[y]])) +
        stat_binhex(bins = 200) +
        scale_fill_distiller(palette = "RdBu", limits = c(0, fill_max)) +
        labs(x = x, y = y) +  # keep the plain column names as axis titles
        theme_bw()
}

# Expression matrix of one preprocessed panel, as a data frame.
f = read.FCS("preprocess_output/SDY519_panel_2.fcs")
f = as.data.frame(exprs(f))

# Successive gates: f1 = CD3 > 2 and CD8 > 3; f2 = f1 further restricted to
# DNA1 > 3 and CD94 > 1. Both are computed before the PDF device is opened so
# that a missing column fails early.
f1 = f %>% filter(CD3 > 2 & CD8 > 3)
f2 = f1 %>% filter(DNA1 > 3 & CD94 > 1)

# Same six pages, in the same order, as before; the device is closed even if
# one of the plots fails.
pdf("result_09_2D_plots.pdf", height = 2, width = 3)
invisible(tryCatch({
    plot(plot_hex_density(f,  "CD3",  "CD8",    200))
    plot(plot_hex_density(f1, "CD94", "DNA1",   200))
    plot(plot_hex_density(f1, "DNA1", "CD27",   200))
    plot(plot_hex_density(f2, "CCR7", "CD45RA", 500))
    plot(plot_hex_density(f1, "CCR7", "CD45RA", 500))
    plot(plot_hex_density(f2, "CD56", "CD16",   500))
}, finally = dev.off()))



In [None]:
##### plot histogram #####
# Reshape the f1 events to long form (one row per event and marker), flagging
# each event by whether it passes CD94 > 1; the TIME column is left out.
f1_plot = f1 %>%
    mutate(gated = CD94 > 1) %>%
    select(-TIME) %>%
    gather(key = "marker", value = "value", -gated)

# Overlaid density histograms of the two groups, one facet per marker, each
# facet with its own axes.
p = ggplot(f1_plot) +
    geom_histogram(
        aes(x = value, y = ..density.., fill = gated),
        alpha = 0.5,
        position = 'identity'
    ) +
    facet_wrap(~marker, scales = "free")
plot(p)

In [None]:
##### get all 24 cell definitions #####
# Display name -> marker definition, written with spaces between the marker
# terms for readability.
# NOTE(review): "TCRgd+" is the only mixed-case marker term here, while the
# harmonised marker names used earlier in this notebook are upper-case
# ("TCRGD") -- confirm that this definition still matches downstream.
cell_definitions = c(
    "B cells"                      = "CD14- CD33- CD3- CD19+ CD20+",
    "CD16- monocytes"              = "CD14+ CD33+ CD16-",
    "CD16+ monocytes"              = "CD14+ CD33+ CD16+",
    "CD4+ T cells"                 = "CD14- CD33- CD3+ CD4+",
    "CD8+ T cells"                 = "CD14- CD33- CD3+ CD8+",
    "central memory CD4+ T cells"  = "CD14- CD33- CD3+ CD4+ CCR7+ CD45RA-",
    "central memory CD8+ T cells"  = "CD14- CD33- CD3+ CD8+ CCR7+ CD45RA-",
    "effector CD4+ T cells"        = "CD14- CD33- CD3+ CD4+ CCR7- CD45RA+",
    "effector CD8+ T cells"        = "CD14- CD33- CD3+ CD8+ CCR7- CD45RA+",
    "effector memory CD4+ T cells" = "CD14- CD33- CD3+ CD4+ CCR7- CD45RA-",
    "effector memory CD8+ T cells" = "CD14- CD33- CD3+ CD8+ CCR7- CD45RA-",
    "gamma-delta T cells"          = "CD14- CD33- TCRgd+",
    "lymphocytes"                  = "CD14- CD33-",
    "memory B cells"               = "CD14- CD33- CD3- CD19+ CD20+ CD24+ CD38-",
    "monocytes"                    = "CD14+ CD33+",
    "naive B cells"                = "CD14- CD33- CD3- CD19+ CD20+ CD24- CD38+",
    "naive CD4+ T cells"           = "CD14- CD33- CD3+ CD4+ CCR7+ CD45RA+",
    "naive CD8+ T cells"           = "CD14- CD33- CD3+ CD8+ CCR7+ CD45RA+",
    "NK cells"                     = "CD14- CD33- CD3- CD16+ CD56+",
    "NKT cells"                    = "CD14- CD33- CD3+ CD56+",
    "plasmablasts"                 = "CD14- CD33- CD3- CD20- CD27+ CD38+",
    "T cells"                      = "CD14- CD33- CD3+",
    "transitional B cells"         = "CD14- CD33- CD3- CD19+ CD20+ CD24+ CD38+",
    "Tregs"                        = "CD14- CD33- CD3+ CD4+ CD25+ CD127-"
)

# Turn the space-separated terms into the "|"-separated form; only the values
# are rewritten, the display names keep their spaces.
cluster_label = gsub(" ", "|", cell_definitions, fixed = TRUE)

# Search every preprocessed panel for these populations; results go to a
# separate folder, "search_output_2".
searchCluster.batch(
    preprocessOutputFolder = "preprocess_output",
    outpath = "search_output_2",
    clusterLabel = cluster_label
)



In [None]:
# Collect the summary statistics generated for the 24 cell definitions.
# They were written to "search_output_2"; reading "search_output" instead
# would pick up the results of the earlier search and keep only the few
# definitions the two label sets happen to share.
files = list.files("search_output_2", pattern = "cluster_stats_in_each_sample",
                   recursive = TRUE, full.names = TRUE)
if (length(files) == 0) {
    stop("No cluster_stats_in_each_sample files found under 'search_output_2'.")
}
fcs_stats = collectData(files, longform = TRUE) %>% distinct()

# Map each marker definition to its display name. The match is done on an
# upper-cased key so a definition written in mixed case (e.g. "TCRgd+") is not
# silently dropped.
# NOTE(review): this assumes the collected `label` column may come back
# upper-cased -- confirm against the search output.
label_names = data.frame(label_key = toupper(unname(cluster_label)),
                         cell_name = names(cluster_label),
                         stringsAsFactors = FALSE)
fcs_stats = fcs_stats %>%
    mutate(label_key = toupper(label)) %>%
    inner_join(label_names, by = "label_key")

# Keep the "fraction" statistic only and reshape to one row per file with one
# "<cell name>_fraction" column per population. If a file/population pair
# occurs more than once, the first value is used.
fcs_stats = fcs_stats %>%
    filter(parameter_name == "fraction") %>%
    mutate(CN = paste(cell_name, parameter_name, sep = "_")) %>%
    select(fcs_files, CN, value) %>%
    group_by(fcs_files, CN) %>%
    summarise(value = value[1]) %>%
    ungroup()
fcs_stats = spread(fcs_stats, key = CN, value = value)

# Drop every column that has a missing value for at least one file.
complete_cols = colSums(is.na(fcs_stats)) == 0
fcs_stats = fcs_stats[, complete_cols] %>% as.data.frame()
fcs_stats = mutate(fcs_stats, fcs_files = basename(fcs_files))

# Preview: head() never pads with NA rows, unlike [1:10, ].
head(fcs_stats, 10)

write.csv(fcs_stats, "result_06_summary_statistics.csv", row.names = FALSE)