In [11]:
suppressPackageStartupMessages(library(plyr))
suppressPackageStartupMessages(library(dplyr))
suppressPackageStartupMessages(library(tidyr))
suppressPackageStartupMessages(library(tibble))
suppressPackageStartupMessages(library(ggplot2))
suppressPackageStartupMessages(library(patchwork))
suppressPackageStartupMessages(library(stringr))
library(hise)
library(parallel)

In [12]:
# Row-wise summary statistics of a numeric table.
#
# df: a data frame (or matrix) of numeric values; each row is summarised
#     across its columns, ignoring NA entries.
#
# Returns a data frame with one row per input row and the columns
#   mean      row mean
#   variance  row sample variance (stats::var)
#   sd        square root of the variance
#   cv        coefficient of variation, sd / mean
calculate_stats <- function(df) {
  mu <- rowMeans(df, na.rm = TRUE)
  sigma2 <- apply(df, 1, function(values) var(values, na.rm = TRUE))
  sigma <- sqrt(sigma2)
  data.frame(
    mean = mu,
    variance = sigma2,
    sd = sigma,
    cv = sigma / mu
  )
}


In [13]:
# Read the sample metadata table.
# NOTE(review): the path contains a doubled slash ("/home//jupyter"); other
# paths in this notebook use "/home/jupyter" - confirm this is intentional.
meta_data <- read.csv(
  '/home//jupyter/BRI_Analysis/scRNA/hise_meta_data_2024-01-23_fixed.csv'
)

# Keep every sample whose visit is not one of the two Day 7 visits.
meta_data_nonD7 <- filter(
  meta_data,
  !(sample.visitName %in% c('Flu Year 1 Day 7', 'Flu Year 2 Day 7'))
)

# Calculate CV - Y1D0 + Y1D90 + Y2D0 + Y2D90

In [14]:
# Restrict the non-Day-7 samples to the four Day 0 / Day 90 visits.
meta_data_nonD7_4tp <- filter(
  meta_data_nonD7,
  sample.visitName %in% c(
    'Flu Year 1 Day 0',
    'Flu Year 1 Day 90',
    'Flu Year 2 Day 0',
    'Flu Year 2 Day 90'
  )
)

In [15]:
# Per-donor expression statistics over the samples in `meta_data_nonD7_4tp`.
#
# For each donor:
#   1. read one average-expression CSV per sample (in parallel) and prefix
#      its columns with the sample's visit name: "<visit>:<cell type>";
#   2. column-bind all samples and replace spaces in column names with "_";
#   3. for every cell type that has more than one column, drop rows that are
#      zero in every column, run calculate_stats() on the remainder and write
#      '01A_CV_4_timepoint/<donor>_<celltype>.csv'.
#
# Uses `calculate_stats()` and `meta_data_nonD7_4tp` defined in earlier cells.

# write.csv() does not create missing directories; make sure the target exists.
dir.create('01A_CV_4_timepoint', showWarnings = FALSE, recursive = TRUE)

for (i in unique(meta_data_nonD7_4tp$subject.subjectGuid)) {
  print(i)
  meta_data_donor <- meta_data_nonD7_4tp %>% filter(subject.subjectGuid == i)
  n_samples <- nrow(meta_data_donor)

  df_list <- mclapply(seq_len(n_samples), function(x) {
    df <- read.csv(
      paste0(
        '/home/jupyter/BRI_Analysis/scRNA/Average_Expression_by_Celltype_and_Sample/',
        meta_data_donor[['pbmc_sample_id']][x],
        '_AIFI_L3.csv'
      ),
      row.names = 1,
      check.names = FALSE
    )
    colnames(df) <- paste0(
      meta_data_donor[['sample.visitName']][x], ':', colnames(df)
    )
    df
  }, mc.cores = n_samples)

  # mclapply() does not raise worker errors: a failed job comes back as a
  # "try-error" object (or NULL if the worker died). Stop here instead of
  # letting a broken element flow into cbind().
  read_failed <- vapply(
    df_list,
    function(d) is.null(d) || inherits(d, 'try-error'),
    logical(1)
  )
  if (any(read_failed)) {
    stop(
      'Donor ', i, ': could not read expression table(s) for sample(s): ',
      paste(meta_data_donor[['pbmc_sample_id']][read_failed], collapse = ', '),
      call. = FALSE
    )
  }

  # cbind() pairs rows by position, not by name, so every sample table must
  # list the same row identifiers (presumably genes) in the same order.
  row_ids <- rownames(df_list[[1]])
  rows_match <- vapply(
    df_list,
    function(d) identical(rownames(d), row_ids),
    logical(1)
  )
  if (!all(rows_match)) {
    stop(
      'Donor ', i, ': row names differ between sample tables: ',
      paste(meta_data_donor[['pbmc_sample_id']][!rows_match], collapse = ', '),
      call. = FALSE
    )
  }

  df_all <- do.call(cbind, df_list)
  colnames(df_all) <- gsub(" ", '_', colnames(df_all))

  # Cell-type label of each column: everything after the first ":".
  celltype_of_col <- sub('^[^:]*:', '', colnames(df_all))
  celltype_levels <- unique(celltype_of_col)

  write_ok <- mclapply(celltype_levels, function(celltypes) {
    # Exact match on the cell-type label. A substring match on the full
    # column name would also select any other cell type (or visit name)
    # that merely contains this label.
    df_all_single_celltype <- df_all[
      , celltype_of_col == celltypes, drop = FALSE
    ]
    if (ncol(df_all_single_celltype) > 1) {
      # Number of columns with non-zero expression per row; NA entries are
      # ignored, consistent with na.rm = TRUE in calculate_stats().
      n_nonzero <- rowSums(df_all_single_celltype != 0, na.rm = TRUE)
      expressed <- n_nonzero > 0
      df_all_single_celltype <- df_all_single_celltype[
        expressed, , drop = FALSE
      ]
      # Nothing to report when no row is expressed in any column.
      if (nrow(df_all_single_celltype) > 0) {
        stats <- calculate_stats(df_all_single_celltype)
        stats$celltypes <- celltypes
        stats$donor <- i
        stats$number_of_timepoint <- n_samples
        stats$number_of_timepoint_non_zero_expr <- n_nonzero[expressed]
        write.csv(
          stats,
          paste0('01A_CV_4_timepoint/', i, '_', celltypes, '.csv')
        )
      }
    }
    TRUE
  }, mc.cores = 60)

  # Anything other than TRUE means the worker errored or died.
  write_failed <- !vapply(write_ok, isTRUE, logical(1))
  if (any(write_failed)) {
    warning(
      'Donor ', i, ': statistics were not written for cell type(s): ',
      paste(celltype_levels[write_failed], collapse = ', '),
      call. = FALSE
    )
  }
}


[1] "BR1001"
[1] "BR1002"
[1] "BR1003"
[1] "BR1004"
[1] "BR1005"
[1] "BR1006"
[1] "BR1008"
[1] "BR1007"
[1] "BR2003"
[1] "BR2002"
[1] "BR1010"
[1] "BR1009"
[1] "BR1011"
[1] "BR1012"
[1] "BR1013"
[1] "BR1014"
[1] "BR1018"
[1] "BR1033"
[1] "BR2009"
[1] "BR1021"
[1] "BR2038"
[1] "BR1037"
[1] "BR2025"
[1] "BR1031"
[1] "BR1026"
[1] "BR1025"
[1] "BR1024"
[1] "BR2010"
[1] "BR2008"
[1] "BR2013"
[1] "BR1032"
[1] "BR1028"
[1] "BR1040"
[1] "BR1023"
[1] "BR1019"
[1] "BR2005"
[1] "BR2037"
[1] "BR2015"
[1] "BR2039"
[1] "BR1016"
[1] "BR1043"
[1] "BR2035"
[1] "BR2016"
[1] "BR2033"
[1] "BR1044"
[1] "BR2036"
[1] "BR2040"
[1] "BR1042"
[1] "BR2034"
[1] "BR1045"
[1] "BR1017"
[1] "BR2022"
[1] "BR1015"
[1] "BR2041"
[1] "BR1049"
[1] "BR2047"
[1] "BR2048"
[1] "BR1030"
[1] "BR2032"
[1] "BR1050"
[1] "BR2012"
[1] "BR1048"
[1] "BR2044"
[1] "BR1047"
[1] "BR2014"
[1] "BR2020"
[1] "BR2043"
[1] "BR2042"
[1] "BR1051"
[1] "BR2024"
[1] "BR2045"
[1] "BR2029"
[1] "BR2023"
[1] "BR2031"
[1] "BR2046"
[1] "BR1054"
[1] "BR2026"

# Calculate CV- all non-D7

In [16]:
# Per-donor expression statistics over all samples in `meta_data_nonD7`.
#
# For each donor:
#   1. read one average-expression CSV per sample (in parallel) and prefix
#      its columns with the sample's visit name: "<visit>:<cell type>";
#   2. column-bind all samples and replace spaces in column names with "_";
#   3. for every cell type that has more than one column, drop rows that are
#      zero in every column, run calculate_stats() on the remainder and write
#      '01A_CV_all_timepoint/<donor>_<celltype>.csv'.
#
# Uses `calculate_stats()` and `meta_data_nonD7` defined in earlier cells.

# write.csv() does not create missing directories; make sure the target exists.
dir.create('01A_CV_all_timepoint', showWarnings = FALSE, recursive = TRUE)

for (i in unique(meta_data_nonD7$subject.subjectGuid)) {
  print(i)
  meta_data_donor <- meta_data_nonD7 %>% filter(subject.subjectGuid == i)
  n_samples <- nrow(meta_data_donor)

  df_list <- mclapply(seq_len(n_samples), function(x) {
    df <- read.csv(
      paste0(
        '/home/jupyter/BRI_Analysis/scRNA/Average_Expression_by_Celltype_and_Sample/',
        meta_data_donor[['pbmc_sample_id']][x],
        '_AIFI_L3.csv'
      ),
      row.names = 1,
      check.names = FALSE
    )
    colnames(df) <- paste0(
      meta_data_donor[['sample.visitName']][x], ':', colnames(df)
    )
    df
  }, mc.cores = n_samples)

  # mclapply() does not raise worker errors: a failed job comes back as a
  # "try-error" object (or NULL if the worker died). Stop here instead of
  # letting a broken element flow into cbind().
  read_failed <- vapply(
    df_list,
    function(d) is.null(d) || inherits(d, 'try-error'),
    logical(1)
  )
  if (any(read_failed)) {
    stop(
      'Donor ', i, ': could not read expression table(s) for sample(s): ',
      paste(meta_data_donor[['pbmc_sample_id']][read_failed], collapse = ', '),
      call. = FALSE
    )
  }

  # cbind() pairs rows by position, not by name, so every sample table must
  # list the same row identifiers (presumably genes) in the same order.
  row_ids <- rownames(df_list[[1]])
  rows_match <- vapply(
    df_list,
    function(d) identical(rownames(d), row_ids),
    logical(1)
  )
  if (!all(rows_match)) {
    stop(
      'Donor ', i, ': row names differ between sample tables: ',
      paste(meta_data_donor[['pbmc_sample_id']][!rows_match], collapse = ', '),
      call. = FALSE
    )
  }

  df_all <- do.call(cbind, df_list)
  colnames(df_all) <- gsub(" ", '_', colnames(df_all))

  # Cell-type label of each column: everything after the first ":".
  celltype_of_col <- sub('^[^:]*:', '', colnames(df_all))
  celltype_levels <- unique(celltype_of_col)

  write_ok <- mclapply(celltype_levels, function(celltypes) {
    # Exact match on the cell-type label. A substring match on the full
    # column name would also select any other cell type (or visit name)
    # that merely contains this label.
    df_all_single_celltype <- df_all[
      , celltype_of_col == celltypes, drop = FALSE
    ]
    if (ncol(df_all_single_celltype) > 1) {
      # Number of columns with non-zero expression per row; NA entries are
      # ignored, consistent with na.rm = TRUE in calculate_stats().
      n_nonzero <- rowSums(df_all_single_celltype != 0, na.rm = TRUE)
      expressed <- n_nonzero > 0
      df_all_single_celltype <- df_all_single_celltype[
        expressed, , drop = FALSE
      ]
      # Nothing to report when no row is expressed in any column.
      if (nrow(df_all_single_celltype) > 0) {
        stats <- calculate_stats(df_all_single_celltype)
        stats$celltypes <- celltypes
        stats$donor <- i
        stats$number_of_timepoint <- n_samples
        stats$number_of_timepoint_non_zero_expr <- n_nonzero[expressed]
        write.csv(
          stats,
          paste0('01A_CV_all_timepoint/', i, '_', celltypes, '.csv')
        )
      }
    }
    TRUE
  }, mc.cores = 60)

  # Anything other than TRUE means the worker errored or died.
  write_failed <- !vapply(write_ok, isTRUE, logical(1))
  if (any(write_failed)) {
    warning(
      'Donor ', i, ': statistics were not written for cell type(s): ',
      paste(celltype_levels[write_failed], collapse = ', '),
      call. = FALSE
    )
  }
}


[1] "BR1001"
[1] "BR1002"
[1] "BR1003"
[1] "BR1004"
[1] "BR1005"
[1] "BR1006"
[1] "BR1008"
[1] "BR1007"
[1] "BR2003"
[1] "BR2002"
[1] "BR1010"
[1] "BR1009"
[1] "BR1011"
[1] "BR1012"
[1] "BR1013"
[1] "BR1014"
[1] "BR1018"
[1] "BR1033"
[1] "BR2009"
[1] "BR1021"
[1] "BR2038"
[1] "BR1037"
[1] "BR2025"
[1] "BR1031"
[1] "BR1026"
[1] "BR1025"
[1] "BR1024"
[1] "BR2010"
[1] "BR2008"
[1] "BR2013"
[1] "BR1032"
[1] "BR1028"
[1] "BR1040"
[1] "BR1023"
[1] "BR1019"
[1] "BR2005"
[1] "BR2037"
[1] "BR2015"
[1] "BR2039"
[1] "BR1016"
[1] "BR1043"
[1] "BR2035"
[1] "BR2016"
[1] "BR2033"
[1] "BR1044"
[1] "BR2036"
[1] "BR2040"
[1] "BR1042"
[1] "BR2034"
[1] "BR1045"
[1] "BR1017"
[1] "BR2022"
[1] "BR1015"
[1] "BR2041"
[1] "BR1049"
[1] "BR2047"
[1] "BR2048"
[1] "BR1030"
[1] "BR2032"
[1] "BR1050"
[1] "BR2012"
[1] "BR1048"
[1] "BR2044"
[1] "BR1047"
[1] "BR2014"
[1] "BR2020"
[1] "BR2043"
[1] "BR2042"
[1] "BR1051"
[1] "BR2024"
[1] "BR2045"
[1] "BR2029"
[1] "BR2023"
[1] "BR2031"
[1] "BR2046"
[1] "BR1054"
[1] "BR2026"

# Calculate CV - all non-D7 time points, excluding Y2D90

In [17]:
# Keep every sample whose visit is neither a Day 7 visit nor Year 2 Day 90.
meta_data_nonD7_nonY2D90 <- filter(
  meta_data,
  !(sample.visitName %in% c(
    'Flu Year 1 Day 7',
    'Flu Year 2 Day 7',
    'Flu Year 2 Day 90'
  ))
)

In [18]:
# Per-donor expression statistics over the samples in
# `meta_data_nonD7_nonY2D90`.
#
# For each donor:
#   1. read one average-expression CSV per sample (in parallel) and prefix
#      its columns with the sample's visit name: "<visit>:<cell type>";
#   2. column-bind all samples and replace spaces in column names with "_";
#   3. for every cell type that has more than one column, drop rows that are
#      zero in every column, run calculate_stats() on the remainder and write
#      '01A_CV_all_timepoint_nonY2D90/<donor>_<celltype>.csv'.
#
# Uses `calculate_stats()` and `meta_data_nonD7_nonY2D90` defined in earlier
# cells.

# write.csv() does not create missing directories; make sure the target exists.
dir.create('01A_CV_all_timepoint_nonY2D90', showWarnings = FALSE, recursive = TRUE)

for (i in unique(meta_data_nonD7_nonY2D90$subject.subjectGuid)) {
  print(i)
  meta_data_donor <- meta_data_nonD7_nonY2D90 %>%
    filter(subject.subjectGuid == i)
  n_samples <- nrow(meta_data_donor)

  df_list <- mclapply(seq_len(n_samples), function(x) {
    df <- read.csv(
      paste0(
        '/home/jupyter/BRI_Analysis/scRNA/Average_Expression_by_Celltype_and_Sample/',
        meta_data_donor[['pbmc_sample_id']][x],
        '_AIFI_L3.csv'
      ),
      row.names = 1,
      check.names = FALSE
    )
    colnames(df) <- paste0(
      meta_data_donor[['sample.visitName']][x], ':', colnames(df)
    )
    df
  }, mc.cores = n_samples)

  # mclapply() does not raise worker errors: a failed job comes back as a
  # "try-error" object (or NULL if the worker died). Stop here instead of
  # letting a broken element flow into cbind().
  read_failed <- vapply(
    df_list,
    function(d) is.null(d) || inherits(d, 'try-error'),
    logical(1)
  )
  if (any(read_failed)) {
    stop(
      'Donor ', i, ': could not read expression table(s) for sample(s): ',
      paste(meta_data_donor[['pbmc_sample_id']][read_failed], collapse = ', '),
      call. = FALSE
    )
  }

  # cbind() pairs rows by position, not by name, so every sample table must
  # list the same row identifiers (presumably genes) in the same order.
  row_ids <- rownames(df_list[[1]])
  rows_match <- vapply(
    df_list,
    function(d) identical(rownames(d), row_ids),
    logical(1)
  )
  if (!all(rows_match)) {
    stop(
      'Donor ', i, ': row names differ between sample tables: ',
      paste(meta_data_donor[['pbmc_sample_id']][!rows_match], collapse = ', '),
      call. = FALSE
    )
  }

  df_all <- do.call(cbind, df_list)
  colnames(df_all) <- gsub(" ", '_', colnames(df_all))

  # Cell-type label of each column: everything after the first ":".
  celltype_of_col <- sub('^[^:]*:', '', colnames(df_all))
  celltype_levels <- unique(celltype_of_col)

  write_ok <- mclapply(celltype_levels, function(celltypes) {
    # Exact match on the cell-type label. A substring match on the full
    # column name would also select any other cell type (or visit name)
    # that merely contains this label.
    df_all_single_celltype <- df_all[
      , celltype_of_col == celltypes, drop = FALSE
    ]
    if (ncol(df_all_single_celltype) > 1) {
      # Number of columns with non-zero expression per row; NA entries are
      # ignored, consistent with na.rm = TRUE in calculate_stats().
      n_nonzero <- rowSums(df_all_single_celltype != 0, na.rm = TRUE)
      expressed <- n_nonzero > 0
      df_all_single_celltype <- df_all_single_celltype[
        expressed, , drop = FALSE
      ]
      # Nothing to report when no row is expressed in any column.
      if (nrow(df_all_single_celltype) > 0) {
        stats <- calculate_stats(df_all_single_celltype)
        stats$celltypes <- celltypes
        stats$donor <- i
        stats$number_of_timepoint <- n_samples
        stats$number_of_timepoint_non_zero_expr <- n_nonzero[expressed]
        write.csv(
          stats,
          paste0('01A_CV_all_timepoint_nonY2D90/', i, '_', celltypes, '.csv')
        )
      }
    }
    TRUE
  }, mc.cores = 60)

  # Anything other than TRUE means the worker errored or died.
  write_failed <- !vapply(write_ok, isTRUE, logical(1))
  if (any(write_failed)) {
    warning(
      'Donor ', i, ': statistics were not written for cell type(s): ',
      paste(celltype_levels[write_failed], collapse = ', '),
      call. = FALSE
    )
  }
}


[1] "BR1001"
[1] "BR1002"
[1] "BR1003"
[1] "BR1004"
[1] "BR1005"
[1] "BR1006"
[1] "BR1008"
[1] "BR1007"
[1] "BR2003"
[1] "BR2002"
[1] "BR1010"
[1] "BR1009"
[1] "BR1011"
[1] "BR1012"
[1] "BR1013"
[1] "BR1014"
[1] "BR1018"
[1] "BR1033"
[1] "BR2009"
[1] "BR1021"
[1] "BR2038"
[1] "BR1037"
[1] "BR2025"
[1] "BR1031"
[1] "BR1026"
[1] "BR1025"
[1] "BR1024"
[1] "BR2010"
[1] "BR2008"
[1] "BR2013"
[1] "BR1032"
[1] "BR1028"
[1] "BR1040"
[1] "BR1023"
[1] "BR1019"
[1] "BR2005"
[1] "BR2037"
[1] "BR2015"
[1] "BR2039"
[1] "BR1016"
[1] "BR1043"
[1] "BR2035"
[1] "BR2016"
[1] "BR2033"
[1] "BR1044"
[1] "BR2036"
[1] "BR2040"
[1] "BR1042"
[1] "BR2034"
[1] "BR1045"
[1] "BR1017"
[1] "BR2022"
[1] "BR1015"
[1] "BR2041"
[1] "BR1049"
[1] "BR2047"
[1] "BR2048"
[1] "BR1030"
[1] "BR2032"
[1] "BR1050"
[1] "BR2012"
[1] "BR1048"
[1] "BR2044"
[1] "BR1047"
[1] "BR2014"
[1] "BR2020"
[1] "BR2043"
[1] "BR2042"
[1] "BR1051"
[1] "BR2024"
[1] "BR2045"
[1] "BR2029"
[1] "BR2023"
[1] "BR2031"
[1] "BR2046"
[1] "BR1054"
[1] "BR2026"