### Functions

In [1]:
t_test <- function(data
                  ,...){
    t.test(value~group
           ,data
           ,...
          )
}

In [2]:
welch_test <- function(data
                  ,...){
    t.test(value~group
           ,data
           ,var.equal = FALSE
           ,...
          )
}

In [3]:
mann_whitney_u_test <- function(data
                               ,...){
    wilcox.test(value~group
                ,data
                ,exact = NULL
                ,correct = TRUE
                ,conf.int = FALSE
                ,...
               )
}

In [4]:
moods_median_test <- function(data
                             ,...){
    mood.test(value~group
              ,data
              ,...)
}

In [5]:
run_statistics <- function(experiment
                         ,groups_to_test = NA 
                          # If there are more than two groups in the dataset, by default all vs all groups will be tested. 
                          # Provide character vector in form of e.g. c("het vs WT", "KO vs WT") to perform only selected comparisons
                         ,adjustment_method = "bonferroni"
                          ){
    # read in data
    data <- read.csv(file = paste0("./input/"
                                  ,experiment
                                  ,".tsv"
                                  )
                    ,sep = "\t"
                     ,dec = ","
                    ,header = TRUE)
    print(str(data)) # REMOVE THIS!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    cat('\n')
    
    # how many groups we have?
    groups <- unique(data$group)
    print("we have following groups in the data:")
    print(groups) # REMOVE THIS!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    cat('\n')
    
    all_vs_all <- (length(groups >2)) & is.na(groups_to_test) # TRUE or FALSE
    
    # do we have biological replicates?
    ### if yes, paired version will be used
    paired <- sum(!is.na(data$bio_rep)) # TRUE or FALSE
    if(paired) {print("data are paired observations")}
    cat('\n')

    # pick the correct test
    ### check the normality of distribution assumption in all groups
    print("run Shapiro test to check the assumption of the distribution normality...")
    not_norm <- sapply(groups
                   ,function(group){
                       idx_group <- data$group == group
                       pvalue <- shapiro.test(data$value[idx_group])$p.value
                       
                       print(group) # REMOVE THIS!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                       ifelse(pvalue <= 0.05,print("non-normal"),print("normal")) # REMOVE THIS!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                       pvalue <= 0.05
                   })
    norm_dist <- sum(not_norm)== 0 # TRUE or FALSE
    print("overall distribution is:") # REMOVE THIS!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    ifelse(norm_dist,print("normal"),print("non-normal")) # REMOVE THIS!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    cat('\n')
    
    ### check the homogeneity of variance assumption in all groups
    not_homogeneic <- if(norm_dist){
        print("run Bartlett test to check the assumption of the variance homogeneity...")
        pvalue <- bartlett.test(value~group, data = data)$p.value 
        pvalue <= 0.05
    } else {
        print("run Fligner-Killeen test to check the assumption of the variance homogeneity...")
        pvalue <- fligner.test(value~group, data = data)$p.value 
        pvalue <= 0.05
    }
    
    homo_var <- sum(not_homogeneic)== 0 # TRUE or FALSE
    print("overall variance is:") # REMOVE THIS!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    ifelse(homo_var,print("stable"),print("non-stable")) # REMOVE THIS!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    
    ### pick the test
    which_test <- if(norm_dist){ 
        ifelse(homo_var
              ,"t-test"  # distribution is normal, variance is stable
              ,"Welch test"  # distribution is normal, variance is not stable
              )
    }else {
        ifelse(homo_var
              ,"Mann-Whitney U-test"  # distribution is not normal, variance is stable
              ,"Mood's Median-test" # distribution is not normal, variance is not stable
              )
    }
    
    ### run the test
    run_one_comparison <- function(data){
        if(which_test == "t-test"){
            t_test(data, paired = paired)$p.value
    } else if(which_test == "Welch test"){
        welch_test(data, paired = paired)$p.value
    } else if(which_test == "Mann-Whitney U-test"){
        mann_whitney_u_test(data, paired = paired)$p.value
    } else if(which_test == "Mood's Median-test"){
        moods_median_test(data)$p.value
    } else error("ERROR: undefined test")
        }
    
    p.values <- sapply(groups_to_test
                      ,function(comparison){
                          group1 <- sub(" vs .*","",comparison)
                          group2 <- sub(".* vs ","",comparison)
                          
                          idx <- data$group %in% c(group1, group2)
                          run_one_comparison(data[idx,])
                      })
    
    # adjust for multiple testing if needed
    p.adjs <- p.adjust(p.values
                      ,method = adjustment_method)
        
    
    output <- data.frame(comparison = groups_to_test
                        ,p.value = p.values)
        
    if(length(groups_to_test)> 1){output$p.adj = p.adjs}
    
    # print mesages
    
    cat('\n')
    print(paste0("Since the normality of distribution assumption is "
                ,ifelse(norm_dist
                       ,"met"
                       ,"not met"
                       )
                 ," and the homogeneity of variace assumption is "
                 ,ifelse(homo_var
                        ,"met"
                        ,"not met"
                        )
                 ,", we use the "
                 ,which_test
                 ,"."
                )
         )
    
    cat('\n')
    if(paired){print(paste0("Since we have several independent biological replicates as paired observations across groups, we use the paired version of the "
                           ,which_test
                           ," test.")
                    )}
    
    
    cat('\n')
    print("We are doing the comparison of the following group(s):")
    print(ifelse(all_vs_all
          ,"all vs all"
          ,groups_to_test))
    
    
    cat('\n')    
    print(output)

    # export results
        write.table(output
                 ,file = paste0("./output/"
                                ,experiment
                                ,"_stats.tsv"
                                )
                 ,sep = "\t"
                 ,quote = FALSE
                 ,col.names = TRUE
                 ,row.names = FALSE)
    
}

### Run analysis

In [6]:
run_statistics("Fig1C_DGEP"
              ,groups_to_test = "MUT vs WT")

'data.frame':	51 obs. of  5 variables:
 $ value  : num  67.3 51.3 85.5 135.6 52.7 ...
 $ group  : chr  "WT" "WT" "WT" "WT" ...
 $ tec_rep: logi  NA NA NA NA NA NA ...
 $ bio_rep: logi  NA NA NA NA NA NA ...
 $ date   : logi  NA NA NA NA NA NA ...
NULL

[1] "we have following groups in the data:"
[1] "WT"  "MUT"


[1] "run Shapiro test to check the assumption of the distribution normality..."
[1] "WT"
[1] "non-normal"
[1] "MUT"
[1] "normal"
[1] "overall distribution is:"
[1] "non-normal"

[1] "run Fligner-Killeen test to check the assumption of the variance homogeneity..."
[1] "overall variance is:"
[1] "stable"

[1] "Since the normality of distribution assumption is not met and the homogeneity of variace assumption is met, we use the Mann-Whitney U-test."


[1] "We are doing the comparison of the following group(s):"
[1] "MUT vs WT"

          comparison   p.value
MUT vs WT  MUT vs WT 0.1743283


In [7]:
run_statistics("Fig1D_QMI"
              ,groups_to_test = "MUT vs WT")

'data.frame':	43 obs. of  5 variables:
 $ value  : int  79 91 75 82 86 77 79 82 68 79 ...
 $ group  : chr  "WT" "WT" "WT" "WT" ...
 $ tec_rep: logi  NA NA NA NA NA NA ...
 $ bio_rep: logi  NA NA NA NA NA NA ...
 $ date   : logi  NA NA NA NA NA NA ...
NULL

[1] "we have following groups in the data:"
[1] "WT"  "MUT"


[1] "run Shapiro test to check the assumption of the distribution normality..."
[1] "WT"
[1] "normal"
[1] "MUT"
[1] "normal"
[1] "overall distribution is:"
[1] "normal"

[1] "run Bartlett test to check the assumption of the variance homogeneity..."
[1] "overall variance is:"
[1] "non-stable"

[1] "Since the normality of distribution assumption is met and the homogeneity of variace assumption is not met, we use the Welch test."


[1] "We are doing the comparison of the following group(s):"
[1] "MUT vs WT"

          comparison     p.value
MUT vs WT  MUT vs WT 0.003058479


In [8]:
run_statistics("Fig2B_FACS_OCI-Ly1"
              ,groups_to_test = c("het vs WT"
                                 ,"KO vs WT"
                                 )
              )

'data.frame':	9 obs. of  5 variables:
 $ value  : num  17.4 14.5 13 2.8 2.24 1.84 1.8 1.84 1.38
 $ group  : chr  "WT" "WT" "WT" "het" ...
 $ tec_rep: int  1 2 3 1 2 3 1 2 3
 $ bio_rep: logi  NA NA NA NA NA NA ...
 $ date   : int  NA 20200507 20200514 NA 20200507 20200514 NA 20200507 20200514
NULL

[1] "we have following groups in the data:"
[1] "WT"  "het" "KO" 


[1] "run Shapiro test to check the assumption of the distribution normality..."
[1] "WT"
[1] "normal"
[1] "het"
[1] "normal"
[1] "KO"
[1] "normal"
[1] "overall distribution is:"
[1] "normal"

[1] "run Bartlett test to check the assumption of the variance homogeneity..."
[1] "overall variance is:"
[1] "non-stable"

[1] "Since the normality of distribution assumption is met and the homogeneity of variace assumption is not met, we use the Welch test."


[1] "We are doing the comparison of the following group(s):"
[1] "het vs WT" "KO vs WT" 

          comparison     p.value      p.adj
het vs WT  het vs WT 0.007950932 0.01590186


In [9]:
run_statistics("Fig2B_FACS_OCI-Ly8"
              ,groups_to_test = c("het vs WT"
                                 ,"KO vs WT"
                                 ,"het+ARID1A vs WT")
              )

'data.frame':	12 obs. of  5 variables:
 $ value  : num  21 18 14.4 3.7 5.81 4.37 2.4 3.05 1.72 17.2 ...
 $ group  : chr  "WT" "WT" "WT" "het" ...
 $ tec_rep: int  1 2 3 1 2 3 1 2 3 1 ...
 $ bio_rep: logi  NA NA NA NA NA NA ...
 $ date   : int  NA 20200507 20200514 NA 20200507 20200514 NA 20200507 20200514 NA ...
NULL

[1] "we have following groups in the data:"
[1] "WT"         "het"        "KO"         "het+ARID1A"


[1] "run Shapiro test to check the assumption of the distribution normality..."
[1] "WT"
[1] "normal"
[1] "het"
[1] "normal"
[1] "KO"
[1] "normal"
[1] "het+ARID1A"
[1] "normal"
[1] "overall distribution is:"
[1] "normal"

[1] "run Bartlett test to check the assumption of the variance homogeneity..."
[1] "overall variance is:"
[1] "stable"

[1] "Since the normality of distribution assumption is met and the homogeneity of variace assumption is met, we use the t-test."


[1] "We are doing the comparison of the following group(s):"
[1] "het vs WT"        "KO vs WT"         "h

In [10]:
run_statistics("Fig2E_qPCR_OCI-Ly1"
              ,groups_to_test = c("het vs WT"
                                 ,"KO vs WT"
                                 )
              )

'data.frame':	9 obs. of  5 variables:
 $ value  : num  1 1 1 0.42 0.71 0.1 0.23 0.31 0.04
 $ group  : chr  "WT" "WT" "WT" "het" ...
 $ tec_rep: chr  "exp 1" "exp 2" "exp 3" "exp 1" ...
 $ bio_rep: logi  NA NA NA NA NA NA ...
 $ date   : logi  NA NA NA NA NA NA ...
NULL

[1] "we have following groups in the data:"
[1] "WT"  "het" "KO" 


[1] "run Shapiro test to check the assumption of the distribution normality..."


ERROR: Error in shapiro.test(data$value[idx_group]): alle 'x' Werte sind gleich


In [None]:
run_statistics("Fig2E_qPCR_OCI-Ly8"
              ,groups_to_test = c("het vs WT"
                                 ,"KO vs WT"
                                 ,"het+ARID1A vs WT")
              )

In [None]:
run_statistics("Fig3E_qPCR_OCI-Ly1"
              ,groups_to_test = c("het vs WT"
                                 ,"KO vs WT"
                                 )
              )

In [None]:
run_statistics("Fig3E_qPCR_OCI-Ly8"
              ,groups_to_test = c("het vs WT"
                                 ,"KO vs WT"
                                 ,"het+ARID1A vs WT")
              )

In [11]:
run_statistics("Fig4C_luc_promoter1"
              ,groups_to_test = c("50 ng vs 0 ng"
                                 ,"200 ng vs 0 ng"
                                 ,"500 ng vs 0 ng")
              )

'data.frame':	12 obs. of  5 variables:
 $ value  : num  1.1 1.14 0.765 7.2 5.7 ...
 $ group  : chr  "0 ng" "0 ng" "0 ng" "50 ng" ...
 $ tec_rep: chr  "exp 1" "exp 2" "exp 3" "exp 1" ...
 $ bio_rep: logi  NA NA NA NA NA NA ...
 $ date   : logi  NA NA NA NA NA NA ...
NULL

[1] "we have following groups in the data:"
[1] "0 ng"   "50 ng"  "200 ng" "500 ng"


[1] "run Shapiro test to check the assumption of the distribution normality..."
[1] "0 ng"
[1] "normal"
[1] "50 ng"
[1] "normal"
[1] "200 ng"
[1] "normal"
[1] "500 ng"
[1] "non-normal"
[1] "overall distribution is:"
[1] "non-normal"

[1] "run Fligner-Killeen test to check the assumption of the variance homogeneity..."
[1] "overall variance is:"
[1] "stable"


“kann bei Bindungen keinen exakten p-Wert Berechnen”



[1] "Since the normality of distribution assumption is not met and the homogeneity of variace assumption is met, we use the Mann-Whitney U-test."


[1] "We are doing the comparison of the following group(s):"
[1] "50 ng vs 0 ng"  "200 ng vs 0 ng" "500 ng vs 0 ng"

                   comparison   p.value     p.adj
50 ng vs 0 ng   50 ng vs 0 ng 0.1000000 0.3000000
200 ng vs 0 ng 200 ng vs 0 ng 0.1000000 0.3000000
500 ng vs 0 ng 500 ng vs 0 ng 0.0765225 0.2295675


In [12]:
run_statistics("Fig4C_luc_promoter2"
              ,groups_to_test = c("50 ng vs 0 ng"
                                 ,"200 ng vs 0 ng"
                                 ,"500 ng vs 0 ng")
              )

'data.frame':	12 obs. of  5 variables:
 $ value  : num  0.99 1.28 0.73 9.5 7.5 ...
 $ group  : chr  "0 ng" "0 ng" "0 ng" "50 ng" ...
 $ tec_rep: chr  "exp 1" "exp 2" "exp 3" "exp 1" ...
 $ bio_rep: logi  NA NA NA NA NA NA ...
 $ date   : logi  NA NA NA NA NA NA ...
NULL

[1] "we have following groups in the data:"
[1] "0 ng"   "50 ng"  "200 ng" "500 ng"


[1] "run Shapiro test to check the assumption of the distribution normality..."
[1] "0 ng"
[1] "normal"
[1] "50 ng"
[1] "normal"
[1] "200 ng"
[1] "normal"
[1] "500 ng"
[1] "non-normal"
[1] "overall distribution is:"
[1] "non-normal"

[1] "run Fligner-Killeen test to check the assumption of the variance homogeneity..."
[1] "overall variance is:"
[1] "stable"


“kann bei Bindungen keinen exakten p-Wert Berechnen”



[1] "Since the normality of distribution assumption is not met and the homogeneity of variace assumption is met, we use the Mann-Whitney U-test."


[1] "We are doing the comparison of the following group(s):"
[1] "50 ng vs 0 ng"  "200 ng vs 0 ng" "500 ng vs 0 ng"

                   comparison   p.value     p.adj
50 ng vs 0 ng   50 ng vs 0 ng 0.1000000 0.3000000
200 ng vs 0 ng 200 ng vs 0 ng 0.1000000 0.3000000
500 ng vs 0 ng 500 ng vs 0 ng 0.0765225 0.2295675


In [None]:
run_statistics("Fig4F_qPCR_OCI-Ly8"
              ,groups_to_test = c("het+RUNX3 vs het"
                                 ,"het+RUNX3 vs WT"
                                 )
              )

In [13]:
run_statistics("Fig4G_FACS_OCI-Ly8"
              ,groups_to_test = c("het+RUNX3 vs het"
                                 ,"het+RUNX3 vs WT"
                                 )
              )

'data.frame':	12 obs. of  5 variables:
 $ value  : num  20.5 22.8 18.3 13.4 16.5 9.72 31.1 29.9 14.8 7.6 ...
 $ group  : chr  "WT" "WT" "WT" "het" ...
 $ tec_rep: int  1 2 3 1 2 3 1 2 3 1 ...
 $ bio_rep: logi  NA NA NA NA NA NA ...
 $ date   : int  20210629 20210623 20210907 20210629 20210623 20210907 20210629 20210623 20210907 20210629 ...
NULL

[1] "we have following groups in the data:"
[1] "WT"        "het"       "het+RUNX3" "KO"       


[1] "run Shapiro test to check the assumption of the distribution normality..."
[1] "WT"
[1] "normal"
[1] "het"
[1] "normal"
[1] "het+RUNX3"
[1] "normal"
[1] "KO"
[1] "normal"
[1] "overall distribution is:"
[1] "normal"

[1] "run Bartlett test to check the assumption of the variance homogeneity..."
[1] "overall variance is:"
[1] "stable"

[1] "Since the normality of distribution assumption is met and the homogeneity of variace assumption is met, we use the t-test."


[1] "We are doing the comparison of the following group(s):"
[1] "het+RUNX3 vs he

In [14]:
run_statistics("Fig5B_FACS_OCI-Ly1"
              ,groups_to_test = c("het 0 ng vs WT 0 ng"
                                 ,"het+RUNX3 0 ng vs WT 0 ng"
                                 ,"KO 0 ng vs WT 0 ng"
                                  ,"het 3 ng vs WT 3 ng"
                                 ,"het+RUNX3 3 ng vs WT 3 ng"
                                 ,"KO 3 ng vs WT 3 ng"
                                  ,"het 30 ng vs WT 30 ng"
                                 ,"het+RUNX3 30 ng vs WT 30 ng"
                                 ,"KO 30 ng vs WT 30 ng"
                                  ,"het 300 ng vs WT 300 ng"
                                 ,"het+RUNX3 300 ng vs WT 300 ng"
                                 ,"KO 300 ng vs WT 300 ng"
                                 )
              )

'data.frame':	48 obs. of  5 variables:
 $ value  : num  3.34 2.5 2.86 4.68 4.17 ...
 $ group  : chr  "WT 0 ng" "WT 0 ng" "WT 0 ng" "het 0 ng" ...
 $ tec_rep: int  1 2 3 1 2 3 1 2 3 1 ...
 $ bio_rep: logi  NA NA NA NA NA NA ...
 $ date   : logi  NA NA NA NA NA NA ...
NULL

[1] "we have following groups in the data:"
 [1] "WT 0 ng"          "het 0 ng"         "het+RUNX3 0 ng"   "KO 0 ng"         
 [5] "WT 3 ng"          "het 3 ng"         "het+RUNX3 3 ng"   "KO 3 ng"         
 [9] "WT 30 ng"         "het 30 ng"        "het+RUNX3 30 ng"  "KO 30 ng"        
[13] "WT 300 ng"        "het 300 ng"       "het+RUNX3 300 ng" "KO 300 ng"       


[1] "run Shapiro test to check the assumption of the distribution normality..."
[1] "WT 0 ng"
[1] "normal"
[1] "het 0 ng"
[1] "normal"
[1] "het+RUNX3 0 ng"
[1] "normal"
[1] "KO 0 ng"
[1] "normal"
[1] "WT 3 ng"
[1] "normal"
[1] "het 3 ng"
[1] "normal"
[1] "het+RUNX3 3 ng"
[1] "normal"
[1] "KO 3 ng"
[1] "normal"
[1] "WT 30 ng"
[1] "non-normal"
[1] "het 30 n

In [15]:
run_statistics("Fig5B_FACS_OCI-Ly8"
              ,groups_to_test = c("het 0 ng vs WT 0 ng"
                                 ,"het+RUNX3 0 ng vs WT 0 ng"
                                 ,"KO 0 ng vs WT 0 ng"
                                  ,"het 3 ng vs WT 3 ng"
                                 ,"het+RUNX3 3 ng vs WT 3 ng"
                                 ,"KO 3 ng vs WT 3 ng"
                                  ,"het 30 ng vs WT 30 ng"
                                 ,"het+RUNX3 30 ng vs WT 30 ng"
                                 ,"KO 30 ng vs WT 30 ng"
                                  ,"het 300 ng vs WT 300 ng"
                                 ,"het+RUNX3 300 ng vs WT 300 ng"
                                 ,"KO 300 ng vs WT 300 ng"
                                 )
              )

'data.frame':	48 obs. of  5 variables:
 $ value  : num  6.27 3.18 2.65 2.29 1.42 ...
 $ group  : chr  "WT 0 ng" "WT 0 ng" "WT 0 ng" "het 0 ng" ...
 $ tec_rep: int  1 2 3 1 2 3 1 2 3 1 ...
 $ bio_rep: logi  NA NA NA NA NA NA ...
 $ date   : logi  NA NA NA NA NA NA ...
NULL

[1] "we have following groups in the data:"
 [1] "WT 0 ng"          "het 0 ng"         "het+RUNX3 0 ng"   "KO 0 ng"         
 [5] "WT 3 ng"          "het 3 ng"         "het+RUNX3 3 ng"   "KO 3 ng"         
 [9] "WT 30 ng"         "het 30 ng"        "het+RUNX3 30 ng"  "KO 30 ng"        
[13] "WT 300 ng"        "het 300 ng"       "het+RUNX3 300 ng" "KO 300 ng"       


[1] "run Shapiro test to check the assumption of the distribution normality..."
[1] "WT 0 ng"
[1] "normal"
[1] "het 0 ng"
[1] "normal"
[1] "het+RUNX3 0 ng"
[1] "normal"
[1] "KO 0 ng"
[1] "normal"
[1] "WT 3 ng"
[1] "normal"
[1] "het 3 ng"
[1] "normal"
[1] "het+RUNX3 3 ng"
[1] "normal"
[1] "KO 3 ng"
[1] "normal"
[1] "WT 30 ng"
[1] "normal"
[1] "het 30 ng"


In [16]:
run_statistics("Fig5E_FACS_OCI-Ly8"
              ,groups_to_test = c("het+RUNX3 vs het"
                                 ,"het+RUNX3 vs WT"
                                 )
              )

'data.frame':	20 obs. of  5 variables:
 $ value  : num  19.8 30.12 18.37 7.01 23.37 ...
 $ group  : chr  "WT" "WT" "WT" "WT" ...
 $ tec_rep: logi  NA NA NA NA NA NA ...
 $ bio_rep: chr  "CD8.1" "CD8.2" "CD8.3" "CD8.333" ...
 $ date   : int  20211201 20211207 20211220 20220316 20220318 20211201 20211207 20211220 20220316 20220318 ...
NULL

[1] "we have following groups in the data:"
[1] "WT"        "het"       "het+RUNX3" "KO"       

[1] "data are paired observations"

[1] "run Shapiro test to check the assumption of the distribution normality..."
[1] "WT"
[1] "normal"
[1] "het"
[1] "non-normal"
[1] "het+RUNX3"
[1] "normal"
[1] "KO"
[1] "non-normal"
[1] "overall distribution is:"
[1] "non-normal"

[1] "run Fligner-Killeen test to check the assumption of the variance homogeneity..."
[1] "overall variance is:"
[1] "stable"

[1] "Since the normality of distribution assumption is not met and the homogeneity of variace assumption is met, we use the Mann-Whitney U-test."

[1] "Since we have 

In [17]:
run_statistics("FigS1B_FACS"
              ,groups_to_test = c("MUT vs WT")
              )

'data.frame':	12 obs. of  6 variables:
 $ value  : num  17.4 14.5 13 21 18 14.4 5.89 4.06 2.98 9.16 ...
 $ group  : chr  "WT" "WT" "WT" "WT" ...
 $ group2 : chr  "OCI-Ly1" "OCI-Ly1" "OCI-Ly1" "OCI-Ly8" ...
 $ tec_rep: int  1 2 3 1 2 3 1 2 3 1 ...
 $ bio_rep: logi  NA NA NA NA NA NA ...
 $ date   : logi  NA NA NA NA NA NA ...
NULL

[1] "we have following groups in the data:"
[1] "WT"  "MUT"


[1] "run Shapiro test to check the assumption of the distribution normality..."
[1] "WT"
[1] "normal"
[1] "MUT"
[1] "normal"
[1] "overall distribution is:"
[1] "normal"

[1] "run Bartlett test to check the assumption of the variance homogeneity..."
[1] "overall variance is:"
[1] "stable"

[1] "Since the normality of distribution assumption is met and the homogeneity of variace assumption is met, we use the t-test."


[1] "We are doing the comparison of the following group(s):"
[1] "MUT vs WT"

          comparison     p.value
MUT vs WT  MUT vs WT 0.001363183


In [18]:
run_statistics("FigS2D_qPCR_OCI-Ly1"
              ,groups_to_test = c("het vs WT"
                                 ,"het+RUNX3 vs WT"
                                 )
              )

In [19]:
run_statistics("FigS2E_FACS_OCI-Ly1"
              ,groups_to_test = c("het+RUNX3 vs het"
                                 ,"het+RUNX3 vs WT"
                                 )
              )

'data.frame':	12 obs. of  5 variables:
 $ value  : num  23.3 16.6 14.9 7.01 3.96 5.18 12.7 7.01 11.9 8.99 ...
 $ group  : chr  "WT" "WT" "WT" "het" ...
 $ tec_rep: int  1 2 3 1 2 3 1 2 3 1 ...
 $ bio_rep: logi  NA NA NA NA NA NA ...
 $ date   : logi  NA NA NA NA NA NA ...
NULL

[1] "we have following groups in the data:"
[1] "WT"        "het"       "het+RUNX3" "KO"       


[1] "run Shapiro test to check the assumption of the distribution normality..."
[1] "WT"
[1] "normal"
[1] "het"
[1] "normal"
[1] "het+RUNX3"
[1] "normal"
[1] "KO"
[1] "normal"
[1] "overall distribution is:"
[1] "normal"

[1] "run Bartlett test to check the assumption of the variance homogeneity..."
[1] "overall variance is:"
[1] "stable"

[1] "Since the normality of distribution assumption is met and the homogeneity of variace assumption is met, we use the t-test."


[1] "We are doing the comparison of the following group(s):"
[1] "het+RUNX3 vs het" "het+RUNX3 vs WT" 

                       comparison   p.value    

In [20]:
sessionInfo()

R version 4.2.2 (2022-10-31)
Platform: x86_64-apple-darwin17.0 (64-bit)
Running under: macOS Catalina 10.15.7

Matrix products: default
BLAS:   /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRblas.0.dylib
LAPACK: /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRlapack.dylib

locale:
[1] C/UTF-8/C/C/C/C

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

loaded via a namespace (and not attached):
 [1] fansi_1.0.4     crayon_1.5.2    digest_0.6.33   utf8_1.2.3     
 [5] IRdisplay_1.1   repr_1.1.4      lifecycle_1.0.3 jsonlite_1.8.7 
 [9] evaluate_0.21   pillar_1.9.0    rlang_1.1.1     cli_3.6.1      
[13] uuid_1.1-0      vctrs_0.6.3     IRkernel_1.3.1  tools_4.2.2    
[17] glue_1.6.2      fastmap_1.1.1   compiler_4.2.2  base64enc_0.1-3
[21] pbdZMQ_0.3-8    htmltools_0.5.6