In [1]:
Output = c("/Users/alexis/Library/CloudStorage/OneDrive-UniversityofNorthCarolinaatChapelHill/CEMALB_DataAnalysisPM/Projects/P1011. Emission Mixtures/P1011.3. Analyses/P1011.3.3. Biomarker Distribution Analysis/Output")
cur_date = "082323"

library(readxl)
library(openxlsx)
library(tidyverse)
library(reshape2)
library(rlang)
library(PMCMRplus)

# reading in files
cytokine_df = data.frame(read_excel("Input/Processed_Cyotkine_Data_081723.xlsx"))
mRNA_df = data.frame(read_excel("Input/Processed_mRNA_Data_081723.xlsx"))

# making some cols numeric
cytokine_df$Condensate_Conc = as.numeric(cytokine_df$Condensate_Conc)
mRNA_df$Concentration = as.numeric(mRNA_df$Concentration)
mRNA_df$Time_Point = as.numeric(mRNA_df$Time_Point)

── [1mAttaching core tidyverse packages[22m ──────────────────────── tidyverse 2.0.0 ──
[32m✔[39m [34mdplyr    [39m 1.1.2     [32m✔[39m [34mreadr    [39m 2.1.4
[32m✔[39m [34mforcats  [39m 1.0.0     [32m✔[39m [34mstringr  [39m 1.5.0
[32m✔[39m [34mggplot2  [39m 3.4.2     [32m✔[39m [34mtibble   [39m 3.2.1
[32m✔[39m [34mlubridate[39m 1.9.2     [32m✔[39m [34mtidyr    [39m 1.3.0
[32m✔[39m [34mpurrr    [39m 1.0.1     
── [1mConflicts[22m ────────────────────────────────────────── tidyverse_conflicts() ──
[31m✖[39m [34mdplyr[39m::[32mfilter()[39m masks [34mstats[39m::filter()
[31m✖[39m [34mdplyr[39m::[32mlag()[39m    masks [34mstats[39m::lag()
[36mℹ[39m Use the conflicted package ([3m[34m<http://conflicted.r-lib.org/>[39m[23m) to force all conflicts to become errors

Attaching package: ‘reshape2’


The following object is masked from ‘package:tidyr’:

    smiths



Attaching package: ‘rlang’


The following objects are masked from ‘

In [2]:
head(cytokine_df)
head(mRNA_df)

Unnamed: 0_level_0,Subject_ID,Subject_No,Cytokine,Condensate,Burn_Condition,Condensate_Conc,Norm_Cytokine_Conc
Unnamed: 0_level_1,<chr>,<dbl>,<chr>,<chr>,<chr>,<dbl>,<dbl>
1,F_1,1,Eotaxin3,C,F,1,5.438924
2,F_1,1,Eotaxin3,C,F,25,3.630025
3,F_1,1,Eotaxin3,C,F,5,4.694272
4,F_1,1,Eotaxin3,C,S,1,6.165522
5,F_1,1,Eotaxin3,C,S,25,3.703871
6,F_1,1,Eotaxin3,C,S,5,4.562595


Unnamed: 0_level_0,Subject_No,Subject_ID,mRNA,Condensate,Burn_Condition,Concentration,Time_Point,Norm_ddCT
Unnamed: 0_level_1,<dbl>,<chr>,<chr>,<chr>,<chr>,<dbl>,<dbl>,<dbl>
1,6,M_6,HMOX1,PBS,PBS,,24,3.912311
2,5,F_5,HMOX1,PBS,PBS,,4,3.06719
3,1,F_1,HMOX1,PBS,PBS,,4,3.843045
4,2,M_2,HMOX1,PBS,PBS,,4,3.415457
5,3,M_3,HMOX1,PBS,PBS,,4,3.234524
6,4,F_4,HMOX1,PBS,PBS,,4,3.294681


# Research Question: Are there statistically significant differences in each biomarker between condensate samples?

Testing for statistical differences by comparing an individual biomarker's value (ie. concentration or ddCT) between condensate samples (plastic vs. cardboard) within each burn condition (ie. smoldering or flaming), dose (only 1 and 25) and time point (only 24).

Typically, normality and homogeneity of variances would be tested for first, however a non-parametric test (Wilcoxon rank sum) will be used given the small sample size (n = 6).


# Wilcoxon Rank Sum test

In [3]:
wilcox_test_values = function(df, concentration, value, biomarker, biomarker_name){
    # """
    # Running wilcoxon rank sum tests after filtering for biomarker, burn condition, and condensate using a loop. 
    # Ultimately using this test to compare biomarkers (plastic vs. cardboard).

    # :param: dataframe, concentration variable, value, biomarker name, biomarker variable name
    # :output: a dataframe containing the biomarker, variable name, burn condition, concentration,
    # comparison, stat, p value, p adj

    # """
    # variables that will be iterated through
    unique_biomarker = unique(df[[biomarker_name]])
    unique_burn_condition = c("S", "F")
    unique_conc = c(1,25)
    
    values_df = data.frame()
    
    # iterating through each biomarker, protein
    for(i in 1:length(unique_biomarker)){
        for(j in 1:length(unique_burn_condition)){
            for(k in 1:length(unique_conc)){
                # plastic df
                plastic_df = df %>%
                    filter(eval(rlang::parse_expr(biomarker_name)) == unique_biomarker[i], Condensate == "P",
                    Burn_Condition == unique_burn_condition[j], eval(rlang::parse_expr(concentration)) == unique_conc[k])

                # cardboard df
                cardboard_df = df %>%
                    filter(eval(rlang::parse_expr(biomarker_name)) == unique_biomarker[i], Condensate == "C",
                    Burn_Condition == unique_burn_condition[j], eval(rlang::parse_expr(concentration)) == unique_conc[k])

                # wilcox test
                wilcox_test = wilcox.test(plastic_df[[value]], cardboard_df[[value]])

                # calculating FC to get directionality
                FC = log2(mean(cardboard_df[[value]]/mean(plastic_df[[value]])))

                # contains smoking status compared, compartment, cytokine, u stat, and p value
                values_vector = cbind(biomarker, unique_biomarker[i], unique_burn_condition[j], unique_conc[k],
                                      wilcox_test$statistic, wilcox_test$p.value)
                values_df = rbind(values_df, values_vector)
            }
        }
    }

    
    # adding col names
    colnames(values_df) = c("Biomarker", "Variable Name", "Burn Condition", "Concentration", "Statistic", "P Value")
    
    
   # calculating padj values
    values_df = values_df %>%
        group_by(`Burn Condition`, Concentration) %>%
        mutate(`P Adj` = p.adjust(as.numeric(as.character(`P Value`)), method = "fdr"))
    
    return(values_df)
}
                                          
# calling fn
cytokine_wilcox = wilcox_test_values(cytokine_df, "Condensate_Conc", "Norm_Cytokine_Conc", "Cytokine", "Cytokine")
mRNA_wilcox = wilcox_test_values(mRNA_df, "Concentration", "Norm_ddCT", "mRNA", "mRNA")

“NaNs produced”
“NaNs produced”
“NaNs produced”
“NaNs produced”
“NaNs produced”
“NaNs produced”
“NaNs produced”
“NaNs produced”
“NaNs produced”
“NaNs produced”
“NaNs produced”
“NaNs produced”
“NaNs produced”
“NaNs produced”
“NaNs produced”
“NaNs produced”
“NaNs produced”
“NaNs produced”
“NaNs produced”
“NaNs produced”
“NaNs produced”
“NaNs produced”
“NaNs produced”
“NaNs produced”
“NaNs produced”
“NaNs produced”
“NaNs produced”
“NaNs produced”
“NaNs produced”
“NaNs produced”


In [4]:
# creating 1 final df for export
final_df = rbind(cytokine_wilcox, mRNA_wilcox)

head(final_df)

Biomarker,Variable Name,Burn Condition,Concentration,Statistic,P Value,P Adj
<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<dbl>
Cytokine,Eotaxin3,S,1,20,0.818181818181818,0.9958063
Cytokine,Eotaxin3,S,25,17,0.937229437229437,0.9958063
Cytokine,Eotaxin3,F,1,14,0.588744588744589,0.9958063
Cytokine,Eotaxin3,F,25,18,1.0,1.0
Cytokine,GMCSF,S,1,14,0.588744588744589,0.9958063
Cytokine,GMCSF,S,25,23,0.484848484848485,0.9958063


In [5]:
# exporting
write.xlsx(final_df, paste0(Output,"/", "Wilcoxon_Condensate_Results_", cur_date, ".xlsx"), rowNames = FALSE)